pacman::p_load("tidyverse");
pacman::p_load("ggthemes");
pacman::p_load("ggrepel");
pacman::p_load("brglm2");
pacman::p_load("BradleyTerry2");

# Bonus rule history:
# 2008: +2, unbalanced (outside podium)
# 2009: +2 to infinity (outside top 12)
# 2010: +2 to +4
# 2011-2014: +1 to +3 (from 2014: outside top 6)
# 2015-2018: +3
# 2019-: +1

# Reading replay data and other fundamental definitions.

pmin_exclusions <- c("ZCT255", "ZCT266");

car_coeffs <- read_csv("car_coeffs.csv", col_types = "ccii");

all_cars <- unique(car_coeffs |> select(car)) |>
    arrange(car) |> pull();

almost_all_cars <- all_cars |> setdiff(c("XCFX"));

core_cars <- c("AUDI", "COUN", "JAGU", "LANC", "LM02", "P962", "PC04") |>
    fct(levels = all_cars);

extended_core_cars <- all_cars |>
    setdiff(c("XCFX")) |>
    setdiff(c("PMIN", "VETT", "FGTO", "ANSX")) |>
    setdiff(c("GATE")) |>
    #setdiff(c("ZPTR", "DTPA")) |>
    #setdiff(c("LOLA", "ZCS7", "ZGT3", "ZMP4")) |>
    #setdiff(c("50FT", "CERV", "DBMW", "DC70")) |>
    #setdiff(c("STRA", "ZLET", "ZTST")) |>
    fct(levels = all_cars);

ctg2014 <- read_csv("ctg2014.csv",
                    col_types = cols(result_id = "i",
                                     season = "i",
                                     car = col_factor(levels = all_cars),
                                     bonus = "i",
                                     corrected_hsec = "i"));


replays_oct24 <- read_csv("replays.csv",
                          col_types = cols(result_id = "i",
                                           season = "i",
                                           car = col_factor(levels = all_cars),
                                           bonus = "i",
                                           corrected_hsec = "i"));

replays <- replays_oct24 |>
    bind_rows(ctg2014) |>
    identity();

# Grouping by track and car and setting up matches.

track_results <- replays |>
    group_by(track) |>
    summarize(winning_lap = min(corrected_hsec),
        num_rpl = n(),
        .groups = "drop");

car_results <- replays |>
    group_by(track, car) |>
    summarize(lap = min(corrected_hsec),
        num_rpl = n(),
        season = first(season),
        .groups = "drop") |>
        # bonus = first(bonus)) |>
    right_join(car_coeffs, by = c("track", "car")) |>
    left_join(track_results, by = "track", suffix = c(".car", ".all")) |>
    mutate(bonus = base + author_delta,
           num_rpl.car = coalesce(num_rpl.car, 0),
           lap_frac = lap / winning_lap,
           num_rpl_frac = num_rpl.car / num_rpl.all,
           car = fct(car, levels = all_cars)) |>
    select(track, car, bonus, lap_frac, num_rpl_frac, season) |>
    arrange(track, lap_frac);

bonus_to_adv <- \(b) -log(1 - b / 100);
adv_to_bonus <- \(a) 100 * (1 - exp(-a));

# Note that ties are broken arbirarily by replay rarity and car name!
car_matches <- car_results |>
    inner_join(car_results, by = join_by(track, car < car)) |>
    mutate(advantage = bonus_to_adv(bonus.x) - bonus_to_adv(bonus.y),
           score = case_when(
                             lap_frac.x < coalesce(lap_frac.y, Inf) ~ 1,
                             coalesce(lap_frac.x, Inf) > lap_frac.y ~ 0,
                             is.na(lap_frac.x) & is.na(lap_frac.y) ~ NA,
                             (num_rpl_frac.x < num_rpl_frac.y) ~ 1,
                             (num_rpl_frac.x > num_rpl_frac.y) ~ 0,
                             (as.character(car.x) < as.character(car.y)) ~ 1),
           season = coalesce(season.x, season.y)) |>
    filter(!is.na(score)) |>
    select(-bonus.x, -bonus.y, -season.x, -season.y) |>
    relocate(track, car.x, car.y, advantage, score) |>
    arrange(track, car.x);

# General filtering of relevant matches.

relevance_filter <- \(data) data |>
    filter(!(((car.x == "PMIN") | (car.y == "PMIN")) & (track %in% pmin_exclusions))) |>
    filter(lap_frac.x <= 1.2 | lap_frac.y <= 1.2 |
           num_rpl_frac.x >= 0.1 | num_rpl_frac.y >= 0.1) |>
    identity();

# Selection of matches by car.

# Any two cars.
pair_filter <- \(data, c1, c2) data |>
    filter(((car.x == c1) & (car.y == c2)) | ((car.x == c2) & (car.y == c1)));

# Flip scores and advantages to a chosen car.
focus_car <- \(data, c1) data |>
    mutate(score = case_when(
                             c1 == car.y ~ 1 - score,
                             c1 == car.x ~ score),
           advantage = case_when(
                                 c1 == car.y ~ -advantage,
                                 c1 == car.x ~ advantage),
           lap_frac.1 = case_when(
                                c1 == car.y ~ lap_frac.y,
                                c1 == car.x ~ lap_frac.x),
           num_rpl_frac.1 = case_when(
                                      c1 == car.y ~ num_rpl_frac.y,
                                      c1 == car.x ~ num_rpl_frac.x),
           lap_frac.2 = case_when(
                                c1 == car.y ~ lap_frac.x,
                                c1 == car.x ~ lap_frac.y),
           num_rpl_frac.2 = case_when(
                                      c1 == car.y ~ num_rpl_frac.x,
                                      c1 == car.x ~ num_rpl_frac.y),
           car.2 = case_when(
                               c1 == car.y ~ car.x,
                               c1 == car.x ~ car.y)) |>
    filter(!is.na(car.2)) |>
    select(track, car.2, advantage, score,
           lap_frac.1, lap_frac.2, num_rpl_frac.1, num_rpl_frac.2,
           season) |>
    #arrange(track, lap_frac.2) |>
    identity();

pair_focus <- \(data, c1, c2) data |>
    pair_filter(c1, c2) |> focus_car(c1);

# Compute advantages through logistic regression.

logis_mdl <- \(data)
    glm(score ~ advantage, data = data, family = "binomial",
        control = list(maxit = 100));#, method = "brglm_fit");

mdl_to_adv <- function(mdl) {
    mdl_coefs <- coef(mdl);
    -mdl_coefs[["(Intercept)"]] / mdl_coefs[["advantage"]];
}

mdl_to_err_adv <- function(mdl) {
    mdl_coefs <- coef(mdl);
    mdl_vcov <- vcov(mdl);
    abs(mdl_coefs[1]/mdl_coefs[2]) *
        sqrt(mdl_vcov[1]/mdl_coefs[1]^2 + mdl_vcov[4]/mdl_coefs[2]^2 -
             2*(mdl_vcov[2]/(mdl_coefs[1]*mdl_coefs[2])));
}

# Test stuff

plot_match <- function(data, c1, c2, lbld.tracks = NULL) {
    mdl <- logis_mdl(data);
    pred_mdl <- tibble(advantage = data |> pull(advantage),
                       score = predict(mdl, data, type = "response"));
    if (is.null(lbld.tracks)) {
        labelled_only <- data;
    }
    else {
        labelled_only <- data |> filter(track %in% lbld.tracks);
    }
    data |>
        ggplot(aes(x = advantage, y = score)) +
        geom_point(aes(color = as.factor(score)),
                   alpha = 0.3, size = 3) +
        #geom_bin2d() +
        geom_line(data = pred_mdl, aes(x = advantage, y = score),
                  linetype = 2) +
        # ggplot(aes(x = car.2, y = advantage)) +
        # geom_boxplot(aes(color = car.2)) +
        #facet_wrap(~ car.2) +
        geom_text_repel(data = labelled_only,
                        aes(label = paste0("Z", substr(track, 4, 6))),
                        box.padding = 1.6) +
        labs(title = paste(c1, "versus", c2),
             x = paste(c1, "advantage over", c2),
             y = paste(c1, "score")) +
        theme_bw(base_size = 14) +
        theme(legend.position = "none") +
        scale_color_calc();
}

example_labelled_tracks <- c("ZCT133", "ZCT219", "ZCT129", "ZCT179",
                             "ZCT150", "ZCT105", "ZCT193", "ZCT091",
                             "ZCT279", "ZCT276", "ZCT258", "ZCT247");

test_vis <- function(c1, c2, lbld.tracks = NULL) {
    foo <- car_matches |> relevance_filter() |>
        pair_focus(c1, c2);
    plot_match(foo, c1, c2, lbld.tracks = lbld.tracks);
}

# Bradley-Terry approach

cars_filter <- \(data, sel_cars) data |>
    filter((car.x %in% sel_cars) & (car.y %in% sel_cars));

btm_matches <- function(sel_cars) {
    mtchs_pre <- car_matches |>
        relevance_filter() |> cars_filter(sel_cars);
    data <- list(
                car.x = mtchs_pre |>
                    transmute(car = factor(car.x, levels = sel_cars),
                              adv = advantage,
                              lap = lap_frac.x,
                              nrpl = num_rpl_frac.x),
                car.y = mtchs_pre |>
                    transmute(car = factor(car.y, levels = sel_cars),
                              adv = 0,
                              lap = lap_frac.y,
                              nrpl = num_rpl_frac.y),
                mtchs = mtchs_pre |>
                    mutate(season = season |>
                           as.character() |> ordered()) |>
                    select(track, score, season));
    data;
}


btm_mdl <- \(sel_cars)
    BTm(outcome = score,
        player1 = car.x,
        player2 = car.y,
        data = btm_matches(sel_cars),
        #subset = season > 2018,
        id = "car", refcat = "P962",# br = TRUE,
        formula = ~ car + adv, family = binomial(link = "logit"));

ext_cars_btm_fit <- btm_mdl(extended_core_cars);
ext_cars_btm_abl <- BTabilities(ext_cars_btm_fit);
ext_cars_bonuses <- adv_to_bonus(-ext_cars_btm_abl[,"ability"] /
                                 coef(ext_cars_btm_fit)["adv"]);
ext_cars_btm_qv <- qvcalc(ext_cars_btm_abl);
ext_cars_btm_qv$qvframe <- with(ext_cars_btm_qv,
                     qvframe[order(qvframe$estimate, decreasing = TRUE), , drop = FALSE]);
plot(ext_cars_btm_qv, las = 2);
grid();
#summary(ext_cars_btm_fit, corr=TRUE)
#res <- residuals(ext_cars_btm_fit, type="grouped");
#tibble(ix=c(1:length(res)), grp=rownames(res), res=res) |>
#    ggplot(aes(x=ix, y=res)) + geom_point() + geom_text(aes(label=grp));

