library(bpcs)
library(tidyverse)
library(knitr)
library(kableExtra)
options(mc.cores = parallel::detectCores())
rstan::rstan_options(auto_write = TRUE)
set.seed(99)
Huskisson, S.M., Jacobson, S.L., Egelkamp, C.L. et al. Using a Touchscreen Paradigm to Evaluate Food Preferences and Response to Novel Photographic Stimuli of Food in Three Primate Species (Gorilla gorilla gorilla, Pan troglodytes, and Macaca fuscata). Int J Primatol 41, 5–23 (2020). https://doi.org/10.1007/s10764-020-00131-0
Let’s import the data and select only the gorillas
d <- readr::read_csv('data/touchscreen.csv') %>%
dplyr::filter(species_type=='Gorilla') %>%
dplyr::select(species_type, SubjectCode, Trial, image1, image2, image_chosen)
Let’s create a column that says which image was selected 1 for image2 0 for image 1
d <- d %>%
dplyr::mutate(y =ifelse(image_chosen==image1, 0, 1)) %>%
select(-image_chosen)
Let’s also do some recoding to facilitate interpreting
d$image1 <- dplyr::recode(d$image1, "Ca" = 'Carrot')
d$image1 <- dplyr::recode(d$image1, "Cu" = 'Cucumber')
d$image1 <- dplyr::recode(d$image1, "Tu" = 'Turnip')
d$image1 <- dplyr::recode(d$image1, "Gr" = 'Grape')
d$image1 <- dplyr::recode(d$image1, "To" = 'Tomato')
d$image1 <- dplyr::recode(d$image1, "Ap" = 'Apple')
d$image2 <- dplyr::recode(d$image2, "Ca" = 'Carrot')
d$image2 <- dplyr::recode(d$image2, "Cu" = 'Cucumber')
d$image2 <- dplyr::recode(d$image2, "Tu" = 'Turnip')
d$image2 <- dplyr::recode(d$image2, "Gr" = 'Grape')
d$image2 <- dplyr::recode(d$image2, "To" = 'Tomato')
d$image2 <- dplyr::recode(d$image2, "Ap" = 'Apple')
Sample of the data
d %>%
dplyr::sample_n(size=10) %>%
kable() %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
kableExtra::scroll_box(width = "100%")
species_type | SubjectCode | Trial | image1 | image2 | y |
---|---|---|---|---|---|
Gorilla | Gorilla3 | 2 | Grape | Cucumber | 0 |
Gorilla | Gorilla6 | 16 | Grape | Turnip | 0 |
Gorilla | Gorilla6 | 14 | Grape | Tomato | 1 |
Gorilla | Gorilla2 | 2 | Apple | Tomato | 0 |
Gorilla | Gorilla4 | 14 | Apple | Turnip | 0 |
Gorilla | Gorilla4 | 5 | Carrot | Grape | 0 |
Gorilla | Gorilla5 | 13 | Tomato | Turnip | 0 |
Gorilla | Gorilla5 | 13 | Apple | Carrot | 0 |
Gorilla | Gorilla6 | 21 | Carrot | Grape | 1 |
Gorilla | Gorilla5 | 28 | Apple | Turnip | 0 |
Now we can run a Bradley-Terry model with the bpcs package
m1_gor <-
bpc(
d,
player0 = 'image1',
player1 = 'image2',
result_column = 'y',
model_type = 'bt',
priors = list(prior_lambda_std = 0.5),
iter = 3000
)
save_bpc_model(m1_gor, 'm1_gor', 'fittedmodels')
Now that we got our posterior we can look at some interesting tables and plots
plot(m1_gor, rotate_x_labels = T)+
scale_x_discrete(
labels = c(
"Turnip",
"Cucumber",
"Carrot",
"Apple",
"Grape",
"Tomato"
))
Getting a probability table
get_probabilities_table(m1_gor, format='html')
i | j | i_beats_j | j_beats_i |
---|---|---|---|
Apple | Carrot | 0.54 | 0.46 |
Apple | Cucumber | 0.68 | 0.32 |
Apple | Grape | 0.36 | 0.64 |
Apple | Tomato | 0.39 | 0.61 |
Apple | Turnip | 0.74 | 0.26 |
Carrot | Cucumber | 0.64 | 0.36 |
Carrot | Grape | 0.25 | 0.75 |
Carrot | Tomato | 0.26 | 0.74 |
Carrot | Turnip | 0.73 | 0.27 |
Cucumber | Grape | 0.15 | 0.85 |
Cucumber | Tomato | 0.17 | 0.83 |
Cucumber | Turnip | 0.51 | 0.49 |
Grape | Tomato | 0.58 | 0.42 |
Grape | Turnip | 0.83 | 0.17 |
Tomato | Turnip | 0.83 | 0.17 |
Let’s start importing the data
d2 <- read_csv('./data/auto-label-comparison.csv') %>%
dplyr::rename(Value='value') %>%
dplyr::filter(ValueType=='accuracy') %>%
dplyr::filter(Model=='LabelPropagation_knn' |
Model=='LabelPropagation_rbf' |
Model=='LabelSpreading_knn' |
Model=='LabelSpreading_rbf')
## Warning: Missing column names filled in: 'X1' [1]
Lets visualize
d2 %>%
dplyr::sample_n(size=10) %>%
kable() %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
kableExtra::scroll_box(width = "100%")
X1 | ValueType | Value | Iteration | Variable | Model | Dataset | DataType |
---|---|---|---|---|---|---|---|
27 | accuracy | 0.6967609 | 7 | 50% | LabelSpreading_rbf | digits | image |
22 | accuracy | 0.3333333 | 2 | 50% | LabelSpreading_knn | corpus | text |
23 | accuracy | 0.1666667 | 3 | 50% | LabelPropagation_knn | reuters | text |
8 | accuracy | 0.0181818 | 8 | 10% | LabelPropagation_rbf | cifar | image |
28 | accuracy | 0.0004808 | 8 | 50% | LabelPropagation_rbf | cifar2 | image |
24 | accuracy | 0.2756910 | 4 | 50% | LabelPropagation_knn | german | numeric |
4 | accuracy | 0.0181818 | 4 | 10% | LabelPropagation_knn | mnist | image |
9 | accuracy | 0.1666667 | 9 | 10% | LabelPropagation_knn | reuters | text |
1 | accuracy | 0.1280959 | 1 | 10% | LabelPropagation_rbf | 20news | text |
20 | accuracy | 0.2635986 | 0 | 50% | LabelPropagation_knn | musk1 | numeric |
Now we need to convert it to paired comparison: First we pivot it longer and them we expand the dataset
d_acc_rank <- d2 %>%
dplyr::group_by(Dataset, Variable, Iteration) %>%
dplyr::mutate(Rank=-rank(Value, ties.method = 'random')) %>%
dplyr::ungroup() %>%
dplyr::select(-Value) %>% #we need to drop the Value variable to pivot wider
tidyr::pivot_wider(names_from = Model,
values_from=Rank)
Expanding it
models<-unique(d2$Model)
n_models = length(models)
comb <- gtools::combinations(n=n_models, r=2, v=seq(1:n_models), repeats.allowed = F) #all teh paired combinations
d_acc_bt <- dplyr::tribble(~model0, ~model1, ~y, ~Iteration, ~Dataset, ~DatasetType)
#now we loop each row of the rank wide dataset and create a new one
for(i in 1:nrow(d_acc_rank))
{
current_row <- d_acc_rank[i,]
for(j in 1:nrow(comb)){
comb_row <- comb[j,]
model0 <- models[comb_row[1]]
model0_rank <- current_row[[1,model0]]
model1 <- models[comb_row[2]]
model1_rank <- current_row[[1,model1]]
diff_rank <- model1_rank - model0_rank
#SInce higher accuracy is better if model 1 rank- model 0 rank is positive than model1 wins and y=1 else y=0
y <- ifelse(diff_rank>0, 1, 0)
d_acc_bt <-d_acc_bt %>%
add_row(model0=model0,
model1=model1,
y=y,
Iteration=current_row$Iteration,
Dataset=current_row$Dataset,
DatasetType=current_row$DataType)
}
}
d_acc_bt %>%
dplyr::sample_n(size=20) %>%
kable() %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
kableExtra::scroll_box(width = "100%")
model0 | model1 | y | Iteration | Dataset | DatasetType |
---|---|---|---|---|---|
LabelPropagation_rbf | LabelSpreading_rbf | 1 | 0 | wine | numeric |
LabelSpreading_knn | LabelSpreading_rbf | 0 | 8 | wine | numeric |
LabelPropagation_rbf | LabelSpreading_knn | 0 | 5 | corpus | text |
LabelPropagation_rbf | LabelSpreading_rbf | 0 | 6 | wine | numeric |
LabelPropagation_knn | LabelPropagation_rbf | 1 | 3 | cifar | image |
LabelPropagation_rbf | LabelSpreading_rbf | 0 | 0 | corpus | text |
LabelPropagation_knn | LabelSpreading_rbf | 0 | 7 | 20news | text |
LabelPropagation_knn | LabelSpreading_rbf | 0 | 8 | reuters | text |
LabelPropagation_rbf | LabelSpreading_knn | 0 | 5 | musk1 | numeric |
LabelSpreading_knn | LabelSpreading_rbf | 1 | 9 | cifar | image |
LabelPropagation_knn | LabelSpreading_rbf | 0 | 1 | 20news | text |
LabelPropagation_rbf | LabelSpreading_rbf | 1 | 4 | ohsumed | text |
LabelSpreading_knn | LabelSpreading_rbf | 1 | 5 | cifar | image |
LabelSpreading_knn | LabelSpreading_rbf | 0 | 1 | mnist | image |
LabelSpreading_knn | LabelSpreading_rbf | 1 | 9 | iris | numeric |
LabelPropagation_knn | LabelSpreading_knn | 0 | 3 | corpus | text |
LabelPropagation_rbf | LabelSpreading_rbf | 0 | 9 | cifar2 | image |
LabelPropagation_knn | LabelSpreading_knn | 1 | 8 | digits | image |
LabelPropagation_rbf | LabelSpreading_knn | 1 | 9 | ohsumed | text |
LabelPropagation_knn | LabelSpreading_knn | 1 | 1 | corpus | text |
Now we can do some paired comparison models
m1_autol <-
bpc(
d_acc_bt,
player0 = 'model0',
player1 = 'model1',
result_column = 'y',
model_type = 'bt-U',
cluster = c("Dataset"),
priors = list(prior_lambda_std = 0.5, prior_U1_std=1.0),
iter = 3000
)
save_bpc_model(m1_autol, 'm1_autol', 'fittedmodels')
get_probabilities_table(m1_autol, format = 'html', model_type = 'bt')
i | j | i_beats_j | j_beats_i |
---|---|---|---|
LabelPropagation_knn | LabelPropagation_rbf | 0.33 | 0.67 |
LabelPropagation_knn | LabelSpreading_knn | 0.45 | 0.55 |
LabelPropagation_knn | LabelSpreading_rbf | 0.45 | 0.55 |
LabelPropagation_rbf | LabelSpreading_knn | 0.62 | 0.38 |
LabelPropagation_rbf | LabelSpreading_rbf | 0.56 | 0.44 |
LabelSpreading_knn | LabelSpreading_rbf | 0.41 | 0.59 |
get_rank_of_players_table(m1_autol, format = 'html')
Parameter | MedianRank | MeanRank | StdRank |
---|---|---|---|
lambda[LabelPropagation_rbf] | 1 | 1.426 | 0.756 |
lambda[LabelPropagation_knn] | 3 | 2.700 | 0.976 |
lambda[LabelSpreading_knn] | 3 | 2.812 | 0.959 |
lambda[LabelSpreading_rbf] | 3 | 3.062 | 0.976 |
plot(m1_autol, rotate_x_labels = T)+
scale_x_discrete(
labels = c(
"LS-RBF",
"LS-KNN",
"LP-KNN",
"LP-RBF"
))
The data comes in the bpcs package
data("brasil_soccer_league")
knitr::kable(head(brasil_soccer_league))
Time | WeekDay | Date | HomeTeam | VisitorTeam | Round | Stadium | ScoreHomeTeam | ScoreVisitorTeam |
---|---|---|---|---|---|---|---|---|
16:00 | Saturday | 2017-05-13 | Flamengo | Atlético-MG | 1 | Maracanã | 1 | 1 |
19:00 | Saturday | 2017-05-13 | Corinthians | Chapecoense | 1 | Arena Corinthians | 1 | 1 |
16:00 | Sunday | 2017-05-14 | Avaà | Vitória | 1 | Ressacada | 0 | 0 |
16:00 | Sunday | 2017-05-14 | Bahia | Athlético-PR | 1 | Fonte Nova | 6 | 2 |
16:00 | Sunday | 2017-05-14 | Cruzeiro | São Paulo | 1 | Mineirão | 1 | 0 |
11:00 | Sunday | 2017-05-14 | Fluminense | Santos | 1 | Maracanã | 3 | 2 |
Let’s analyze only the data from 2019 and remove a few columns that are not relevant for this example:
d3 <- brasil_soccer_league %>%
dplyr::filter(Date >= as.Date("2019-01-01") &
Date <= as.Date("2019-12-31")) %>%
dplyr::select(HomeTeam, VisitorTeam, ScoreHomeTeam, ScoreVisitorTeam, Round)
m_football <- bpc(d3,
player0 = 'VisitorTeam',
player1 = 'HomeTeam',
player0_score = 'ScoreVisitorTeam',
player1_score = 'ScoreHomeTeam',
model_type = 'davidson',
solve_ties = 'none',
priors = list(prior_lambda_std=2.0), # making a more informative prior to improve convergence
iter = 3000) #stan indicates a low bulk ESS so we are increasing the number of iterations
save_bpc_model(m_football, 'm_football', 'fittedmodels')
Getting the table with the parameters:
get_parameters_table(m_football, format = 'html')
Parameter | Mean | HPD_lower | HPD_higher |
---|---|---|---|
lambda[AvaÃ] | -1.912 | -3.331 | -0.627 |
lambda[Internacional] | 0.162 | -1.031 | 1.440 |
lambda[Cruzeiro] | -0.683 | -1.921 | 0.520 |
lambda[Botafogo-rj] | -0.584 | -1.862 | 0.637 |
lambda[Vasco] | -0.067 | -1.286 | 1.137 |
lambda[Corinthians] | 0.310 | -0.867 | 1.576 |
lambda[CSA] | -1.101 | -2.368 | 0.143 |
lambda[Goiás] | -0.061 | -1.335 | 1.122 |
lambda[Fortaleza] | 0.133 | -1.107 | 1.352 |
lambda[Santos] | 1.070 | -0.217 | 2.291 |
lambda[Grêmio] | 0.631 | -0.573 | 1.912 |
lambda[Palmeiras] | 1.245 | -0.011 | 2.514 |
lambda[Atlético-MG] | -0.048 | -1.340 | 1.133 |
lambda[Chapecoense] | -0.986 | -2.280 | 0.272 |
lambda[Ceará] | -0.729 | -2.022 | 0.433 |
lambda[Athlético-PR] | 0.717 | -0.580 | 1.919 |
lambda[Flamengo] | 2.042 | 0.696 | 3.372 |
lambda[São Paulo] | 0.616 | -0.654 | 1.849 |
lambda[Bahia] | -0.129 | -1.353 | 1.098 |
lambda[Fluminense] | -0.378 | -1.594 | 0.903 |
nu | 0.385 | 0.159 | 0.624 |
Ranking the teams
get_rank_of_players_table(m_football, format = 'html')
Parameter | MedianRank | MeanRank | StdRank |
---|---|---|---|
lambda[Flamengo] | 1 | 1.286 | 0.692 |
lambda[Palmeiras] | 3 | 3.195 | 1.920 |
lambda[Santos] | 3 | 3.746 | 1.946 |
lambda[Athlético-PR] | 5 | 5.633 | 2.810 |
lambda[Grêmio] | 6 | 6.027 | 2.972 |
lambda[São Paulo] | 6 | 6.022 | 2.800 |
lambda[Corinthians] | 8 | 8.073 | 3.065 |
lambda[Fortaleza] | 9 | 9.455 | 3.391 |
lambda[Internacional] | 9 | 9.252 | 3.445 |
lambda[Atlético-MG] | 11 | 10.880 | 3.472 |
lambda[Goiás] | 11 | 10.969 | 3.493 |
lambda[Vasco] | 11 | 10.975 | 3.362 |
lambda[Bahia] | 12 | 11.527 | 3.395 |
lambda[Fluminense] | 14 | 13.380 | 3.363 |
lambda[Botafogo-rj] | 15 | 14.759 | 3.008 |
lambda[Ceará] | 16 | 15.401 | 2.785 |
lambda[Cruzeiro] | 16 | 15.271 | 2.865 |
lambda[Chapecoense] | 18 | 17.036 | 2.276 |
lambda[CSA] | 18 | 17.419 | 2.145 |
lambda[AvaÃ] | 20 | 19.694 | 0.733 |
Plot
plot(m_football, rotate_x_labels = T)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.2.1 knitr_1.30 forcats_0.5.0 stringr_1.4.0
## [5] dplyr_1.0.2 purrr_0.3.4 readr_1.3.1 tidyr_1.1.2
## [9] tibble_3.0.4 ggplot2_3.3.3 tidyverse_1.3.0 bpcs_1.1.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 viridisLite_0.3.0 jsonlite_1.7.2
## [4] modelr_0.1.8 RcppParallel_5.0.2 StanHeaders_2.21.0-7
## [7] assertthat_0.2.1 highr_0.8 BiocManager_1.30.10
## [10] rvcheck_0.1.8 stats4_4.0.3 pander_0.6.3
## [13] blob_1.2.1 cellranger_1.1.0 yaml_2.2.1
## [16] pillar_1.4.7 backports_1.2.1 glue_1.4.2
## [19] jtools_2.1.2 digest_0.6.27 RColorBrewer_1.1-2
## [22] rvest_0.3.6 colorspace_2.0-0 htmltools_0.5.1
## [25] pkgconfig_2.0.3 rstan_2.21.2 broom_0.7.0
## [28] haven_2.3.1 webshot_0.5.2 scales_1.1.1
## [31] processx_3.4.5 farver_2.0.3 generics_0.1.0
## [34] ellipsis_0.3.1 withr_2.3.0 cli_2.2.0
## [37] magrittr_2.0.1 crayon_1.3.4 readxl_1.3.1
## [40] evaluate_0.14 ps_1.5.0 badger_0.0.9
## [43] fs_1.5.0 fansi_0.4.1 xml2_1.3.2
## [46] pkgbuild_1.2.0 tools_4.0.3 loo_2.4.1
## [49] prettyunits_1.1.1 hms_0.5.3 lifecycle_0.2.0
## [52] matrixStats_0.57.0 V8_3.4.0 munsell_0.5.0
## [55] reprex_0.3.0 callr_3.5.1 compiler_4.0.3
## [58] rlang_0.4.10 grid_4.0.3 rstudioapi_0.13
## [61] labeling_0.4.2 rmarkdown_2.6 gtable_0.3.0
## [64] codetools_0.2-16 inline_0.3.17 DBI_1.1.0
## [67] curl_4.3 R6_2.5.0 gridExtra_2.3
## [70] rstantools_2.1.1.9000 lubridate_1.7.9 dlstats_0.1.3
## [73] stringi_1.5.3 parallel_4.0.3 Rcpp_1.0.5
## [76] vctrs_0.3.6 dbplyr_1.4.4 tidyselect_1.1.0
## [79] xfun_0.20