Chapter 5 Case study II: PBO
5.1 Importing the data
To illustrate and make the analysis we will use 5 as the number of dimensions for the benchmark functions
<- read_csv('data/pbo.csv') %>%
d_pbo select(algId, DIM, funcId, runs, succ, budget) %>%
filter(DIM==16) %>%
mutate(algId_index = as.integer(as.factor(algId)))
#vector with the names in order
<- seq(1,23)
benchmarks <- levels(as.factor(d_pbo$algId)) algorithms
5.2 Preparing the Stan data
<- list(
pbo_standata N = nrow(d_pbo),
y_succ = as.integer(d_pbo$succ),
N_tries = as.integer(d_pbo$runs),
p = d_pbo$algId_index,
Np = as.integer(length(unique(d_pbo$algId_index))),
item = as.integer(d_pbo$funcId),
Nitem = as.integer(length(unique(d_pbo$funcId)))
)
<- cmdstan_model('models/irt2pl.stan')
irt2pl <- irt2pl$sample(
fit_pbo data= pbo_standata,
seed = seed,
chains = 4,
iter_sampling = 4000,
parallel_chains = 4,
max_treedepth = 15
)$save_object(file='fitted/pbo16.RDS') fit_pbo
To load the fitted model to save time in compiling this document
<-readRDS('fitted/pbo16.RDS') fit_pbo
5.3 Diagnostics
Getting the draws from the posterior
<- fit_pbo$draws('a')
draws_a <- fit_pbo$draws('b')
draws_b <- fit_pbo$draws('theta') draws_theta
5.3.1 Traceplots
mcmc_trace(draws_a)
mcmc_trace(draws_b)
mcmc_trace(draws_theta)
5.3.2 Rhat and Effective samples
$summary(c('a','b', 'theta')) %>%
fit_pbokable(caption='Summary values fit of the model, including effective samples and Rhat',
booktabs=T,
digits =3,
format='html') %>%
kable_styling() %>%
scroll_box()
variable | mean | median | sd | mad | q5 | q95 | rhat | ess_bulk | ess_tail |
---|---|---|---|---|---|---|---|---|---|
a[1] | 3.934 | 3.707 | 1.837 | 1.837 | 1.355 | 7.305 | 1.000 | 10482.518 | 6617.247 |
a[2] | 3.937 | 3.701 | 1.817 | 1.798 | 1.391 | 7.285 | 1.000 | 11107.081 | 6472.795 |
a[3] | 2.713 | 2.411 | 1.483 | 1.351 | 0.885 | 5.567 | 1.000 | 12066.342 | 9175.795 |
a[4] | 3.464 | 3.189 | 1.716 | 1.665 | 1.192 | 6.686 | 1.000 | 21378.536 | 10784.405 |
a[5] | 2.706 | 2.367 | 1.511 | 1.370 | 0.888 | 5.625 | 1.000 | 10187.985 | 8808.061 |
a[6] | 3.460 | 3.155 | 1.728 | 1.671 | 1.196 | 6.756 | 1.000 | 22233.362 | 11142.743 |
a[7] | 1.777 | 1.699 | 0.565 | 0.532 | 1.006 | 2.816 | 1.000 | 9235.783 | 11044.112 |
a[8] | 5.405 | 5.220 | 1.727 | 1.693 | 2.877 | 8.540 | 1.000 | 12518.009 | 11842.410 |
a[9] | 4.112 | 3.928 | 1.424 | 1.379 | 2.097 | 6.707 | 1.000 | 11774.405 | 10021.930 |
a[10] | 0.062 | 0.059 | 0.031 | 0.029 | 0.015 | 0.117 | 1.000 | 6929.124 | 3558.016 |
a[11] | 3.449 | 3.170 | 1.692 | 1.636 | 1.206 | 6.663 | 1.000 | 20504.043 | 11361.605 |
a[12] | 2.916 | 2.611 | 1.593 | 1.532 | 0.900 | 5.968 | 1.000 | 10505.108 | 7419.611 |
a[13] | 3.449 | 3.151 | 1.722 | 1.662 | 1.202 | 6.678 | 1.000 | 21554.343 | 11428.768 |
a[14] | 1.467 | 1.408 | 0.419 | 0.389 | 0.890 | 2.249 | 1.000 | 9334.328 | 10855.270 |
a[15] | 2.943 | 2.645 | 1.606 | 1.527 | 0.912 | 6.013 | 1.000 | 10472.136 | 7012.017 |
a[16] | 4.981 | 4.789 | 2.025 | 2.068 | 2.009 | 8.585 | 1.000 | 13988.593 | 8593.783 |
a[17] | 1.398 | 1.274 | 0.661 | 0.600 | 0.571 | 2.647 | 1.000 | 10909.801 | 8282.476 |
a[18] | 0.497 | 0.484 | 0.132 | 0.126 | 0.306 | 0.735 | 1.001 | 8682.765 | 9710.728 |
a[19] | 1.138 | 1.055 | 0.470 | 0.421 | 0.537 | 2.014 | 1.000 | 8857.538 | 9008.410 |
a[20] | 0.527 | 0.491 | 0.189 | 0.162 | 0.289 | 0.888 | 1.000 | 11049.366 | 11073.602 |
a[21] | 0.428 | 0.403 | 0.142 | 0.125 | 0.245 | 0.694 | 1.001 | 11939.433 | 11161.840 |
a[22] | 0.622 | 0.582 | 0.231 | 0.209 | 0.325 | 1.056 | 1.000 | 10627.937 | 9775.536 |
a[23] | 2.672 | 2.581 | 0.837 | 0.808 | 1.467 | 4.192 | 1.000 | 11187.964 | 9926.076 |
b[1] | -0.683 | -0.629 | 0.739 | 0.669 | -1.930 | 0.389 | 1.002 | 1619.587 | 3916.449 |
b[2] | -0.674 | -0.616 | 0.742 | 0.672 | -1.901 | 0.397 | 1.002 | 1711.981 | 3375.299 |
b[3] | -1.988 | -1.733 | 1.347 | 1.120 | -4.570 | -0.285 | 1.001 | 4367.543 | 6005.187 |
b[4] | -3.430 | -3.185 | 1.747 | 1.710 | -6.657 | -1.031 | 1.001 | 8847.775 | 10212.229 |
b[5] | -2.004 | -1.759 | 1.347 | 1.138 | -4.580 | -0.277 | 1.001 | 4252.917 | 6468.271 |
b[6] | -3.469 | -3.217 | 1.771 | 1.698 | -6.777 | -1.026 | 1.000 | 8131.056 | 10593.452 |
b[7] | 0.634 | 0.633 | 0.581 | 0.589 | -0.326 | 1.573 | 1.003 | 1371.617 | 2689.014 |
b[8] | -0.047 | -0.041 | 0.573 | 0.588 | -0.988 | 0.885 | 1.003 | 1285.212 | 2596.902 |
b[9] | -0.157 | -0.151 | 0.586 | 0.598 | -1.131 | 0.795 | 1.003 | 1325.181 | 2864.499 |
b[10] | -3.540 | -3.523 | 2.295 | 2.220 | -7.331 | 0.158 | 1.000 | 13997.424 | 8994.802 |
b[11] | -3.435 | -3.179 | 1.759 | 1.698 | -6.716 | -1.038 | 1.000 | 9022.686 | 10282.626 |
b[12] | -1.500 | -1.300 | 1.158 | 0.948 | -3.673 | -0.027 | 1.001 | 3084.613 | 4728.449 |
b[13] | -3.470 | -3.233 | 1.769 | 1.729 | -6.731 | -1.010 | 1.000 | 7954.683 | 9611.534 |
b[14] | 1.101 | 1.099 | 0.593 | 0.608 | 0.127 | 2.067 | 1.003 | 1465.963 | 3178.756 |
b[15] | -1.486 | -1.288 | 1.154 | 0.928 | -3.671 | -0.038 | 1.001 | 3176.183 | 4225.810 |
b[16] | -0.346 | -0.331 | 0.604 | 0.606 | -1.345 | 0.608 | 1.002 | 1349.709 | 3100.828 |
b[17] | -1.455 | -1.308 | 1.087 | 0.957 | -3.455 | 0.009 | 1.001 | 3207.599 | 5478.346 |
b[18] | 1.573 | 1.578 | 0.709 | 0.702 | 0.401 | 2.738 | 1.002 | 2073.487 | 4520.739 |
b[19] | -0.768 | -0.695 | 0.835 | 0.786 | -2.238 | 0.472 | 1.002 | 2082.773 | 4329.199 |
b[20] | -3.476 | -3.298 | 1.558 | 1.481 | -6.347 | -1.254 | 1.001 | 6173.014 | 8820.348 |
b[21] | -4.062 | -3.890 | 1.653 | 1.598 | -7.018 | -1.678 | 1.001 | 6832.772 | 9577.002 |
b[22] | -2.553 | -2.388 | 1.367 | 1.277 | -5.032 | -0.633 | 1.001 | 4999.038 | 6668.300 |
b[23] | 0.355 | 0.360 | 0.571 | 0.584 | -0.595 | 1.275 | 1.003 | 1314.129 | 2899.819 |
theta[1] | 1.504 | 1.496 | 0.633 | 0.638 | 0.464 | 2.545 | 1.003 | 1619.499 | 3267.244 |
theta[2] | 4.385 | 4.290 | 1.187 | 1.150 | 2.589 | 6.494 | 1.001 | 5924.706 | 9161.527 |
theta[3] | 6.048 | 5.931 | 1.520 | 1.503 | 3.761 | 8.694 | 1.000 | 9604.680 | 9284.926 |
theta[4] | 6.544 | 6.432 | 1.614 | 1.605 | 4.080 | 9.362 | 1.000 | 11620.501 | 10261.762 |
theta[5] | 6.041 | 5.913 | 1.526 | 1.483 | 3.736 | 8.721 | 1.001 | 9097.508 | 10472.750 |
theta[6] | 6.060 | 5.944 | 1.535 | 1.500 | 3.725 | 8.758 | 1.000 | 8938.844 | 9589.486 |
theta[7] | 1.947 | 1.939 | 0.675 | 0.669 | 0.847 | 3.077 | 1.003 | 1801.573 | 3782.787 |
theta[8] | 0.885 | 0.886 | 0.587 | 0.601 | -0.083 | 1.850 | 1.003 | 1391.774 | 3299.372 |
theta[9] | -0.161 | -0.155 | 0.581 | 0.601 | -1.120 | 0.777 | 1.003 | 1303.435 | 2552.924 |
theta[10] | 0.681 | 0.679 | 0.580 | 0.598 | -0.277 | 1.625 | 1.003 | 1363.143 | 2642.354 |
theta[11] | -0.084 | -0.081 | 0.565 | 0.583 | -1.021 | 0.833 | 1.003 | 1252.784 | 2728.216 |
theta[12] | 0.416 | 0.420 | 0.567 | 0.580 | -0.518 | 1.337 | 1.003 | 1287.660 | 2742.103 |
5.4 Results
<- fit_pbo$summary(c('a','b'))
fit_summary_a_b <- fit_pbo$summary(c('a'))
fit_summary_a <- fit_pbo$summary(c('b'))
fit_summary_b <- fit_pbo$summary(c('theta')) fit_summary_theta
5.4.1 Difficulty and discrimination
Table for the benchmark functions
<- fit_summary_a_b %>%
table_benchmarks select('Benchmark ID'=variable,
Median=median,
'CI 5%'=q5,
'CI 95%'=q95)
$'Benchmark ID'<-rep(benchmarks,2)
table_benchmarks
kable(table_benchmarks,
caption='Summary values of the discrimination and difficulty level parameters for the PBO benchmarks',
booktabs=T,
digits =3,
format='html',
linesep = "") %>%
kable_styling() %>%
pack_rows("Discrimination value (a)",1,23) %>%
pack_rows("Difficulty level (b)",23,46)
Benchmark ID | Median | CI 5% | CI 95% |
---|---|---|---|
Discrimination value (a) | |||
1 | 3.707 | 1.355 | 7.305 |
2 | 3.701 | 1.391 | 7.285 |
3 | 2.411 | 0.885 | 5.567 |
4 | 3.189 | 1.192 | 6.686 |
5 | 2.367 | 0.888 | 5.625 |
6 | 3.155 | 1.196 | 6.756 |
7 | 1.699 | 1.006 | 2.816 |
8 | 5.220 | 2.877 | 8.540 |
9 | 3.928 | 2.097 | 6.707 |
10 | 0.059 | 0.015 | 0.117 |
11 | 3.170 | 1.206 | 6.663 |
12 | 2.611 | 0.900 | 5.968 |
13 | 3.151 | 1.202 | 6.678 |
14 | 1.408 | 0.890 | 2.249 |
15 | 2.645 | 0.912 | 6.013 |
16 | 4.789 | 2.009 | 8.585 |
17 | 1.274 | 0.571 | 2.647 |
18 | 0.484 | 0.306 | 0.735 |
19 | 1.055 | 0.537 | 2.014 |
20 | 0.491 | 0.289 | 0.888 |
21 | 0.403 | 0.245 | 0.694 |
22 | 0.582 | 0.325 | 1.056 |
Difficulty level (b) | |||
23 | 2.581 | 1.467 | 4.192 |
1 | -0.629 | -1.930 | 0.389 |
2 | -0.616 | -1.901 | 0.397 |
3 | -1.733 | -4.570 | -0.285 |
4 | -3.185 | -6.657 | -1.031 |
5 | -1.759 | -4.580 | -0.277 |
6 | -3.217 | -6.777 | -1.026 |
7 | 0.633 | -0.326 | 1.573 |
8 | -0.041 | -0.988 | 0.885 |
9 | -0.151 | -1.131 | 0.795 |
10 | -3.523 | -7.331 | 0.158 |
11 | -3.179 | -6.716 | -1.038 |
12 | -1.300 | -3.673 | -0.027 |
13 | -3.233 | -6.731 | -1.010 |
14 | 1.099 | 0.127 | 2.067 |
15 | -1.288 | -3.671 | -0.038 |
16 | -0.331 | -1.345 | 0.608 |
17 | -1.308 | -3.455 | 0.009 |
18 | 1.578 | 0.401 | 2.738 |
19 | -0.695 | -2.238 | 0.472 |
20 | -3.298 | -6.347 | -1.254 |
21 | -3.890 | -7.018 | -1.678 |
22 | -2.388 | -5.032 | -0.633 |
23 | 0.360 | -0.595 | 1.275 |
mcmc_intervals(draws_a) +
scale_y_discrete(labels=benchmarks)+
labs(x='Discrimination parameter (a)',
y='Benchmark function ID',
title='Discrimination parameter distribution (PBO)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
mcmc_intervals(draws_b) +
scale_y_discrete(labels=benchmarks)+
labs(x='Difficulty level parameter (b)',
y='Benchmark function ID',
title='Difficulty level parameter distribution (PBO)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
5.4.2 Ability
Creating a table
<- fit_summary_theta %>%
table_algorithms select(Algorithms=variable,
Median=median,
'CI 5%'=q5,
'CI 95%'=q95)
$Algorithms <- algorithms
table_algorithms
kable(table_algorithms,
caption='Summary values of the ability level of the algorithms (PBO)',
booktabs=T,
digits =3,
format='html',
linesep = "") %>%
kable_styling()
Algorithms | Median | CI 5% | CI 95% |
---|---|---|---|
(1+(λ,λ)) GA | 1.496 | 0.464 | 2.545 |
(1+1) EA_>0 | 4.290 | 2.589 | 6.494 |
(1+1) fGA | 5.931 | 3.761 | 8.694 |
(1+10) EA_{r/2,2r} | 6.432 | 4.080 | 9.362 |
(1+10) EA_>0 | 5.913 | 3.736 | 8.721 |
(1+10) EA_logNormal | 5.944 | 3.725 | 8.758 |
(1+10) EA_normalized | 1.939 | 0.847 | 3.077 |
(1+10) EA_var_ctrl | 0.886 | -0.083 | 1.850 |
(30,30) vGA | -0.155 | -1.120 | 0.777 |
EDA | 0.679 | -0.277 | 1.625 |
gHC | -0.081 | -1.021 | 0.833 |
RLS | 0.420 | -0.518 | 1.337 |
mcmc_intervals(draws_theta) +
scale_y_discrete(labels=algorithms)+
labs(x=unname(TeX("Ability level ($\\theta$)")),
y='Algorithm',
title='Ability level parameter distribution (PBO)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
5.4.3 Item information
We will use the same functions from the BBOB case study
Creating a single data frame
<- NULL
item_information_df for(i in seq(1:length(benchmarks))){
<-as.matrix(fit_summary_a[i,c(3,6,7)])
a<-as.matrix(fit_summary_b[i,c(3,6,7)])
b<- item_info_with_intervals(a=a,b=b,item = i,thetamin = -7, thetamax = 5)
iinfo <- rbind(item_information_df,iinfo)
item_information_df }
Now we can create an information plot for every item
%>%
item_information_df pivot_wider(names_from = 'pars', values_from = 'Information') %>%
ggplot(aes(x=theta))+
geom_line(aes(y=median), color='black')+
# geom_line(aes(y=q05), color='red', linetype='dashed')+
# geom_line(aes(y=q95), color='blue', linetype='dashed')+
facet_wrap(~item,
ncol=4) +
labs(title='Item information curve (PBO)',
x=unname(TeX("Ability ($\\theta$)")),
y='Information',
color='Information interval')+
theme_bw() +
theme(legend.position = 'bottom')
5.4.4 Test information
We can also look at the test information. First, we need to pivot wider so we can sum the items
<- item_information_df %>%
test_information_df pivot_wider(names_from = 'item', values_from = 'Information') %>%
mutate(TestInfo = dplyr::select(., -theta, -pars) %>% rowSums()) %>%
::select(theta, pars, TestInfo) dplyr
Now that we have calculated the test parameters we can plot the test information
First let’s get a horizontal line to show where the algorithms median ability lies
<- fit_summary_theta %>%
alg_median mutate(Algorithm=algorithms) %>%
select(Algorithm, median)
%>%
test_information_df ::select(theta, pars, TestInfo) %>%
dplyrpivot_wider(names_from = 'pars', values_from = 'TestInfo') %>%
ggplot(aes(x=theta)) +
geom_line(aes(y=median))+
geom_vline(data=alg_median, aes(xintercept=median,color=Algorithm),linetype='dashed')+
labs(
title='Test Information Curve (PBO)',
x=unname(TeX("Ability ($\\theta$)")),
y='Test information',
color='Algorithm median'
+
)theme_bw()+
guides(color=guide_legend(nrow=5,byrow=TRUE))+
theme(legend.position = 'bottom')