Chapter 4 Case Study I: BBOB 2019

4.1 Importing the data

To illustrate and make the analysis, we will the number of dimensions equal to 5 (since the benchmark functions are all scalable). To do an analysis with different dimensions just change the code here

d_bbob <- read_csv('data/bbob2019.csv') %>% 
  select(algId, DIM, funcId, runs, succ, budget) %>% 
  filter(DIM==5) %>%  
  mutate(algId_index = as.integer(as.factor(algId)))

#vector with the names in order
benchmarks <- seq(1,24)
algorithms <- levels(as.factor(d_bbob$algId))

4.2 Preparing the Stan data

Creating a list for Stan.

bbob_standata <- list(
  N = nrow(d_bbob),
  y_succ = as.integer(d_bbob$succ),
  N_tries = as.integer(d_bbob$runs),
  p = d_bbob$algId_index,
  Np = as.integer(length(unique(d_bbob$algId_index))),
  item = as.integer(d_bbob$funcId),
  Nitem = as.integer(length(unique(d_bbob$funcId)))
)

Calling the model with cmdstanr

irt2pl <- cmdstan_model('models/irt2pl.stan') 
fit_bbob <- irt2pl$sample(
  data= bbob_standata,
  chains = 4,
  iter_sampling = 4000,
  parallel_chains = 4,
  max_treedepth = 15
)
fit_bbob$save_object(file='fitted/bbob5.RDS')

To load the fitted model (to save time in compiling this document)

fit_bbob<-readRDS('fitted/bbob5.RDS')

4.3 Diagnostics

Getting the draws from the posterior

draws_a <- fit_bbob$draws('a')
draws_b <- fit_bbob$draws('b')
draws_theta <- fit_bbob$draws('theta')

4.3.1 Traceplots

mcmc_trace(draws_a)

mcmc_trace(draws_b)

mcmc_trace(draws_theta)

4.3.2 Rhat and Effective samples

fit_bbob$summary(c('a','b', 'theta')) %>% 
  kable(caption='Summary values fit of the model, including effective samples and Rhat', 
      booktabs=T,
      digits =3,
      format='html') %>% 
  kable_styling() %>% 
  scroll_box()
Table 4.1: Summary values fit of the model, including effective samples and Rhat
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
a[1] 6.522 6.430 1.424 1.420 4.318 9.009 1.000 14280.165 11093.421
a[2] 4.324 4.250 1.138 1.115 2.598 6.321 1.000 12287.535 9783.007
a[3] 4.020 3.931 1.189 1.173 2.216 6.127 1.000 10109.111 9225.800
a[4] 2.414 2.328 0.859 0.852 1.181 3.964 1.000 9537.022 8863.253
a[5] 0.184 0.179 0.040 0.038 0.127 0.255 1.001 9960.494 11299.490
a[6] 1.692 1.565 0.762 0.714 0.691 3.139 1.000 9466.497 6907.590
a[7] 1.939 1.821 0.789 0.742 0.865 3.392 1.000 8753.252 6591.984
a[8] 4.318 4.244 1.128 1.095 2.607 6.312 1.000 12418.532 10295.961
a[9] 3.290 3.213 1.042 1.031 1.714 5.119 1.000 11748.731 8343.076
a[10] 3.266 3.192 1.056 1.042 1.665 5.118 1.000 9944.351 6922.556
a[11] 2.939 2.848 1.015 0.996 1.424 4.730 1.001 10865.103 7069.033
a[12] 1.685 1.554 0.764 0.726 0.693 3.123 1.000 9145.424 7175.492
a[13] 1.478 1.347 0.670 0.612 0.629 2.763 1.000 10919.706 8940.304
a[14] 1.478 1.343 0.676 0.606 0.641 2.775 1.001 11612.301 9517.923
a[15] 1.473 1.355 0.666 0.613 0.634 2.707 1.000 9236.735 7824.179
a[16] 1.482 1.347 0.670 0.608 0.641 2.758 1.000 9694.299 7818.154
a[17] 1.476 1.345 0.674 0.618 0.628 2.734 1.001 9164.008 8435.066
a[18] 1.484 1.356 0.666 0.609 0.640 2.761 1.000 10599.824 8511.088
a[19] 1.182 1.113 0.457 0.412 0.576 2.044 1.000 11309.124 8818.726
a[20] 1.388 1.275 0.624 0.582 0.598 2.572 1.000 9183.205 7985.132
a[21] 1.023 0.939 0.439 0.379 0.477 1.857 1.000 8203.122 8222.777
a[22] 0.670 0.632 0.247 0.221 0.347 1.115 1.001 7799.858 8398.404
a[23] 1.495 1.355 0.691 0.631 0.630 2.813 1.000 10373.600 8546.457
a[24] 1.469 1.340 0.675 0.608 0.620 2.766 1.000 10191.232 8757.702
b[1] 0.190 0.189 0.526 0.517 -0.675 1.049 1.003 1655.798 3234.633
b[2] 0.421 0.418 0.537 0.529 -0.458 1.301 1.003 1714.585 3269.607
b[3] 0.126 0.120 0.532 0.528 -0.750 0.999 1.003 1662.807 3212.317
b[4] 0.400 0.384 0.582 0.572 -0.532 1.366 1.003 1815.237 3702.849
b[5] -10.606 -10.553 1.755 1.745 -13.618 -7.825 1.000 9271.595 11117.350
b[6] 1.987 1.784 1.170 0.969 0.476 4.159 1.001 4259.516 6617.055
b[7] 0.788 0.734 0.711 0.633 -0.260 1.992 1.002 2240.195 4102.527
b[8] 0.421 0.420 0.536 0.526 -0.463 1.306 1.003 1742.331 3435.551
b[9] 0.678 0.657 0.590 0.561 -0.248 1.668 1.002 2018.667 3758.684
b[10] 0.690 0.667 0.600 0.572 -0.247 1.680 1.002 2031.685 3763.513
b[11] 0.824 0.788 0.651 0.591 -0.148 1.884 1.001 2192.201 3938.860
b[12] 1.993 1.802 1.164 0.998 0.492 4.163 1.001 4568.658 6773.007
b[13] 2.420 2.202 1.295 1.138 0.709 4.876 1.000 5568.265 7220.730
b[14] 2.413 2.204 1.276 1.138 0.713 4.800 1.001 5160.159 6932.234
b[15] 2.419 2.182 1.298 1.123 0.747 4.936 1.001 4644.611 7607.595
b[16] 2.400 2.195 1.266 1.129 0.717 4.822 1.001 5039.876 7317.296
b[17] 2.420 2.195 1.290 1.142 0.715 4.873 1.001 5288.295 7216.751
b[18] 2.401 2.191 1.280 1.132 0.719 4.779 1.001 5412.655 7130.342
b[19] 1.432 1.336 0.880 0.793 0.186 2.976 1.001 3445.220 5449.716
b[20] 2.406 2.178 1.298 1.140 0.714 4.874 1.001 5362.057 6661.986
b[21] 1.239 1.138 0.905 0.829 -0.056 2.847 1.001 3242.422 4953.712
b[22] 0.810 0.734 0.830 0.763 -0.397 2.233 1.002 3062.358 4872.259
b[23] 2.398 2.192 1.286 1.155 0.702 4.841 1.001 5267.369 6601.671
b[24] 2.441 2.218 1.307 1.153 0.721 4.945 1.001 5375.168 7470.779
theta[1] -0.691 -0.690 0.553 0.542 -1.604 0.212 1.003 1792.092 3591.987
theta[2] -0.024 -0.024 0.528 0.519 -0.892 0.838 1.003 1681.458 3361.608
theta[3] -0.255 -0.254 0.532 0.523 -1.131 0.624 1.003 1656.443 3263.409
theta[4] -3.989 -3.911 1.205 1.175 -6.100 -2.172 1.000 7015.079 9333.640
theta[5] -4.003 -3.904 1.230 1.185 -6.193 -2.156 1.000 7087.536 9941.552
theta[6] -3.993 -3.896 1.201 1.171 -6.094 -2.180 1.000 7278.511 9311.373
theta[7] -4.002 -3.912 1.229 1.200 -6.159 -2.152 1.000 7797.899 8536.771
theta[8] -1.284 -1.271 0.634 0.621 -2.345 -0.284 1.002 2255.589 4664.016
theta[9] 0.436 0.433 0.523 0.519 -0.430 1.292 1.003 1642.108 2997.515
theta[10] -0.359 -0.356 0.537 0.529 -1.250 0.518 1.003 1664.464 3065.114
theta[11] -0.359 -0.360 0.538 0.535 -1.243 0.521 1.003 1652.009 3192.851
theta[12] -0.232 -0.235 0.532 0.524 -1.110 0.644 1.003 1663.252 3203.973
theta[13] -4.002 -3.899 1.220 1.195 -6.193 -2.180 1.000 7255.309 8999.452

4.4 Results

Let’s get some summary descriptive statistics of the posterior

fit_summary_a_b <- fit_bbob$summary(c('a','b'))
fit_summary_a <- fit_bbob$summary(c('a'))
fit_summary_b <- fit_bbob$summary(c('b'))
fit_summary_theta <- fit_bbob$summary(c('theta'))

4.4.1 Difficulty and discrimination

Table for the benchmark functions

table_benchmarks <- fit_summary_a_b %>% 
  select('Benchmark ID'=variable, 
         Median=median,
         'CI 5%'=q5,
         'CI 95%'=q95)

table_benchmarks$'Benchmark ID'<-rep(benchmarks,2)

kable(table_benchmarks,
      caption='Summary values of the discrimination and difficulty level parameters for the BBOB benchmarks', 
      booktabs=T,
      digits =3,
      format='html',
      linesep = "") %>% 
  kable_styling() %>% 
  pack_rows("Discrimination value (a)",1,24) %>% 
  pack_rows("Difficulty level (b)",25,48)
Table 4.2: Summary values of the discrimination and difficulty level parameters for the BBOB benchmarks
Benchmark ID Median CI 5% CI 95%
Discrimination value (a)
1 6.430 4.318 9.009
2 4.250 2.598 6.321
3 3.931 2.216 6.127
4 2.328 1.181 3.964
5 0.179 0.127 0.255
6 1.565 0.691 3.139
7 1.821 0.865 3.392
8 4.244 2.607 6.312
9 3.213 1.714 5.119
10 3.192 1.665 5.118
11 2.848 1.424 4.730
12 1.554 0.693 3.123
13 1.347 0.629 2.763
14 1.343 0.641 2.775
15 1.355 0.634 2.707
16 1.347 0.641 2.758
17 1.345 0.628 2.734
18 1.356 0.640 2.761
19 1.113 0.576 2.044
20 1.275 0.598 2.572
21 0.939 0.477 1.857
22 0.632 0.347 1.115
23 1.355 0.630 2.813
24 1.340 0.620 2.766
Difficulty level (b)
1 0.189 -0.675 1.049
2 0.418 -0.458 1.301
3 0.120 -0.750 0.999
4 0.384 -0.532 1.366
5 -10.553 -13.618 -7.825
6 1.784 0.476 4.159
7 0.734 -0.260 1.992
8 0.420 -0.463 1.306
9 0.657 -0.248 1.668
10 0.667 -0.247 1.680
11 0.788 -0.148 1.884
12 1.802 0.492 4.163
13 2.202 0.709 4.876
14 2.204 0.713 4.800
15 2.182 0.747 4.936
16 2.195 0.717 4.822
17 2.195 0.715 4.873
18 2.191 0.719 4.779
19 1.336 0.186 2.976
20 2.178 0.714 4.874
21 1.138 -0.056 2.847
22 0.734 -0.397 2.233
23 2.192 0.702 4.841
24 2.218 0.721 4.945

A more visual representation.

mcmc_intervals(draws_a) +
  scale_y_discrete(labels=benchmarks)+
  labs(x='Discrimination parameter (a)',
       y='Benchmark function ID',
       title='Discrimination parameter distribution (BBOB)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

mcmc_intervals(draws_b) +
  scale_y_discrete(labels=benchmarks)+
  labs(x='Difficulty level parameter (b)',
       y='Benchmark function ID',
       title='Difficulty level parameter distribution (BBOB)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

4.4.2 Ability

Creating a table:

table_algorithms <- fit_summary_theta %>% 
  select(Algorithms=variable, 
         Median=median,
         'CI 5%'=q5,
         'CI 95%'=q95)

table_algorithms$Algorithms <- algorithms

kable(table_algorithms,
      caption='Summary values of the ability level of the algorithms (BBOB)',
      booktabs=T,
      digits =3,
      format='html',
      linesep = "") %>% 
  kable_styling() 
Table 4.3: Summary values of the ability level of the algorithms (BBOB)
Algorithms Median CI 5% CI 95%
adapt-Nelder-Mead-scipy-2019 -0.690 -1.604 0.212
Adaptive_Two_Mode -0.024 -0.892 0.838
BFGS-scipy-2019 -0.254 -1.131 0.624
CG-scipy-2019 -3.911 -6.100 -2.172
COBYLA-scipy-2019 -3.904 -6.193 -2.156
DE-scipy-2019 -3.896 -6.094 -2.180
L-BFGS-B-scipy-2019 -3.912 -6.159 -2.152
Nelder-Mead-scipy-2019 -1.271 -2.345 -0.284
Powell-scipy-2019 0.433 -0.430 1.292
RS-4-initIn0 -0.356 -1.250 0.518
RS-5-initIn0 -0.360 -1.243 0.521
RS-6-initIn0 -0.235 -1.110 0.644
TNC-scipy-2019 -3.899 -6.193 -2.180

A more visual representation.

mcmc_intervals(draws_theta) +
  scale_y_discrete(labels=algorithms)+
  labs(x=unname(TeX("Ability level ($\\theta$)")),
       y='Algorithm',
       title='Ability level parameter distribution (BBOB)')
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

4.4.3 Item information

First let’s create a few helper functions to calculate the item information

p_info <- function(a,b, theta){
     return(exp(a*(theta-b))/(1+exp(a*(theta-b))))
  }
q_info <- function(a,b, theta){
    return(1-p_info(a,b, theta))
  }
#a and b are a vector of 3 a[1] is lower q05 a[2] is median and a[3] is q95
#return a data frame ready to be plottted
item_info_with_intervals <- function(a,b,item, thetamin=-5, thetamax=5,step=0.1){
  theta <- seq(from=thetamin, to=thetamax, by=step)
  info_median <- a[1]^2*p_info(a[1],b[1],theta)*q_info(a[1],b[1],theta)
  info_lower <- a[2]^2*p_info(a[2],b[2],theta)*q_info(a[2],b[2],theta)
  info_higher <- a[3]^2*p_info(a[3],b[3],theta)*q_info(a[3],b[3],theta)
  
  out<- data.frame(Information= c(info_lower,info_median,info_higher),
                   theta=c(theta,theta,theta),
                   pars=c(rep('q05',length(theta)),
                           rep('median',length(theta)),
                           rep('q95',length(theta))),
                   item=c(rep(item,length(theta)),
                          rep(item,length(theta)),
                          rep(item,length(theta))))
  return(out)
}

Creating a single data frame

item_information_df <- NULL
for(i in seq(1:length(benchmarks))){
  a<-as.matrix(fit_summary_a[i,c(3,6,7)])
  b<-as.matrix(fit_summary_b[i,c(3,6,7)])
  iinfo <- item_info_with_intervals(a=a,b=b,item = i,thetamin = -7, thetamax = 5)
  item_information_df <- rbind(item_information_df,iinfo)
}

Now we can create an information plot for every item

item_information_df %>% 
  pivot_wider(names_from = 'pars', values_from = 'Information') %>% 
  ggplot(aes(x=theta))+
    geom_line(aes(y=median), color='black')+
    facet_wrap(~item,
               ncol=4) +
    labs(title='Item information curve (BBOB)',
         x=unname(TeX("Ability ($\\theta$)")),
         y='Information',
         color='Information interval')+
    theme_bw() +
    theme(legend.position = 'bottom')

4.4.4 Test information

We can also look at the test information. First, we need to pivot wider so we can sum the items

test_information_df <- item_information_df %>% 
  pivot_wider(names_from = 'item', values_from = 'Information') %>% 
  mutate(TestInfo = dplyr::select(., -theta, -pars) %>% rowSums()) %>% 
  dplyr::select(theta, pars, TestInfo)

Now that we have calculated the test parameters we can plot the test information

First let’s get a horizontal line to show where the algorithms median ability lies

alg_median <- fit_summary_theta %>% 
  mutate(Algorithm=algorithms) %>% 
  select(Algorithm, median) 
test_information_df %>% 
  dplyr::select(theta, pars, TestInfo) %>% 
  pivot_wider(names_from = 'pars', values_from = 'TestInfo') %>% 
  ggplot(aes(x=theta)) +
  geom_line(aes(y=median))+
  geom_vline(data=alg_median, aes(xintercept=median,color=Algorithm),linetype='dashed')+
  labs(
    title='Test Information Curve (BBOB)',
    x=unname(TeX("Ability ($\\theta$)")),
    y='Test information',
    color='Algorithm median'
  )+
  theme_bw()+
  guides(color=guide_legend(nrow=5,byrow=TRUE))+
  theme(legend.position = 'bottom')