Chapter 6 Multiple-group comparison

We present here the Stan version of BEST (Bayesian Estimation Supersedes the t Test) by John K. Kruschke. We will consider the following research question:

  • RQ5: Is there a difference in the time taken per function evaluation between the PSO, the RandomSearch1 and the Differential Evolution algorithms?

6.1 RQ5 Data preparation

We start by importing the dataset:

dataset <- readr::read_csv('./data/statscomp.csv')

We then filter the data we want and apply some transformations:

d <- dataset %>% 
  dplyr::filter(
    OptimizationSuccessful==TRUE &
    (Algorithm=="PSO" | Algorithm=="RandomSearch1" | Algorithm=="DifferentialEvolution")) %>% 
  dplyr::select(Algorithm, CostFunction, TimeToComplete, simNumber, MaxFeval) %>% 
  dplyr::mutate(y=10000*TimeToComplete/MaxFeval,
                CostFunctionID=create_index(CostFunction),
                AlgorithmID=create_index(Algorithm)) %>% 
  dplyr::select(Algorithm, AlgorithmID, CostFunction, CostFunctionID, y)

algorithms<-get_index_names_as_array(d$Algorithm)
bm <- get_index_names_as_array(d$CostFunction)
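
The helpers create_index and get_index_names_as_array come from the book's companion code. A minimal sketch of equivalent implementations (an assumption for illustration, not the book's actual code):

# Hypothetical equivalents of the book's helpers, for illustration only:
# create_index maps each distinct label to a 1-based integer id, and
# get_index_names_as_array returns the labels in the order of those ids.
create_index <- function(x) as.integer(as.factor(x))
get_index_names_as_array <- function(x) levels(as.factor(x))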

The data should look like this:

kable(dplyr::sample_n(d,size=10), "html",booktabs=T, format.args = list(scientific = FALSE), digits = 3) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% 
  kableExtra::scroll_box(width = "100%")
Algorithm              AlgorithmID  CostFunction         CostFunctionID      y
DifferentialEvolution            1  WhitleyN6                        28  2.950
DifferentialEvolution            1  ChenBird                          2  1.813
PSO                              2  Trefethen                        25  0.541
DifferentialEvolution            1  WhitleyN6                        28  2.957
RandomSearch1                    3  DiscusN2                          6  0.517
PSO                              2  RosenbrockRotatedN6              14  1.747
RandomSearch1                    3  Giunta                            8  0.379
RandomSearch1                    3  QingN2                           13  0.190
RandomSearch1                    3  Schwefel2d4N6                    20  0.436
RandomSearch1                    3  Damavandi                         5  0.360

Some initial visualizations with box-plots:

p1<-ggplot(d) +
  geom_boxplot(aes(x=Algorithm, y=y))+
  labs(y="Time to complete x10,000 (s)")
p1 + plot_annotation(title ="Box-plot of the runtime per function evaluation")

To check the normality assumption, we fit a simple linear model and inspect its residuals with a Q-Q plot:

lmfit <- lm(y~Algorithm, data=d)

p2<-ggplot()+
  geom_qq(aes(sample=lmfit$residuals))+
  geom_qq_line(aes(sample=lmfit$residuals))+
  labs(x="Standard normal quantiles", y="Sample quantiles")
p2 + plot_annotation(title = "Q-Q plot for normality analysis")

Verifying that the log of the runtime still presents heavy-tailed residuals:

d2<-d
d2$y <- log(d2$y)
lmfit2 <- lm(y~Algorithm, data=d2)
p3<-ggplot()+
  geom_qq(aes(sample=lmfit2$residuals))+
  geom_qq_line(aes(sample=lmfit2$residuals))+
  labs(x="Standard normal quantiles", y="Sample quantiles")
p3 + plot_annotation(title = "Q-Q plot with the log of runtime")

6.2 RQ5 Stan model

The Stan model is specified in the file './stanmodels/multiplegroups.stan'. Note that at the end of the model we commented out the generated quantities block. This block generates the posterior predictive y_rep and the log likelihood log_lik. These values are useful for diagnosing and validating the model, but the resulting file is extremely large (~1 GB for 2,000 iterations) and makes many of the following computations slow. To obtain these values, just uncomment the block and run the Stan model again.
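
In statistical notation, the model corresponds to the following (reconstructed from the Stan code printed below):

$$
\begin{aligned}
y_i &\sim \mathrm{Student\text{-}t}\!\left(\nu,\; \mu_i,\; \sigma_{\mathrm{alg}[i]}\right)\\
\mu_i &= a_{\mathrm{alg}[i]} + s \, a^{\mathrm{bm}}_{\mathrm{bm}[i]}\\
a_{\mathrm{alg}} &\sim \mathcal{N}(0,1), \qquad a^{\mathrm{bm}} \sim \mathcal{N}(0,1)\\
\sigma &\sim \mathrm{Exponential}(1), \qquad s \sim \mathrm{Exponential}(1), \qquad \nu \sim \mathrm{Exponential}(1/30)
\end{aligned}
$$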

print_stan_code('./stanmodels/multiplegroups.stan')
// Multiple group comparison
// Author: David Issa Mattos
// Date: 23 June 2020
//
//

data {
 int <lower=1> N_total; // Sample size
 real y[N_total]; // time to complete variable

 //To model each algorithm independently
 int <lower=1> N_algorithm; // Number of algorithms
 int algorithm_id[N_total]; //vector that has the id of each algorithm
 
 //To model the influence of each benchmark
 int <lower=1> N_bm;
 int bm_id[N_total];
}

parameters {
  //Fixed effect
  real a_alg[N_algorithm];//the mean effect given by the algorithms
  real <lower=0> sigma[N_algorithm];//std for the student t

  //Random effect: the effect of the benchmarks
  real a_bm_norm[N_bm];//standardized effect of each benchmark
  real<lower=0> s;//std for the random effects
  
  real<lower=0> nu;//degrees of freedom of the student t
  
}

model {
  real mu[N_total];
  real sigma_i[N_total];

  sigma ~ exponential(1);
  nu ~ exponential(1.0/30.0);
  
  //Fixed effect
  a_alg ~ normal(0,1);
  
  //Random effects
  s ~ exponential(1);
  a_bm_norm ~ normal(0,1);

  for (i in 1:N_total)
  {
    
     mu[i] = a_alg[algorithm_id[i]] + a_bm_norm[bm_id[i]]*s;
     sigma_i[i] = sigma[algorithm_id[i]];
  }
  
  y ~ student_t(nu, mu, sigma_i);
}

//Uncomment this part to get the posterior predictives and the log likelihood
//But note that it takes a lot of space in the final model
// generated quantities{
//   vector [N_total] y_rep;
//   vector[N_total] log_lik;
//   for (i in 1:N_total){
//     real mu;
//     real sigma_i;
//     mu = a_alg[algorithm_id[i]] + a_bm_norm[bm_id[i]]*s;
//     sigma_i = sigma[algorithm_id[i]];
//     y_rep[i] = student_t_rng(nu,mu,sigma_i);
//     
//     //Log likelihood
//     log_lik[i] = student_t_lpdf(y[i] | nu,mu,sigma_i);
//     
//   }
// }
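
Once this block is uncommented and the model refitted, the posterior predictive draws and log likelihood could be used as follows (a sketch, assuming the bayesplot and loo packages; not run in this chapter):

# Posterior predictive check: overlay the observed y with 50 predictive draws
y_rep <- rstan::extract(multiplegroup_fit, pars = "y_rep")$y_rep
bayesplot::ppc_dens_overlay(y = standata$y, yrep = y_rep[1:50, ])

# Approximate leave-one-out cross-validation from the log likelihood
log_lik <- loo::extract_log_lik(multiplegroup_fit, parameter_name = "log_lik")
loo::loo(log_lik)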

Let’s compile the model and start sampling with the stan() function. In the data folder you can find the specific data used to fit the model after all transformations: "./data/multiplegroups-data.RDS".

standata <- list(
  N_total=nrow(d),
  y = d$y,
  N_algorithm = length(algorithms),
  algorithm_id = d$AlgorithmID,
  N_bm = length(bm),
  bm_id = d$CostFunctionID
  )
saveRDS(standata, file = "./data/multiplegroups-data.RDS")

For the sake of computation time, we do not run this chunk every time we compile this document; from now on we load the saved Stan fit object. When we change the model or the data, we can run this chunk separately. Here we increased max_treedepth and the number of iterations to get a larger effective sample size for inference. Neither change affects the validity of the chains, only the sampling efficiency.

standata<-readRDS("./data/multiplegroups-data.RDS")
multiplegroup_fit <- stan(file = './stanmodels/multiplegroups.stan',
                     data=standata,
                     chains = 4,
                     warmup = 400,
                     iter = 4000,
                     control = list(max_treedepth = 15))
saveRDS(multiplegroup_fit, file = "./data/multiplegroups-fit.RDS")
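
A common pattern to avoid refitting on every compilation is to guard the chunk with a file check. A sketch of one possible setup (an assumption, not the book's actual build configuration):

# Only refit when no saved fit exists; otherwise load the cached fit from disk.
if (!file.exists("./data/multiplegroups-fit.RDS")) {
  multiplegroup_fit <- stan(file = './stanmodels/multiplegroups.stan',
                            data = standata,
                            chains = 4,
                            warmup = 400,
                            iter = 4000,
                            control = list(max_treedepth = 15))
  saveRDS(multiplegroup_fit, file = "./data/multiplegroups-fit.RDS")
} else {
  multiplegroup_fit <- readRDS("./data/multiplegroups-fit.RDS")
}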

6.3 RQ5 Diagnosis

First, let's look at the trace plots of the main parameters to assess how well the chains mixed.

a_alg_v <- c("a_alg[1]",
           "a_alg[2]",
           "a_alg[3]")

sigma_v <- c("sigma[1]",
           "sigma[2]",
           "sigma[3]")
rstan::traceplot(multiplegroup_fit, pars=a_alg_v)

rstan::traceplot(multiplegroup_fit, pars=sigma_v)

rstan::traceplot(multiplegroup_fit, pars=c('s', 'nu'))

Another diagnostic is the Rhat statistic: values greater than 1.05 indicate that the chains did not mix well and may not have converged. The table below shows a summary of the sampling.

kable(summary(multiplegroup_fit)$summary, "html") %>% 
  kable_styling(bootstrap_options = c('striped',"hover", "condensed" )) %>% 
  kableExtra::scroll_box(width = "100%")
Parameter mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
a_alg[1] 1.7811268 0.0026230 0.0606078 1.6619826 1.7403893 1.7820330 1.8215807 1.8982458 533.8817 1.006126
a_alg[2] 0.5691139 0.0026209 0.0605588 0.4496716 0.5283954 0.5700587 0.6093643 0.6855433 533.8829 1.006094
a_alg[3] 0.4423529 0.0026208 0.0605582 0.3233317 0.4013482 0.4432400 0.4826083 0.5580626 533.9264 1.006091
sigma[1] 0.0877444 0.0000318 0.0017455 0.0843878 0.0865374 0.0877212 0.0889341 0.0911437 3011.1145 1.001623
sigma[2] 0.0694441 0.0000277 0.0015140 0.0664710 0.0684330 0.0694499 0.0704463 0.0724822 2976.8074 1.000836
sigma[3] 0.0368688 0.0000144 0.0008039 0.0352959 0.0363257 0.0368479 0.0374083 0.0384708 3121.5824 1.000719
a_bm_norm[1] 0.7169454 0.0087107 0.2135654 0.2988931 0.5710651 0.7142151 0.8674880 1.1328629 601.1139 1.008025
a_bm_norm[2] -0.1824269 0.0085491 0.1988784 -0.5714764 -0.3176881 -0.1827604 -0.0451221 0.1940826 541.1669 1.005750
a_bm_norm[3] -0.5830931 0.0091159 0.2163121 -1.0160424 -0.7275807 -0.5801526 -0.4404438 -0.1643768 563.0722 1.003927
a_bm_norm[4] -0.6091954 0.0091666 0.2178293 -1.0467759 -0.7534750 -0.6062762 -0.4656718 -0.1855039 564.6959 1.003882
a_bm_norm[5] -0.2485564 0.0086165 0.2008707 -0.6432752 -0.3856466 -0.2485221 -0.1111202 0.1316417 543.4600 1.005561
a_bm_norm[6] 0.4734111 0.0084824 0.2024756 0.0780666 0.3334823 0.4720095 0.6152515 0.8568556 569.7825 1.007887
a_bm_norm[7] -0.6423130 0.0092312 0.2198717 -1.0846795 -0.7887968 -0.6386941 -0.4972334 -0.2139691 567.3092 1.003799
a_bm_norm[8] -0.1644817 0.0085326 0.1985217 -0.5517190 -0.2996106 -0.1657384 -0.0275923 0.2129775 541.3154 1.005963
a_bm_norm[9] 2.1116300 0.0121812 0.3393819 1.4484849 1.8816608 2.1092589 2.3358436 2.7967160 776.2394 1.005745
a_bm_norm[10] -0.1812781 0.0085416 0.1989285 -0.5714155 -0.3176169 -0.1816472 -0.0444326 0.1975518 542.3993 1.005821
a_bm_norm[11] 0.9019238 0.0089863 0.2249558 0.4576177 0.7501431 0.8991386 1.0577316 1.3417398 626.6611 1.007809
a_bm_norm[12] -0.7818005 0.0095262 0.2292960 -1.2427092 -0.9348107 -0.7782093 -0.6287008 -0.3328971 579.3611 1.003212
a_bm_norm[13] -0.6775106 0.0092827 0.2220651 -1.1227791 -0.8255683 -0.6734795 -0.5296425 -0.2434466 572.2849 1.003527
a_bm_norm[14] 1.7137941 0.0109291 0.2967903 1.1337453 1.5123317 1.7108835 1.9123530 2.3102016 737.4474 1.006425
a_bm_norm[15] -0.2518458 0.0086224 0.2009569 -0.6477380 -0.3884656 -0.2520031 -0.1136934 0.1301903 543.1896 1.005519
a_bm_norm[16] -0.6890408 0.0093176 0.2228297 -1.1395233 -0.8382348 -0.6855899 -0.5409571 -0.2539657 571.9248 1.003543
a_bm_norm[17] -0.2997380 0.0086702 0.2027220 -0.6996320 -0.4386068 -0.2983614 -0.1626570 0.0871212 546.6910 1.005343
a_bm_norm[18] -0.2065718 0.0085791 0.1997227 -0.5978602 -0.3420286 -0.2057953 -0.0687583 0.1743771 541.9588 1.005684
a_bm_norm[19] -0.1781351 0.0085411 0.1988323 -0.5689114 -0.3141829 -0.1783365 -0.0407893 0.1991033 541.9402 1.005823
a_bm_norm[20] 0.0013479 0.0084299 0.1959243 -0.3801857 -0.1321950 -0.0011718 0.1384331 0.3748182 540.1669 1.006703
a_bm_norm[21] -0.0381568 0.0084334 0.1961037 -0.4215247 -0.1727083 -0.0400992 0.0998043 0.3345669 540.7160 1.006430
a_bm_norm[22] -0.2591012 0.0086473 0.2014404 -0.6537203 -0.3957154 -0.2589614 -0.1220667 0.1262415 542.6678 1.005472
a_bm_norm[23] -0.7358550 0.0094254 0.2260798 -1.1892931 -0.8858307 -0.7320868 -0.5846833 -0.2939862 575.3385 1.003330
a_bm_norm[24] -0.8254234 0.0096072 0.2324392 -1.2935835 -0.9805076 -0.8213645 -0.6689679 -0.3684101 585.3639 1.003067
a_bm_norm[25] -0.3366898 0.0087284 0.2039811 -0.7369234 -0.4748569 -0.3359966 -0.1992994 0.0536531 546.1521 1.005150
a_bm_norm[26] 0.3488281 0.0084028 0.1985740 -0.0350128 0.2120805 0.3487199 0.4879435 0.7238281 558.4606 1.007803
a_bm_norm[27] -0.7346498 0.0094058 0.2258222 -1.1905483 -0.8847851 -0.7302798 -0.5839647 -0.2927947 576.4249 1.003359
a_bm_norm[28] 3.6998131 0.0182556 0.5324392 2.6893552 3.3338793 3.6905820 4.0513884 4.7696181 850.6446 1.004008
a_bm_norm[29] -0.4143554 0.0088376 0.2074474 -0.8239239 -0.5542043 -0.4118008 -0.2764703 -0.0164877 550.9920 1.004714
a_bm_norm[30] -0.1770084 0.0085576 0.1989495 -0.5672901 -0.3128015 -0.1772755 -0.0397254 0.2004904 540.4818 1.005940
s 0.3072466 0.0014798 0.0441124 0.2352709 0.2768578 0.3021747 0.3322788 0.4069552 888.6323 1.001588
nu 2.7532026 0.0015421 0.0802559 2.6021136 2.6972495 2.7520588 2.8075206 2.9112477 2708.6351 1.001055
lp__ 14070.2753067 0.1671716 5.9172571 14057.6082046 14066.4863632 14070.6208015 14074.4422447 14080.7870357 1252.8982 1.000465
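
As a quick programmatic check, we can flag any parameter whose Rhat exceeds the 1.05 threshold (a small sketch using the same summary):

fit_summary <- summary(multiplegroup_fit)$summary
rhat <- fit_summary[, "Rhat"]
rhat[rhat > 1.05]  # named vector of suspect parameters; empty if all chains mixed well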

6.4 RQ5 Results and Plots

First, let's get the HPDI of every parameter. Then we restrict it to the algorithm effects, to the standard deviations, and finally to the parameters s and nu.

hpdi <- get_HPDI_from_stanfit(multiplegroup_fit)

hpdi_algorithm <- hpdi %>% 
      dplyr::filter(str_detect(Parameter, "a_alg\\[")) %>%
      dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels

hpdi_sigma<- hpdi %>% 
      dplyr::filter(str_detect(Parameter, "sigma\\[")) %>%
      dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels

hpdi_s <- hpdi %>% 
      dplyr::filter(Parameter=='s')


hpdi_nu <- hpdi %>% 
      dplyr::filter(Parameter=='nu')


hpdi_nu_s <- hpdi %>% 
      dplyr::filter(Parameter=='nu' | Parameter=='s')
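
The function get_HPDI_from_stanfit is a helper from the book's companion code. A minimal sketch of an equivalent, inferred from how its output is used below (an assumption, not the actual implementation):

# Hypothetical equivalent of get_HPDI_from_stanfit, built on HDInterval:
# returns one row per parameter with its posterior mean and 95% HPD interval.
get_HPDI_from_stanfit_sketch <- function(fit, credMass = 0.95) {
  draws <- as.data.frame(fit)  # one column per parameter
  hpdi <- HDInterval::hdi(draws, credMass = credMass)
  tibble::tibble(Parameter = colnames(draws),
                 Mean = colMeans(draws),
                 HPDI.lower = hpdi["lower", ],
                 HPDI.higher = hpdi["upper", ])
}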

p_alg<-ggplot(data=hpdi_algorithm, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="a_alg", x="Algorithm")+
  theme(axis.title.x= element_blank())+
  coord_flip()
p_alg + plot_annotation(title = 'HPDI interval for the algorithms')

p_sigma<-ggplot(data=hpdi_sigma, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="sigma", x="Algorithm")+
  theme(axis.title.x= element_blank())+
  coord_flip()
p_sigma + plot_annotation(title = 'HPDI interval for sigma')

p_s <- ggplot(data=hpdi_s, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="s", x="Parameter")+
  coord_flip()
p_s + plot_annotation(title = 'HPDI interval for the std of the benchmarks')

p_nu <- ggplot(data=hpdi_nu, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="nu", x="Parameter")+
  coord_flip()
p_nu + plot_annotation(title = 'HPDI interval of the degrees of freedom')

p_nu_s <- ggplot(data=hpdi_nu_s, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="Estimate of s and nu", x="Parameter")+
  theme(axis.title.x= element_blank())+
  coord_flip()

p_nu_s + plot_annotation(title = 'HPDI interval')

Now let's get the posterior distributions of the differences between the algorithms:

posterior <- rstan::extract(multiplegroup_fit)
a_alg <- as_tibble(posterior$a_alg)
colnames(a_alg) <- algorithms
sample_a_alg <- dplyr::sample_n(a_alg, size=1000, replace=T) %>% 
  dplyr::mutate(PSO_Random = PSO-RandomSearch1,
                DE_PSO= DifferentialEvolution-PSO,
                DE_Random = DifferentialEvolution-RandomSearch1) %>% 
  dplyr::select(-DifferentialEvolution,-PSO,-RandomSearch1)

#Getting HPDI from a data frame and creating a table instead of plotting...
hpdi_diff<-HDInterval::hdi(sample_a_alg,credMass=0.95)
hpdi_diff<-hpdi_diff %>% as_tibble(rownames = "Metric") %>% 
  tibble::add_row(Metric="Mean", PSO_Random=mean(sample_a_alg$PSO_Random), DE_PSO=mean(sample_a_alg$DE_PSO), DE_Random=mean(sample_a_alg$DE_Random)) %>%
  tidyr::pivot_longer(cols=-Metric, names_to="AlgorithmDifference", values_to='values') %>% 
  tidyr::pivot_wider(names_from =Metric , values_from=values) %>% 
  dplyr::mutate(Difference=c('PSO - RandomSearch', 'DiffEvolution - PSO', 'DiffEvolution - RandomSearch')) %>% 
  dplyr::select(Difference, Lower=lower, Mean, Upper=upper)

kable(hpdi_diff, booktabs=T, format.args = list(scientific = FALSE), digits = 3) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% 
  kableExtra::scroll_box(width = "100%")
Difference                    Lower   Mean  Upper
PSO - RandomSearch            0.123  0.127  0.130
DiffEvolution - PSO           1.207  1.212  1.217
DiffEvolution - RandomSearch  1.334  1.339  1.343
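
If a plot is preferred over the table, the same samples can be visualized directly (a sketch reusing the sample_a_alg tibble from above):

# Posterior densities of the pairwise differences between algorithms
sample_a_alg %>%
  tidyr::pivot_longer(cols = everything(),
                      names_to = "Difference", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~Difference, scales = "free_x") +
  labs(x = "Difference in time per function evaluation x10,000 (s)", y = "Density")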

Creating an output table for the paper:

rename_pars <- c(
  paste(rep('a_',length(algorithms)),algorithms, sep = ""),
  paste(rep('sigma_',length(algorithms)),algorithms, sep = ""),
  's',
  'nu')
t<-create_table_model(multiplegroup_fit, pars=c(a_alg_v, sigma_v, 's','nu'),rename_pars)
colnames(t)<-c("Parameter", "Mean", "HPD low", "HPD high")
saveRDS(t,'./statscomp-paper/tables/datafortables/multiplegroupsdifference-par-table.RDS')