Chapter 6 Multiple-group comparison
We present here the Stan version of the BEST (Bayesian Estimation Supersedes the t Test) from John K. Kruschke. We will consider the following research question
- RQ5: Is there a difference in the time taken per function evaluation between the PSO, the RandomSearch1 and the Differential Evolution algorithms?
6.1 RQ5 Data preparation
We start importing the dataset
<- readr::read_csv('./data/statscomp.csv') dataset
Filtering the data that we want and applying some transformations
<- dataset %>%
d ::filter(
dplyr==TRUE &
OptimizationSuccessful=="PSO" | Algorithm=="RandomSearch1" | Algorithm=="DifferentialEvolution")) %>%
(Algorithm::select(Algorithm, CostFunction, TimeToComplete, simNumber, MaxFeval) %>%
dplyr::mutate(y=10000*TimeToComplete/MaxFeval,
dplyrCostFunctionID=create_index(CostFunction),
AlgorithmID=create_index(Algorithm)) %>%
::select(Algorithm, AlgorithmID, CostFunction, CostFunctionID, y,-simNumber, -MaxFeval)
dplyr
<-get_index_names_as_array(d$Algorithm)
algorithms<- get_index_names_as_array(d$CostFunction) bm
The data should look like this:
kable(dplyr::sample_n(d,size=10), "html",booktabs=T, format.args = list(scientific = FALSE), digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
::scroll_box(width = "100%") kableExtra
Algorithm | AlgorithmID | CostFunction | CostFunctionID | y |
---|---|---|---|---|
DifferentialEvolution | 1 | WhitleyN6 | 28 | 2.950 |
DifferentialEvolution | 1 | ChenBird | 2 | 1.813 |
PSO | 2 | Trefethen | 25 | 0.541 |
DifferentialEvolution | 1 | WhitleyN6 | 28 | 2.957 |
RandomSearch1 | 3 | DiscusN2 | 6 | 0.517 |
PSO | 2 | RosenbrockRotatedN6 | 14 | 1.747 |
RandomSearch1 | 3 | Giunta | 8 | 0.379 |
RandomSearch1 | 3 | QingN2 | 13 | 0.190 |
RandomSearch1 | 3 | Schwefel2d4N6 | 20 | 0.436 |
RandomSearch1 | 3 | Damavandi | 5 | 0.360 |
Some initial visualizations in terms of box-plots
<-ggplot(d) +
p1geom_boxplot(aes(x=Algorithm, y=y))+
labs(y="Time to complete x10,000 (s)")
+ plot_annotation(title ="Box-plot of the runtime per function evaluation") p1
<- lm(y~Algorithm, data=d)
lmfit
<-ggplot()+
p2geom_qq(aes(sample=lmfit$residuals))+
geom_qq_line(aes(sample=lmfit$residuals))+
labs(x="Standard normal quantiles", y="Sample quantiles")
+ plot_annotation(title = "Q-Q plot for normality analysis") p2
Verifying that the log of the runtime still present high tailed distributions
<-d
d2$y <- log(d2$y)
d2<- lm(y~Algorithm, data=d2)
lmfit2 <-ggplot()+
p3geom_qq(aes(sample=lmfit2$residuals))+
geom_qq_line(aes(sample=lmfit2$residuals))+
labs(x="Standard normal quantiles", y="Sample quantiles")
+ plot_annotation(title = "Q-Q plot with the log of runtime") p2
6.2 RQ5 Stan model
The Stan model is specified in the file: './stanmodels/multiplegroups.stan'
. Note that at the end of the model we commented the generated quantities. This block generates the predictive posterior y_rep and the log likelihood, log_lik. These values are useful in diagnosing and validating the model but the end file is extremely large (~1Gb for 2000 iterations) and make many of the following calculations slow. If the reader wants to see these values is just to uncomment and run the stan model again.
print_stan_code('./stanmodels/multiplegroups.stan')
// Multiple group comparison
// Author: David Issa Mattos
// Date: 23 June 2020
//
//
data {
int <lower=1> N_total; // Sample size
real y[N_total]; // time to complete variable
//To model each algorithm independently
int <lower=1> N_algorithm; // Number of algorithms
int algorithm_id[N_total]; //vector that has the id of each algorithm
//To model the influence of each benchmark
int <lower=1> N_bm;
int bm_id[N_total];
}
parameters {
//Fixed effect
real a_alg[N_algorithm];//the mean effect given by the algorithms
real <lower=0> sigma[N_algorithm];//std for the student t
// //Random effect. The effect of the benchmarks
real a_bm_norm[N_bm];//the mean effect given by the base class type
real<lower=0> s;//std for the random effects
real<lower=0> nu;//std for the random effects
}
model {
real mu[N_total];
real sigma_i[N_total];
sigma ~ exponential(1);
nu ~ exponential(1.0/30.0);
//Fixed effect
a_alg ~ normal(0,1);
// //Random effects
s ~ exponential(1);
a_bm_norm ~ normal(0,1);
for (i in 1:N_total)
{
mu[i] = a_alg[algorithm_id[i]] + a_bm_norm[bm_id[i]]*s;
sigma_i[i] = sigma[algorithm_id[i]];
}
y ~ student_t(nu, mu, sigma_i);
}
//Uncoment this part to get the posterior predictives and the log likelihood
//But note that it takes a lot of space in the final model
// generated quantities{
// vector [N_total] y_rep;
// vector[N_total] log_lik;
// for (i in 1:N_total){
// real mu;
// real sigma_i;
// mu = a_alg[algorithm_id[i]] + a_bm_norm[bm_id[i]]*s;
// sigma_i = sigma[algorithm_id[i]];
// y_rep[i] = student_t_rng(nu,mu,sigma_i);
//
// //Log likelihood
// log_lik[i] = student_t_lpdf(y[i] | nu,mu,sigma_i);
//
// }
// }
Let’s compile and start sampling with the Stan function. In the data folder you can find the specific data used to fit the model after all transformations "./data/multiplegroup-data.RDS"
<- list(
standata N_total=nrow(d),
y = d$y,
N_algorithm = length(algorithms),
algorithm_id = d$AlgorithmID,
N_bm = length(bm),
bm_id = d$CostFunctionID
)saveRDS(standata, file = "./data/multiplegroups-data.RDS")
For computation time sake we are not running this chunk every time we compile this document. From now on we will load from the saved Stan fit object. However, when we change our model or the data we can just run this chunk separately. Here we increased the maxtreedepth and the number of iterations so we have a higher effective sample for inference. Both of these do not impact the validity of the chain just the computation efficiency.
<-readRDS("./data/multiplegroups-data.RDS")
standata<- stan(file = './stanmodels/multiplegroups.stan',
multiplegroup_fit data=standata,
chains = 4,
warmup = 400,
iter = 4000,
control = list(max_treedepth = 15))
saveRDS(multiplegroup_fit, file = "./data/multiplegroups-fit.RDS")
6.3 RQ5 Diagnosis
<- c("a_alg[1]",
a_alg_v "a_alg[2]",
"a_alg[3]")
<- c("sigma[1]",
sigma_v "sigma[2]",
"sigma[3]")
::traceplot(multiplegroup_fit, pars=a_alg_v) rstan
::traceplot(multiplegroup_fit, pars=sigma_v) rstan
::traceplot(multiplegroup_fit, pars=c('s', 'nu')) rstan
Another diagnosis is to look at the Rhat. If Rhat is greater than 1.05 it indicates a divergence in the chains (they did not mix well). The table below shows a summary of the sampling.
kable(summary(multiplegroup_fit)$summary, "html",) %>%
kable_styling(bootstrap_options = c('striped',"hover", "condensed" )) %>%
::scroll_box(width = "100%") kableExtra
mean | se_mean | sd | 2.5% | 25% | 50% | 75% | 97.5% | n_eff | Rhat | |
---|---|---|---|---|---|---|---|---|---|---|
a_alg[1] | 1.7811268 | 0.0026230 | 0.0606078 | 1.6619826 | 1.7403893 | 1.7820330 | 1.8215807 | 1.8982458 | 533.8817 | 1.006126 |
a_alg[2] | 0.5691139 | 0.0026209 | 0.0605588 | 0.4496716 | 0.5283954 | 0.5700587 | 0.6093643 | 0.6855433 | 533.8829 | 1.006094 |
a_alg[3] | 0.4423529 | 0.0026208 | 0.0605582 | 0.3233317 | 0.4013482 | 0.4432400 | 0.4826083 | 0.5580626 | 533.9264 | 1.006091 |
sigma[1] | 0.0877444 | 0.0000318 | 0.0017455 | 0.0843878 | 0.0865374 | 0.0877212 | 0.0889341 | 0.0911437 | 3011.1145 | 1.001623 |
sigma[2] | 0.0694441 | 0.0000277 | 0.0015140 | 0.0664710 | 0.0684330 | 0.0694499 | 0.0704463 | 0.0724822 | 2976.8074 | 1.000836 |
sigma[3] | 0.0368688 | 0.0000144 | 0.0008039 | 0.0352959 | 0.0363257 | 0.0368479 | 0.0374083 | 0.0384708 | 3121.5824 | 1.000719 |
a_bm_norm[1] | 0.7169454 | 0.0087107 | 0.2135654 | 0.2988931 | 0.5710651 | 0.7142151 | 0.8674880 | 1.1328629 | 601.1139 | 1.008025 |
a_bm_norm[2] | -0.1824269 | 0.0085491 | 0.1988784 | -0.5714764 | -0.3176881 | -0.1827604 | -0.0451221 | 0.1940826 | 541.1669 | 1.005750 |
a_bm_norm[3] | -0.5830931 | 0.0091159 | 0.2163121 | -1.0160424 | -0.7275807 | -0.5801526 | -0.4404438 | -0.1643768 | 563.0722 | 1.003927 |
a_bm_norm[4] | -0.6091954 | 0.0091666 | 0.2178293 | -1.0467759 | -0.7534750 | -0.6062762 | -0.4656718 | -0.1855039 | 564.6959 | 1.003882 |
a_bm_norm[5] | -0.2485564 | 0.0086165 | 0.2008707 | -0.6432752 | -0.3856466 | -0.2485221 | -0.1111202 | 0.1316417 | 543.4600 | 1.005561 |
a_bm_norm[6] | 0.4734111 | 0.0084824 | 0.2024756 | 0.0780666 | 0.3334823 | 0.4720095 | 0.6152515 | 0.8568556 | 569.7825 | 1.007887 |
a_bm_norm[7] | -0.6423130 | 0.0092312 | 0.2198717 | -1.0846795 | -0.7887968 | -0.6386941 | -0.4972334 | -0.2139691 | 567.3092 | 1.003799 |
a_bm_norm[8] | -0.1644817 | 0.0085326 | 0.1985217 | -0.5517190 | -0.2996106 | -0.1657384 | -0.0275923 | 0.2129775 | 541.3154 | 1.005963 |
a_bm_norm[9] | 2.1116300 | 0.0121812 | 0.3393819 | 1.4484849 | 1.8816608 | 2.1092589 | 2.3358436 | 2.7967160 | 776.2394 | 1.005745 |
a_bm_norm[10] | -0.1812781 | 0.0085416 | 0.1989285 | -0.5714155 | -0.3176169 | -0.1816472 | -0.0444326 | 0.1975518 | 542.3993 | 1.005821 |
a_bm_norm[11] | 0.9019238 | 0.0089863 | 0.2249558 | 0.4576177 | 0.7501431 | 0.8991386 | 1.0577316 | 1.3417398 | 626.6611 | 1.007809 |
a_bm_norm[12] | -0.7818005 | 0.0095262 | 0.2292960 | -1.2427092 | -0.9348107 | -0.7782093 | -0.6287008 | -0.3328971 | 579.3611 | 1.003212 |
a_bm_norm[13] | -0.6775106 | 0.0092827 | 0.2220651 | -1.1227791 | -0.8255683 | -0.6734795 | -0.5296425 | -0.2434466 | 572.2849 | 1.003527 |
a_bm_norm[14] | 1.7137941 | 0.0109291 | 0.2967903 | 1.1337453 | 1.5123317 | 1.7108835 | 1.9123530 | 2.3102016 | 737.4474 | 1.006425 |
a_bm_norm[15] | -0.2518458 | 0.0086224 | 0.2009569 | -0.6477380 | -0.3884656 | -0.2520031 | -0.1136934 | 0.1301903 | 543.1896 | 1.005519 |
a_bm_norm[16] | -0.6890408 | 0.0093176 | 0.2228297 | -1.1395233 | -0.8382348 | -0.6855899 | -0.5409571 | -0.2539657 | 571.9248 | 1.003543 |
a_bm_norm[17] | -0.2997380 | 0.0086702 | 0.2027220 | -0.6996320 | -0.4386068 | -0.2983614 | -0.1626570 | 0.0871212 | 546.6910 | 1.005343 |
a_bm_norm[18] | -0.2065718 | 0.0085791 | 0.1997227 | -0.5978602 | -0.3420286 | -0.2057953 | -0.0687583 | 0.1743771 | 541.9588 | 1.005684 |
a_bm_norm[19] | -0.1781351 | 0.0085411 | 0.1988323 | -0.5689114 | -0.3141829 | -0.1783365 | -0.0407893 | 0.1991033 | 541.9402 | 1.005823 |
a_bm_norm[20] | 0.0013479 | 0.0084299 | 0.1959243 | -0.3801857 | -0.1321950 | -0.0011718 | 0.1384331 | 0.3748182 | 540.1669 | 1.006703 |
a_bm_norm[21] | -0.0381568 | 0.0084334 | 0.1961037 | -0.4215247 | -0.1727083 | -0.0400992 | 0.0998043 | 0.3345669 | 540.7160 | 1.006430 |
a_bm_norm[22] | -0.2591012 | 0.0086473 | 0.2014404 | -0.6537203 | -0.3957154 | -0.2589614 | -0.1220667 | 0.1262415 | 542.6678 | 1.005472 |
a_bm_norm[23] | -0.7358550 | 0.0094254 | 0.2260798 | -1.1892931 | -0.8858307 | -0.7320868 | -0.5846833 | -0.2939862 | 575.3385 | 1.003330 |
a_bm_norm[24] | -0.8254234 | 0.0096072 | 0.2324392 | -1.2935835 | -0.9805076 | -0.8213645 | -0.6689679 | -0.3684101 | 585.3639 | 1.003067 |
a_bm_norm[25] | -0.3366898 | 0.0087284 | 0.2039811 | -0.7369234 | -0.4748569 | -0.3359966 | -0.1992994 | 0.0536531 | 546.1521 | 1.005150 |
a_bm_norm[26] | 0.3488281 | 0.0084028 | 0.1985740 | -0.0350128 | 0.2120805 | 0.3487199 | 0.4879435 | 0.7238281 | 558.4606 | 1.007803 |
a_bm_norm[27] | -0.7346498 | 0.0094058 | 0.2258222 | -1.1905483 | -0.8847851 | -0.7302798 | -0.5839647 | -0.2927947 | 576.4249 | 1.003359 |
a_bm_norm[28] | 3.6998131 | 0.0182556 | 0.5324392 | 2.6893552 | 3.3338793 | 3.6905820 | 4.0513884 | 4.7696181 | 850.6446 | 1.004008 |
a_bm_norm[29] | -0.4143554 | 0.0088376 | 0.2074474 | -0.8239239 | -0.5542043 | -0.4118008 | -0.2764703 | -0.0164877 | 550.9920 | 1.004714 |
a_bm_norm[30] | -0.1770084 | 0.0085576 | 0.1989495 | -0.5672901 | -0.3128015 | -0.1772755 | -0.0397254 | 0.2004904 | 540.4818 | 1.005940 |
s | 0.3072466 | 0.0014798 | 0.0441124 | 0.2352709 | 0.2768578 | 0.3021747 | 0.3322788 | 0.4069552 | 888.6323 | 1.001588 |
nu | 2.7532026 | 0.0015421 | 0.0802559 | 2.6021136 | 2.6972495 | 2.7520588 | 2.8075206 | 2.9112477 | 2708.6351 | 1.001055 |
lp__ | 14070.2753067 | 0.1671716 | 5.9172571 | 14057.6082046 | 14066.4863632 | 14070.6208015 | 14074.4422447 | 14080.7870357 | 1252.8982 | 1.000465 |
6.4 RQ5 Results and Plots
First lets get the HPDI of every parameter.
Then we restrict to the algorithms, them to the slopes, then to the parameter s
<- get_HPDI_from_stanfit(multiplegroup_fit)
hpdi
<- hpdi %>%
hpdi_algorithm ::filter(str_detect(Parameter, "a_alg\\[")) %>%
dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels
dplyr
<- hpdi %>%
hpdi_sigma::filter(str_detect(Parameter, "sigma\\[")) %>%
dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels
dplyr
<- hpdi %>%
hpdi_s ::filter(Parameter=='s')
dplyr
<- hpdi %>%
hpdi_nu ::filter(Parameter=='nu')
dplyr
<- hpdi %>%
hpdi_nu_s ::filter(Parameter=='nu' | Parameter=='s')
dplyr
<-ggplot(data=hpdi_algorithm, aes(x=Parameter))+
p_alggeom_pointrange(aes(
ymin=HPDI.lower,
ymax=HPDI.higher,
y=Mean))+
labs(y="a_alg", x="Algorithm")+
theme(axis.title.x= element_blank())+
coord_flip()
+ plot_annotation(title = 'HPDI interval for the algorithms') p_alg
<-ggplot(data=hpdi_sigma, aes(x=Parameter))+
p_sigmageom_pointrange(aes(
ymin=HPDI.lower,
ymax=HPDI.higher,
y=Mean))+
labs(y="sigma", x="Algorithm")+
theme(axis.title.x= element_blank())+
coord_flip()
+ plot_annotation(title = 'HPDI interval for sigma') p_sigma
<- ggplot(data=hpdi_s, aes(x=Parameter))+
p_s geom_pointrange(aes(
ymin=HPDI.lower,
ymax=HPDI.higher,
y=Mean))+
labs(y="s", x="Parameter")+
coord_flip()
+ plot_annotation(title = 'HPDI interval std of the benchmarks') p_s
<- ggplot(data=hpdi_nu, aes(x=Parameter))+
p_nu geom_pointrange(aes(
ymin=HPDI.lower,
ymax=HPDI.higher,
y=Mean))+
labs(y="nu", x="Parameter")+
coord_flip()
+ plot_annotation(title = 'HPDI interval of the degree of freedom') p_nu
<- ggplot(data=hpdi_nu_s, aes(x=Parameter))+
p_nu_s geom_pointrange(aes(
ymin=HPDI.lower,
ymax=HPDI.higher,
y=Mean))+
labs(y="Estimate of s and nu", x="Parameter")+
theme(axis.title.x= element_blank())+
coord_flip()
+ plot_annotation(title = 'HPDI interval') p_nu_s
Now lets get a posterior distribution of the difference
<- rstan::extract(multiplegroup_fit)
posterior <- as_tibble(posterior$a_alg)
a_alg colnames(a_alg) <- algorithms
<- dplyr::sample_n(a_alg, size=1000, replace=T) %>%
sample_a_alg ::mutate(PSO_Random = PSO-RandomSearch1,
dplyrDE_PSO= DifferentialEvolution-PSO,
DE_Random = DifferentialEvolution-RandomSearch1) %>%
::select(-DifferentialEvolution,-PSO,-RandomSearch1)
dplyr
#Getting HPDI from a data frame and creating a table instead of plotting...
<-HDInterval::hdi(sample_a_alg,credMass=0.95)
hpdi_diff<-hpdi_diff %>% as_tibble(rownames = "Metric") %>%
hpdi_diff::add_row(Metric="Mean", PSO_Random=mean(sample_a_alg$PSO_Random), DE_PSO=mean(sample_a_alg$DE_PSO), DE_Random=mean(sample_a_alg$DE_Random)) %>%
tibble::pivot_longer(cols=-Metric, names_to="AlgorithmDifference", values_to='values') %>%
tidyr::pivot_wider(names_from =Metric , values_from=values) %>%
tidyr::mutate(Difference=c('PSO - RandomSearch', 'DiffEvolution - PSO', 'DiffEvolution - RandomSearch')) %>%
dplyr::select(Difference, Lower=lower, Mean, Upper=upper)
dplyr
kable(hpdi_diff, booktabs=T, format.args = list(scientific = FALSE), digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
::scroll_box(width = "100%") kableExtra
Difference | Lower | Mean | Upper |
---|---|---|---|
PSO - RandomSearch | 0.123 | 0.127 | 0.130 |
DiffEvolution - PSO | 1.207 | 1.212 | 1.217 |
DiffEvolution - RandomSearch | 1.334 | 1.339 | 1.343 |
Creating an output table
<- c(
rename_pars paste(rep('a_',length(algorithms)),algorithms, sep = ""),
paste(rep('sigma_',length(algorithms)),algorithms, sep = ""),
's',
'nu')
<-create_table_model(multiplegroup_fit, pars=c(a_alg_v, sigma_v, 's','nu'),rename_pars)
tcolnames(t)<-c("Parameter", "Mean", "HPD low", "HPD high")
saveRDS(t,'./statscomp-paper/tables/datafortables/multiplegroupsdifference-par-table.RDS')