Chapter 2 Probability of success model | Online Appendix for: Statistical Models for the Analysis of Optimization Algorithms with Benchmark Functions

2.1 RQ1 Data preparation

We start importing the dataset

dataset <- readr::read_csv('./data/statscomp.csv')

Let’s select only the columns that interests us. Note we use "" to select some of the columns because they have “-” in the column name

dataset<-dplyr::select(dataset, Algorithm, CostFunction, SD, MaxFevalPerDimensions, simNumber, SolveAt1, "SolveAt1e-1","SolveAt1e-3","SolveAt1e-6", OptimizationSuccessful)

Let’s do some basic transformation

1 - We select only the cases where the optimization completed 2 - We convert True to 1 and 0 to false 3 - We group by the algorithms, functions, SD, and budget so we can summarize and create aggregated data 4 - We create an index of each algorithm and the cost functions. This is basically creating a map of NelderMead=1, PSO=2 etc… This makes things easier to work in Stan. For that we use the function create_index from the utils.R file
5 - We drop the columns we wont use 6 - Get an array with the names of the benchmark functions and the algorithms (to create nicer plots later with lengend)

Since we are only looking at 1e-1 for the precision we comment the other lines

d <- dataset %>% 
  dplyr::filter(OptimizationSuccessful==TRUE) %>%
  dplyr::mutate(
      solvedAt1e1=as.integer(dataset$"SolveAt1e-1"),
      budget=MaxFevalPerDimensions)  %>% 
  dplyr::group_by(Algorithm, CostFunction, SD, budget) %>% 
    dplyr::summarize(
      solvedAt1e1=sum(solvedAt1e1),
      N=n()) %>% 
  dplyr::ungroup() %>%
  dplyr::mutate(AlgorithmID=create_index(Algorithm),
                CostFunctionID=create_index(CostFunction)) %>% 
    dplyr::select(Algorithm,AlgorithmID, CostFunction, CostFunctionID, SD, budget, N,
                  y=solvedAt1e1,
                  )

#List of algorithms
bm <- get_index_names_as_array(d$CostFunction)
algorithms <- get_index_names_as_array(d$Algorithm)

Lets preview a sample of the data set

kable(dplyr::sample_n(d,size=10),"html", booktabs=T, format.args = list(scientific = FALSE), digits = 3) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% 
  kableExtra::scroll_box(width = "100%")

Algorithm	AlgorithmID	CostFunction	CostFunctionID	SD	budget	N	y
RandomSearch2	7	ChenV	3	3	20	10	0
PSO	5	WhitleyN6	28	0	20	10	0
SimulatedAnnealing	8	Schwefel2d26N6	19	3	100000	10	0
NelderMead	4	LunacekBiRastriginN6	9	3	1000	10	0
PSO	5	StrechedVSineWave2N	23	3	10000	10	0
PSO	5	Schwefel2d21N6	17	3	100	10	0
CuckooSearch	2	ZakharovN2	30	3	1000	10	0
DifferentialEvolution	3	StrechedVSineWave2N	23	0	10000	10	10
DifferentialEvolution	3	Schwefel2d21N6	17	0	100000	10	10
CMAES	1	Giunta	8	3	1000	10	1

2.2 RQ1 Stan model

The Stan model is specified in the file: './stanmodels/probsuccess.stan'. Note that at the end of the model we commented the generated quantities. This block generates the predictive posterior y_rep and the log likelihood, log_lik. These values are useful in diagnosing and validating the model but the end file is extremely large (~1Gb for 2000 iterations) and make many of the following calculations slow. If the reader wants to see these values is just to uncomment and run the stan model again

print_stan_code('./stanmodels/probsuccess.stan')

// Probability of success model
// Author: David Issa Mattos
// Date: 16 June 2020
//
//

data {
 int <lower=1> N_total; // Sample size
 int y[N_total]; // Result of the binomial
 int N_draw[N_total]; // Number of draws in the binomial
 
 real x_noise[N_total];//predictor for noise
 
 //To model each algorithm independently
 int <lower=1> N_algorithm; // Number of algorithms
 int algorithm_id[N_total]; //vector that has the id of each algorithm
 
 //To model the influence of each benchmark
 int <lower=1> N_bm;
 int bm_id[N_total];
}

parameters {
  //Fixed effect
  real a_alg[N_algorithm];//the mean effect given by the algorithms
  real b_noise[N_algorithm];//slope for the noise

  // //Random effect. The effect of the benchmarks
  real a_bm_norm[N_bm];//the mean effect given by the base class type
  real<lower=0> s;//std for the random effects
  
}

model {
  real p[N_total];
  
  //Fixed effect
  a_alg ~ normal(0,5);
  b_noise ~ normal(0,5);

  // //Random effects
  s ~ exponential(0.1);
  a_bm_norm ~ normal(0,1);

  for (i in 1:N_total)
  {
    
     p[i] = a_alg[algorithm_id[i]]+ a_bm_norm[bm_id[i]]*s + b_noise[algorithm_id[i]] * x_noise[i];
  }
  
  //Equivalent to: y~binomial(N, inverse_logit(a+bx=alpha))
  y ~ binomial_logit(N_draw,p);
}

//Uncoment this part to get the posterior predictives and the log likelihood
//But note that it takes a lot of space in the final model
// generated quantities{
//   vector [N_total] y_rep;
//   vector[N_total] log_lik;
//   
//   for(i in 1:N_total){
//     real p;
//     p = a_alg[algorithm_id[i]]+ a_bm_norm[bm_id[i]]*s + b_noise[algorithm_id[i]] * x_noise[i];
// 
//     y_rep[i] = binomial_rng(N_draw[i], inv_logit(p));
//     
//     //Log likelihood
//      log_lik[i] = binomial_lpmf(y[i] | N_draw[i], inv_logit(p));
//   }
// }

Let’s compile and start sampling with the Stan function. In the data folder you can find the specific data used to fit the model after all transformations "./data/probsuccsess-data.RDS"

standata <- list(
  N_total = nrow(d),
  y = d$y,
  N_draw = d$N,
  x_noise = d$SD,
  N_algorithm = length(algorithms),
  algorithm_id =d$AlgorithmID,
  N_bm = length(bm),
  bm_id = d$CostFunctionID)
saveRDS(standata, file = "./data/probsuccsess-data.RDS")

For computation time sake we are not running this chunk every time we compile this document. From now on we will load from the saved Stan fit object in the data folder. However, when we change our model or the data we can just run this chunk separately.

standata<-readRDS("./data/probsuccsess-data.RDS")
probsuccess.fit <- stan(file = './stanmodels/probsuccess.stan',
                     data=standata,
                     chains = 4,
                     warmup = 200,
                     iter = 3000)
saveRDS(probsuccess.fit, file = "./data/probsuccsess-fit.RDS")

2.3 RQ1 Diagnosis

2.3.1 RQ1 Chains convergence

The first step is to evaluate the convergence of the chains. We will look now only for the slopes, algorithms intercept and the standard deviation of the random effects (and not each intercept of the random effects)

a_alg <- c("a_alg[1]",
           "a_alg[2]",
           "a_alg[3]",
           "a_alg[4]",
           "a_alg[5]",
           "a_alg[6]",
           "a_alg[7]",
           "a_alg[8]")
b_noise <- c("b_noise[1]",
             "b_noise[2]",
             "b_noise[3]",
             "b_noise[4]",
             "b_noise[5]",
             "b_noise[6]",
             "b_noise[7]",
             "b_noise[8]")
rstan::traceplot(probsuccess.fit, pars=a_alg)

rstan::traceplot(probsuccess.fit, pars=b_noise)

rstan::traceplot(probsuccess.fit, pars=c('s'))

Another diagnosis is to look at the Rhat. If Rhat is greater than 1.05 it indicates a divergence in the chains (they did not mix well). The table below shows a summary of the sampling.

kable(summary(probsuccess.fit)$summary) %>% 
  kable_styling(bootstrap_options = c('striped',"hover", "condensed" )) %>% 
  kableExtra::scroll_box(width = "100%")

	mean	se_mean	sd	2.5%	25%	50%	75%	97.5%	n_eff	Rhat
a_alg[1]	-0.1016770	0.0141859	0.4511590	-0.9986581	-0.3944800	-0.1079857	0.1956272	0.7748758	1011.456	1.0007354
a_alg[2]	-2.5516066	0.0142459	0.4563417	-3.4560617	-2.8523292	-2.5527132	-2.2471997	-1.6730424	1026.124	1.0009415
a_alg[3]	-0.2139745	0.0142481	0.4513760	-1.1136926	-0.5092907	-0.2159365	0.0889257	0.6489137	1003.611	1.0007939
a_alg[4]	-4.3498335	0.0142878	0.4667423	-5.2926288	-4.6576410	-4.3486295	-4.0362012	-3.4514857	1067.149	1.0007534
a_alg[5]	-0.3943603	0.0142332	0.4506022	-1.2874160	-0.6869869	-0.3994464	-0.0884889	0.4724644	1002.264	1.0009414
a_alg[6]	-2.0070362	0.0142076	0.4539332	-2.9122092	-2.3013030	-2.0129352	-1.7030975	-1.1352872	1020.806	1.0008781
a_alg[7]	-2.2163631	0.0142042	0.4537016	-3.1183583	-2.5133972	-2.2204326	-1.9159819	-1.3441906	1020.248	1.0009555
a_alg[8]	-2.5625822	0.0142418	0.4562100	-3.4753740	-2.8617582	-2.5668217	-2.2559160	-1.6908949	1026.123	1.0008100
b_noise[1]	-1.2728595	0.0004634	0.0464911	-1.3649835	-1.3042495	-1.2727833	-1.2419386	-1.1808717	10064.607	1.0003304
b_noise[2]	-0.8111614	0.0005341	0.0594260	-0.9269817	-0.8512298	-0.8108461	-0.7706404	-0.6960778	12378.857	0.9998278
b_noise[3]	-1.3531582	0.0004555	0.0498441	-1.4530193	-1.3862144	-1.3528896	-1.3200088	-1.2545062	11973.466	1.0003042
b_noise[4]	-0.3851807	0.0006594	0.0708802	-0.5254143	-0.4334606	-0.3844210	-0.3357693	-0.2486244	11553.895	0.9998575
b_noise[5]	-1.1483855	0.0004288	0.0471090	-1.2413071	-1.1803177	-1.1475632	-1.1166922	-1.0569722	12068.645	0.9999617
b_noise[6]	-0.9653479	0.0005154	0.0567297	-1.0794613	-1.0029451	-0.9644473	-0.9267742	-0.8573317	12113.240	0.9999536
b_noise[7]	-0.7231598	0.0005147	0.0521852	-0.8278536	-0.7579466	-0.7223159	-0.6875584	-0.6217345	10280.685	0.9998671
b_noise[8]	-0.7934309	0.0006002	0.0579600	-0.9074668	-0.8320577	-0.7937229	-0.7548121	-0.6780034	9326.707	1.0000572
a_bm_norm[1]	-0.5515980	0.0059809	0.2113896	-0.9712815	-0.6922152	-0.5481803	-0.4065674	-0.1432695	1249.219	1.0005773
a_bm_norm[2]	-1.3857790	0.0072688	0.3110012	-2.0315335	-1.5876596	-1.3696647	-1.1724448	-0.8177882	1830.642	1.0004815
a_bm_norm[3]	-0.6135924	0.0059932	0.2138498	-1.0370821	-0.7578332	-0.6099309	-0.4669450	-0.1978411	1273.226	1.0007982
a_bm_norm[4]	0.6996375	0.0065183	0.2143535	0.2882285	0.5533039	0.6956576	0.8438217	1.1273589	1081.413	1.0011373
a_bm_norm[5]	-0.9190532	0.0063871	0.2451850	-1.4221268	-1.0826093	-0.9142283	-0.7500079	-0.4543208	1473.602	1.0004336
a_bm_norm[6]	-0.1266989	0.0057661	0.1905645	-0.5002489	-0.2576990	-0.1251113	0.0010672	0.2486333	1092.226	1.0005111
a_bm_norm[7]	2.0929329	0.0098459	0.3562187	1.4022113	1.8483216	2.0869635	2.3308668	2.8104401	1308.946	1.0019514
a_bm_norm[8]	1.4163381	0.0079819	0.2778883	0.8720145	1.2260972	1.4104389	1.6037712	1.9676857	1212.061	1.0016224
a_bm_norm[9]	-2.2875170	0.0080868	0.5070058	-3.3784938	-2.6042096	-2.2496205	-1.9305845	-1.4020841	3930.757	1.0003584
a_bm_norm[10]	0.0376740	0.0057984	0.1890303	-0.3267560	-0.0909227	0.0381280	0.1632858	0.4111242	1062.770	1.0008355
a_bm_norm[11]	-0.4683589	0.0058892	0.2054966	-0.8814239	-0.6047223	-0.4628744	-0.3299558	-0.0697175	1217.572	1.0004397
a_bm_norm[12]	0.9705429	0.0072309	0.2352270	0.5211149	0.8117004	0.9658386	1.1296289	1.4391891	1058.256	1.0014069
a_bm_norm[13]	0.1111258	0.0058348	0.1895685	-0.2518913	-0.0206515	0.1106819	0.2360902	0.4897085	1055.563	1.0006293
a_bm_norm[14]	-0.4185043	0.0058612	0.2020018	-0.8150537	-0.5543865	-0.4170919	-0.2820809	-0.0200522	1187.764	1.0004502
a_bm_norm[15]	-0.2120039	0.0057779	0.1931704	-0.5875451	-0.3435861	-0.2108279	-0.0835340	0.1704687	1117.724	1.0004975
a_bm_norm[16]	0.0582553	0.0057948	0.1899707	-0.3051919	-0.0713240	0.0577533	0.1855058	0.4365644	1074.708	1.0007318
a_bm_norm[17]	-0.0945615	0.0058098	0.1905650	-0.4616333	-0.2244295	-0.0938981	0.0338356	0.2816415	1075.888	1.0006509
a_bm_norm[18]	0.3553886	0.0060492	0.1962915	-0.0150112	0.2217934	0.3541406	0.4875904	0.7474879	1052.943	1.0007216
a_bm_norm[19]	-1.3873544	0.0071574	0.3144653	-2.0341171	-1.5904221	-1.3696656	-1.1671590	-0.8205984	1930.342	1.0006691
a_bm_norm[20]	-0.2103035	0.0057874	0.1925856	-0.5825628	-0.3399080	-0.2079499	-0.0814456	0.1685257	1107.335	1.0007190
a_bm_norm[21]	0.0374586	0.0057911	0.1888960	-0.3244013	-0.0918735	0.0378206	0.1633785	0.4096106	1063.953	1.0007178
a_bm_norm[22]	0.4262087	0.0061486	0.1989057	0.0477541	0.2899258	0.4231280	0.5578995	0.8254036	1046.516	1.0011291
a_bm_norm[23]	-0.2208615	0.0057839	0.1927261	-0.5929206	-0.3516246	-0.2213745	-0.0911591	0.1589538	1110.306	1.0006586
a_bm_norm[24]	0.9181966	0.0069786	0.2314096	0.4725721	0.7597149	0.9153235	1.0732483	1.3825348	1099.586	1.0014348
a_bm_norm[25]	-0.4800747	0.0058778	0.2060577	-0.8903264	-0.6193752	-0.4793265	-0.3426119	-0.0764864	1228.989	1.0007175
a_bm_norm[26]	1.8127547	0.0090502	0.3223532	1.1907146	1.5925470	1.8072705	2.0297038	2.4651831	1268.663	1.0017888
a_bm_norm[27]	-0.3950165	0.0058627	0.2010451	-0.7896719	-0.5304012	-0.3915179	-0.2564086	-0.0006536	1175.971	1.0004427
a_bm_norm[28]	-1.5823227	0.0078727	0.3510577	-2.3267529	-1.8102101	-1.5614566	-1.3363711	-0.9473444	1988.408	1.0008761
a_bm_norm[29]	0.4734666	0.0062045	0.2008774	0.0930725	0.3343172	0.4734256	0.6092113	0.8729607	1048.224	1.0011553
a_bm_norm[30]	0.7791124	0.0067026	0.2195527	0.3600034	0.6284312	0.7741367	0.9296758	1.2156303	1072.990	1.0013464
s	2.4396697	0.0091963	0.3666749	1.8524855	2.1797734	2.3975453	2.6487166	3.2807386	1589.761	1.0026073
lp__	-5600.1110570	0.1389985	6.2045095	-5613.2707018	-5604.0278224	-5599.7723066	-5595.7819986	-5588.9259626	1992.483	1.0021429

2.4 RQ1 Results and Plots

First lets get the HPDI of every parameter. We do this with the helper function from utils.R. But the function is quite simple. It just converts the stanmodel object to an object that the coda package can read (and do some renaming). Alternatively we can use the HDInterval package.

Then we restrict to the algorithms, them to the slopes, then to the other parameters. We create different data frames that we use to plot with ggplot pointrange

hpdi <- get_HPDI_from_stanfit(probsuccess.fit)
hpdi_oddsratio <- hpdi
hpdi_oddsratio$Mean <- exp(hpdi$Mean)
hpdi_oddsratio$HPDI.lower <- exp(hpdi$HPDI.lower)
hpdi_oddsratio$HPDI.higher <- exp(hpdi$HPDI.higher)


hpdi_oddsratio_algorithm <- hpdi_oddsratio %>% 
      dplyr::filter(str_detect(Parameter, "a_alg\\[")) %>%
      dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels

hpdi_oddsratio_b_noise <- hpdi_oddsratio %>% 
      dplyr::filter(str_detect(Parameter, "b_noise\\[")) %>%
      dplyr::mutate(Parameter=algorithms) #Changing to the algorithms labels

hpdi_s <- hpdi %>% 
      dplyr::filter(Parameter=='s') 


p_alg<-ggplot(data=hpdi_oddsratio_algorithm, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="Odds ratio for intercept", x="Algorithm")+
  coord_flip()
p_alg + plot_annotation(title = 'HPDI interval for the algorithms OR')

p_noise <- ggplot(data=hpdi_oddsratio_b_noise, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs( y="Odds ratio for b_noise", x="Parameter")+
  coord_flip()+
  theme()
p_noise + plot_annotation(title = 'HPDI interval for the noise coefficients OR')

p_s <- ggplot(data=hpdi_s, aes(x=Parameter))+
  geom_pointrange(aes(
    ymin=HPDI.lower, 
    ymax=HPDI.higher, 
    y=Mean))+
  labs(y="s", x="Parameter")+
  coord_flip()
p_s + plot_annotation(title = 'HPDI interval for s')

Creating an output table

algreduced <- c("CMAES", "Cuckoo", "DiffEvol.", "NelderM.", "PSO", "RandomS1","RandomS2", "SimAnneal")
rename_pars <- c(
  paste(rep('a_',length(algorithms)),algreduced, sep = ""),
  paste(rep('b_',length(algorithms)),algreduced, sep = ""),
  's')

t<-create_table_model(probsuccess.fit, pars = c(a_alg, b_noise, 's'), renamepars =  rename_pars)

t<- t %>% 
  mutate('OR Mean' = exp(Mean),
         'OR HPD low' = exp(HPDI.lower),
         'OR HPD high' = exp(HPDI.higher))
colnames(t)<-c("Parameter", "Mean", "HPD low", "HPD high",'OR Mean','OR HPD low','OR HPD high')

saveRDS(t,'./statscomp-paper/tables/datafortables/probsuccess-par-table.RDS')