library(designit)
library(ggplot2)
library(dplyr)
library(tidyr)

Example 2: Scoring two plates at once

(latin square for each plate should give the best score)

We set up in one go 2 plate scoring functions, each one acting locally on a specific plate, plus one osat score to guarantee uniform distribution of groups across plates.

The initial sample allocation (by assign_in_order) leads to a poor starting point since each plate has only 2 of the 4 groups represented.

This is not a problem as long as we make sure that initial permutations are likely to remedy the situation. That’s why we ensure 10 pairwise sample swaps for the first iterations.

# Setting up the batch container
example2 <- BatchContainer$new(
  dimensions = c(
    plate = 2,
    row = 4, col = 4
  )
)

# Add samples to container
example2 <- assign_in_order(example2, samples = tibble::tibble(
  Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)),
  ID = 1:32
))

scoring_f <- c(mk_plate_scoring_functions(example2, plate = "plate", row = "row", column = "col", group = "Group"),
  osat_plate = osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
)

plot_plate(example2,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: Initial sample arrangement"
)


example2$score(scoring_f)
#>    Plate 1    Plate 2 osat_plate 
#>   23.89265   23.89265  128.00000
set.seed(41)
bc <- optimize_design(
  example2,
  scoring = scoring_f,
  n_shuffle = c(rep(10, 10), rep(3, 90), rep(2, 100), rep(1, 1400)),
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
  aggregate_scores_func = worst_score,
  quiet = TRUE
)
bc$trace$elapsed
#> Time difference of 13.84524 secs

bc$score(scoring_f)
#>    Plate 1    Plate 2 osat_plate 
#>   6.127258   6.094080   0.000000

plot_plate(bc,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: Design created by swapping samples 'globally' across the plates"
)

While this ‘global’ optimization is possible, it does probably not converge to an (almost) ideal solution in an acceptable time if there are more samples involved. This is due to a lot of unproductive sample swapping happening across the plates.

One way to address this: we may split the optimization into two cycles, first assigning samples to plates (balancing groups), then improving the positions of the samples within each plate. This motivates the use of a dedicated sample permutation function which takes the plate structure into account and only shuffles samples around within one plate.

scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))

set.seed(42)
bc <- optimize_design(
  example2,
  scoring = scoring_f,
  quiet = TRUE,
  max_iter = 200, # this is set to shorten vignette run-time, normally we don't know.
  n_shuffle = 2,
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
)
bc$trace$elapsed
#> Time difference of 1.634799 secs

plot_plate(bc,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: 'Plate wise' design\nStep 1: after allocating samples to plates"
)


scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")

bc$score(scoring_f)
#>  Plate 1  Plate 2 
#> 12.77527 13.63704
set.seed(42)
bc <- optimize_design(
  bc,
  scoring = scoring_f,
  max_iter = 400,
  shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")),
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
  aggregate_scores_func = L2s_norm,
  quiet = TRUE
)
bc$trace$elapsed
#> Time differences in secs
#> [1] 1.634799 2.326842

bc$score(scoring_f)
#>  Plate 1  Plate 2 
#> 6.854748 6.309297

plot_plate(bc,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: 'Plate wise' design\nStep 2: after arranging samples within plates"
)

In this case, the shuffling function exchanges 1 pair of sample assignments every time (the default). However, any number of constant swaps or a swapping protocol (formally a vector of integers) can be supplied as well.

Now for the most efficient solution: we start again by first assigning samples to plates (balancing groups), then making use of the independence of the two within-plate optimizations and improving them one after the other.

This is possible by passing the argument to the function that generates the permutations. It enforces permutation only to happen first within plate 1, then within plate 2, so that the two scores can be optimized in succeeding runs.

scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
bc <- optimize_design(
  example2,
  scoring = scoring_f,
  quiet = TRUE,
  max_iter = 150, # this is set to shorten vignette run-time, normally we don't know.
  n_shuffle = 2,
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
)
bc$trace$elapsed
#> Time difference of 1.372123 secs

plot_plate(bc,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: 'Serial plate' design\nStep 1: after allocating samples to plates"
)


scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")

bc$score(scoring_f)
#>  Plate 1  Plate 2 
#> 10.57482 26.16613
set.seed(42)
bc <- optimize_design(
  bc,
  scoring = scoring_f,
  max_iter = 150,
  quiet = TRUE,
  shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(1)),
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
  aggregate_scores_func = L2s_norm
)
bc$trace$elapsed
#> Time differences in secs
#> [1] 1.372123 1.127218

bc$score(scoring_f)
#>   Plate 1   Plate 2 
#>  6.416193 26.166134
set.seed(42)
bc <- optimize_design(
  bc,
  scoring = scoring_f,
  max_iter = 550,
  quiet = TRUE,
  shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(2)),
  acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
  aggregate_scores_func = L2s_norm
)
bc$trace$elapsed
#> Time differences in secs
#> [1] 1.372123 1.127218 3.161646

bc$score(scoring_f)
#>  Plate 1  Plate 2 
#> 6.416193 6.581966

plot_plate(bc,
  plate = plate, row = row, column = col, .color = Group,
  title = "Ex2: 'Serial plate' design\nStep 2: after optimizing each plate in turn"
)