SelectBoost.FDA now includes a validation layer for
repeated simulations, method benchmarks, plain-SelectBoost baselines,
and direct advantage summaries for FDA-aware
SelectBoost.
# Attach the package, then simulate a single data set: 60 observations of a
# functional predictor sampled on a 30-point grid, using the
# "localized_dense" scenario. The seed makes the draw reproducible.
library(SelectBoost.FDA)

sim_grid <- simulate_fda_scenario(
  n = 60,
  grid_length = 30,
  scenario = "localized_dense",
  representation = "grid",
  seed = 1
)

# Auto-print the simulation object to show its summary.
sim_grid
#> FDA simulation data
#> observations: 60
#> features: 62
#> active features: 13
#> scenario: localized_dense
#> confounding strength: 0
#> active region scale: 1
#> local correlation: 0
#> active predictors: signal, age, treatment
# Show the first rows of the feature-level map attached to the simulated
# design (one row per feature, with its predictor, block, position and argval).
head(selection_map(sim_grid$design))
#> feature predictor block position argval representation
#> signal.1 signal_1 signal signal 1 0 grid
#> signal.2 signal_2 signal signal 2 0.0344827586206897 grid
#> signal.3 signal_3 signal signal 3 0.0689655172413793 grid
#> signal.4 signal_4 signal signal 4 0.103448275862069 grid
#> signal.5 signal_5 signal signal 5 0.137931034482759 grid
#> signal.6 signal_6 signal signal 6 0.172413793103448 grid
#> basis_type transform source_predictor source_representation
#> signal.1 <NA> identity signal grid
#> signal.2 <NA> identity signal grid
#> signal.3 <NA> identity signal grid
#> signal.4 <NA> identity signal grid
#> signal.5 <NA> identity signal grid
#> signal.6 <NA> identity signal grid
#> source_position_start source_position_end source_argval_start
#> signal.1 1 1 0
#> signal.2 2 2 0.0344827586206897
#> signal.3 3 3 0.0689655172413793
#> signal.4 4 4 0.103448275862069
#> signal.5 5 5 0.137931034482759
#> signal.6 6 6 0.172413793103448
#> source_argval_end domain_start domain_end component
#> signal.1 0 0 0 <NA>
#> signal.2 0.0344827586206897 0.0344827586206897 0.0344827586206897 <NA>
#> signal.3 0.0689655172413793 0.0689655172413793 0.0689655172413793 <NA>
#> signal.4 0.103448275862069 0.103448275862069 0.103448275862069 <NA>
#> signal.5 0.137931034482759 0.137931034482759 0.137931034482759 <NA>
#> signal.6 0.172413793103448 0.172413793103448 0.172413793103448 <NA>
#> unit feature_index basis_component domain_label
#> signal.1 <NA> 1 <NA> 0
#> signal.2 <NA> 2 <NA> 0.0344827586206897
#> signal.3 <NA> 3 <NA> 0.0689655172413793
#> signal.4 <NA> 4 <NA> 0.103448275862069
#> signal.5 <NA> 5 <NA> 0.137931034482759
#> signal.6 <NA> 6 <NA> 0.172413793103448
# Names of the predictors recorded as active in the simulation truth.
sim_grid$truth$active_predictors
#> [1] "signal" "age" "treatment"

The returned object keeps both the fitted fda_design and
the mapped truth for the transformed feature space.
# Repeated study on the "localized_dense" scenario with a basis
# representation: two replicates, each benchmarking stability selection,
# FDA-aware selectboost, and the plain-SelectBoost baseline.
study_dense <- run_simulation_study(
  n_rep = 2,
  simulate_args = list(
    n = 50,
    grid_length = 28,
    scenario = "localized_dense",
    representation = "basis"
  ),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group", "basis"),
    stability_args = list(
      selector = "lasso",
      B = 6,
      cutoff = 0.5,
      seed = 4
    ),
    # The two SelectBoost variants are given identical settings so the
    # comparison between them is like-for-like.
    selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    ),
    plain_selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    )
  ),
  seed = 10
)
# Same study design as above, but on the "distributed_smooth" scenario and
# with different seeds.
study_smooth <- run_simulation_study(
  n_rep = 2,
  simulate_args = list(
    n = 50,
    grid_length = 28,
    scenario = "distributed_smooth",
    representation = "basis"
  ),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group", "basis"),
    stability_args = list(
      selector = "lasso",
      B = 6,
      cutoff = 0.5,
      seed = 14
    ),
    # The two SelectBoost variants are given identical settings so the
    # comparison between them is like-for-like.
    selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    ),
    plain_selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    )
  ),
  seed = 20
)
# Feature-level F1 of FDA-aware selectboost against each reference method on
# the localized_dense study. In the printed table, delta_mean is the target
# mean minus the reference mean.
summarise_benchmark_advantage(
  study_dense,
  target = "selectboost",
  reference = c("plain_selectboost", "stability"),
  level = "feature",
  metric = "f1"
)
#> scenario
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 localized_dense
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 localized_dense
#> representation
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 basis
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 basis
#> family
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 gaussian
#> level
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 feature
#> target
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 selectboost
#> reference
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 stability
#> metric
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 f1
#> n_rep
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 2
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 2
#> target_value_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.7638889
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.7638889
#> reference_value_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.7500000
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.6923077
#> delta_mean
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.01388889
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.07158120
#> delta_sd
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.09035253
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.12842751
#> win_rate
#> localized_dense.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.5
#> localized_dense.basis.gaussian.feature.selectboost.stability.f1 0.5
# Feature-level F1 of FDA-aware selectboost against each reference method on
# the distributed_smooth study. In the printed table, delta_mean is the
# target mean minus the reference mean.
summarise_benchmark_advantage(
  study_smooth,
  target = "selectboost",
  reference = c("plain_selectboost", "stability"),
  level = "feature",
  metric = "f1"
)
#> scenario
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 distributed_smooth
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 distributed_smooth
#> representation
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 basis
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 basis
#> family
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 gaussian
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 gaussian
#> level
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 feature
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 feature
#> target
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 selectboost
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 selectboost
#> reference
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 stability
#> metric
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 f1
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 f1
#> n_rep
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 2
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 2
#> target_value_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.8117647
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.8117647
#> reference_value_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.8444444
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.5000000
#> delta_mean
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 -0.03267974
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.31176471
#> delta_sd
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.07949174
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 0.01663781
#> win_rate
#> distributed_smooth.basis.gaussian.feature.selectboost.plain_selectboost.f1 0.5
#> distributed_smooth.basis.gaussian.feature.selectboost.stability.f1 1.0

The repeated-study summary reports the mean and standard deviation of
recovery metrics by method, evaluation level, scenario, and
c0 when applicable. In practice, the
localized_dense setting is the most direct stress test for
the FDA-aware grouping built into selectboost_fda().
# Sensitivity study: one replicate per combination of a simulation setting
# and a SelectBoost grouping setting.
sensitivity <- run_selectboost_sensitivity_study(
  n_rep = 1,
  # One row per simulation setting (values are paired by row).
  simulate_grid = data.frame(
    scenario = c("localized_dense", "confounded_blocks"),
    confounding_strength = c(0.4, 0.9),
    active_region_scale = c(0.8, 0.7),
    local_correlation = c(1, 2),
    stringsAsFactors = FALSE
  ),
  # One row per grouping setting; "correlation" is given no bandwidth (NA).
  selectboost_grid = data.frame(
    association_method = c("correlation", "hybrid", "interval"),
    bandwidth = c(NA, 4, 4),
    stringsAsFactors = FALSE
  ),
  # Arguments shared by every simulated data set.
  simulate_args = list(
    n = 50,
    grid_length = 28,
    representation = "grid"
  ),
  benchmark_args = list(
    methods = c("stability", "selectboost", "plain_selectboost"),
    levels = c("feature", "group"),
    stability_args = list(
      selector = "lasso",
      B = 6,
      cutoff = 0.5,
      seed = 40
    ),
    selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    ),
    plain_selectboost_args = list(
      selector = "lasso",
      B = 4,
      steps.seq = c(0.7, 0.4),
      c0lim = FALSE
    )
  ),
  seed = 50
)
# Feature-level F1 of FDA-aware selectboost against the plain baseline,
# reported separately for each setting of the sensitivity study.
summarise_benchmark_advantage(
  sensitivity,
  target = "selectboost",
  reference = "plain_selectboost",
  level = "feature",
  metric = "f1"
)
#> scenario
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 confounded_blocks
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 confounded_blocks
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 localized_dense
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 localized_dense
#> representation
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 grid
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 grid
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 grid
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 grid
#> family
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 gaussian
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 gaussian
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 gaussian
#> association_method
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 hybrid
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 interval
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 hybrid
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 interval
#> bandwidth
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 4
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 4
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 4
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 4
#> confounding_strength
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.9
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.9
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4
#> active_region_scale
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.7
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.7
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.8
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.8
#> local_correlation
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 2
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 2
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> level
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 feature
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 feature
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 feature
#> target
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 selectboost
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 selectboost
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 selectboost
#> reference
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 plain_selectboost
#> metric
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 f1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 f1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 f1
#> n_rep
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 1
#> target_value_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.6206897
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.5454545
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5517241
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.4444444
#> reference_value_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.5294118
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.4736842
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5517241
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.5625000
#> delta_mean
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.09127789
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0.07177033
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0.00000000
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 -0.11805556
#> delta_sd
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> win_rate
#> confounded_blocks.grid.gaussian.hybrid.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> confounded_blocks.grid.gaussian.interval.4.0.9.0.7.2.feature.selectboost.plain_selectboost.f1 1
#> localized_dense.grid.gaussian.hybrid.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0
#> localized_dense.grid.gaussian.interval.4.0.4.0.8.1.feature.selectboost.plain_selectboost.f1 0

This is the intended benchmark workflow when the goal is to show when
FDA-aware grouping matters. The summary table keeps
association_method, bandwidth,
confounding_strength, active_region_scale, and
local_correlation as explicit columns, so it is
straightforward to isolate the settings where
selectboost_fda() gains over the plain baseline.
The repository also ships a larger saved sensitivity study generated
by tools/run_selectboost_sensitivity_study.R. That script
runs a broader sweep and writes reusable benchmark summaries to
inst/extdata/benchmarks/.
# Locate the benchmark summaries installed with the package and load the
# saved table of top sensitivity-study settings.
benchmark_dir <- system.file(
  "extdata", "benchmarks",
  package = "SelectBoost.FDA"
)
top_settings_path <- file.path(
  benchmark_dir,
  "selectboost_sensitivity_top_settings.csv"
)
top_feature_settings <- utils::read.csv(
  top_settings_path,
  stringsAsFactors = FALSE
)

# Columns to display: the simulation and grouping settings, followed by the
# F1 comparison between selectboost and the plain baseline.
comparison_columns <- c(
  "scenario",
  "confounding_strength",
  "active_region_scale",
  "local_correlation",
  "association_method",
  "bandwidth",
  "selectboost_f1_mean",
  "plain_selectboost_f1_mean",
  "delta_mean",
  "win_rate"
)

# Show the first ten rows of the saved table.
utils::head(top_feature_settings[, comparison_columns], 10)
#> scenario confounding_strength active_region_scale local_correlation
#> 1 confounded_blocks 0.6 0.5 2
#> 2 confounded_blocks 1.0 0.8 2
#> 3 confounded_blocks 0.6 0.8 2
#> 4 localized_dense 0.6 0.5 2
#> 5 confounded_blocks 0.6 0.5 2
#> 6 confounded_blocks 0.6 0.5 2
#> 7 confounded_blocks 1.0 0.5 0
#> 8 localized_dense 1.0 0.8 2
#> 9 confounded_blocks 1.0 0.5 2
#> 10 localized_dense 0.6 0.8 2
#> association_method bandwidth selectboost_f1_mean plain_selectboost_f1_mean
#> 1 interval 8 0.5362319 0.4087266
#> 2 hybrid 4 0.5885135 0.4826750
#> 3 hybrid 4 0.5833671 0.4944862
#> 4 neighborhood 4 0.4972542 0.4144859
#> 5 hybrid 4 0.5429293 0.4657088
#> 6 neighborhood 4 0.5072823 0.4322990
#> 7 interval 8 0.5323457 0.4575499
#> 8 neighborhood 4 0.5635386 0.4924953
#> 9 neighborhood 4 0.4655172 0.3983586
#> 10 interval 8 0.5392157 0.4769314
#> delta_mean win_rate
#> 1 0.12750533 1.0000000
#> 2 0.10583853 1.0000000
#> 3 0.08888092 1.0000000
#> 4 0.08276831 0.6666667
#> 5 0.07722048 0.6666667
#> 6 0.07498337 1.0000000
#> 7 0.07479582 1.0000000
#> 8 0.07104330 0.6666667
#> 9 0.06715866 1.0000000
#> 10 0.06228427 0.6666667

The key comparison columns are selectboost_f1_mean,
plain_selectboost_f1_mean, and delta_mean.
This makes the algorithm comparison explicit at the feature-selection
level while keeping the FDA-specific settings attached to each row.