Comprehensive Examples

Introduction

This vignette provides comprehensive examples of using the MultiCorrSurvBinary package for various clinical trial simulation scenarios.

library(MultiCorrSurvBinary)
library(dplyr)

Example 1: Oncology Phase II Trial

Simulate a single-arm Phase II oncology trial with correlated endpoints.

# Seed the session RNG before simulating the Phase II cohort
set.seed(2024)

# Single-arm simulation with three correlated endpoints
phase2_data <- rCorrSurvBinary(
  # Simulation design
  nsim = 50,                # kept small so the vignette builds quickly
  n = 80,
  outcomes = c('OS', 'PFS', 'OR'),
  tau = 18,                 # accrual period: 18 months
  # Marginal targets
  mst.OS = 15,              # median OS: 15 months
  mst.PFS = 8,              # median PFS: 8 months
  p.OR = 0.35,              # response rate: 35%
  # Pairwise correlations
  rho.OS.PFS = 0.6,         # OS-PFS (strong)
  rho.OS.OR = 0.4,          # OS-response (moderate)
  rho.PFS.OR = 0.5,         # PFS-response (moderate)
  # Run control
  seed = 123,
  validate.bounds = FALSE   # bounds validation skipped for speed
)

# Pool every simulated patient and report the marginal summaries together
# with the pairwise Pearson correlations of the three endpoints
phase2_summary <- summarise(
  phase2_data,
  n_sims = n_distinct(sim),
  median_OS = median(OS),
  median_PFS = median(PFS),
  response_rate = mean(OR),
  correlation_OS_PFS = cor(OS, PFS),
  correlation_OS_OR = cor(OS, OR),
  correlation_PFS_OR = cor(PFS, OR)
)

print(phase2_summary)
#> # A tibble: 1 × 7
#>   n_sims median_OS median_PFS response_rate correlation_OS_PFS correlation_OS_OR correlation_PFS_OR
#>    <int>     <dbl>      <dbl>         <dbl>              <dbl>             <dbl>              <dbl>
#> 1     50      15.4       6.84         0.344              0.683             0.292              0.356

Example 2: Randomized Controlled Trial

Simulate a two-arm randomized controlled trial.

# Two-arm design. Both arms enrol 100 patients and share one correlation
# structure; only the marginal targets (medians, response rate) differ.
rct_params <- local({
  shared_rho <- list(rho.OS.PFS = 0.7, rho.OS.OR = 0.3, rho.PFS.OR = 0.4)

  # Assemble one arm: marginals first, then size, then the shared correlations
  make_arm <- function(mst.OS, mst.PFS, p.OR) {
    c(
      list(mst.OS = mst.OS, mst.PFS = mst.PFS, p.OR = p.OR, n = 100),
      shared_rho
    )
  }

  list(
    experimental = make_arm(mst.OS = 24, mst.PFS = 12, p.OR = 0.5),
    control = make_arm(mst.OS = 18, mst.PFS = 9, p.OR = 0.3)
  )
})

# Simulate the two-arm trial from the per-arm parameter list
rct_data <- rCorrSurvBinaryMultiArmSubgroup(
  arm.params = rct_params,
  outcomes = c('OS', 'PFS', 'OR'),
  tau = 36,
  nsim = 50,                # kept small so the vignette builds quickly
  seed = 456,
  validate.bounds = FALSE
)

# Per-arm summary pooled over all simulated trials
arm_comparison <- summarise(
  group_by(rct_data, ARM),
  n_patients = n(),
  median_OS = median(OS),
  median_PFS = median(PFS),
  response_rate = mean(OR),
  .groups = 'drop'
)

print(arm_comparison)
#> # A tibble: 2 × 5
#>   ARM          n_patients median_OS median_PFS response_rate
#>   <chr>             <int>     <dbl>      <dbl>         <dbl>
#> 1 control            5000      18.1       7.93         0.306
#> 2 experimental       5000      23.3      10.3          0.495

Example 3: Biomarker-Stratified Trial

Simulate a trial with biomarker-defined subgroups.

# Biomarker-stratified design. Subgroups must be named sub1, sub2:
#   sub1 = biomarker-positive (60 patients per arm)
#   sub2 = biomarker-negative (40 patients per arm)
# Every stratum uses the same correlation structure.
biomarker_params <- local({
  rho <- list(rho.OS.PFS = 0.6, rho.OS.OR = 0.4, rho.PFS.OR = 0.5)

  # Assemble one arm-by-subgroup cell: marginals, size, shared correlations
  stratum <- function(mst.OS, mst.PFS, p.OR, n) {
    c(list(mst.OS = mst.OS, mst.PFS = mst.PFS, p.OR = p.OR, n = n), rho)
  }

  list(
    treatment = list(
      sub1 = stratum(mst.OS = 30, mst.PFS = 18, p.OR = 0.7, n = 60),
      sub2 = stratum(mst.OS = 20, mst.PFS = 10, p.OR = 0.4, n = 40)
    ),
    control = list(
      sub1 = stratum(mst.OS = 22, mst.PFS = 12, p.OR = 0.4, n = 60),
      sub2 = stratum(mst.OS = 18, mst.PFS = 8, p.OR = 0.25, n = 40)
    )
  )
})

# Simulate the stratified trial from the arm-by-subgroup parameter list
biomarker_data <- rCorrSurvBinaryMultiArmSubgroup(
  arm.params = biomarker_params,
  outcomes = c('OS', 'PFS', 'OR'),
  tau = 24,
  nsim = 20,                # kept small so the vignette builds quickly
  seed = 789,
  validate.bounds = FALSE
)

# Summaries per arm and biomarker stratum (strata live in the SUBGROUP column)
biomarker_analysis <- summarise(
  group_by(biomarker_data, ARM, SUBGROUP),
  n_patients = n(),
  median_OS = round(median(OS), 1),
  median_PFS = round(median(PFS), 1),
  response_rate = round(mean(OR), 3),
  .groups = 'drop'
)

print(biomarker_analysis)

Example 4: Event-Driven Analysis

Perform interim and final analyses based on event counts.

# Event-driven analysis of the simulated RCT at 25, 50 and 75 events.
# NOTE(review): prioritize = "OS" presumably names the endpoint whose event
# count triggers each look - confirm against the function documentation.
# NOTE(review): the recorded console output reports "Control arm: experimental"
# and "Treatment arms: control", i.e. the arms look swapped relative to
# rct_params. With alternative = "greater" that would test the wrong
# direction; confirm how the reference arm is selected.
event_analysis <- AnalysisCorrSurvBinary(
  data = rct_data,
  E = c(25, 50, 75),
  prioritize = "OS",
  subgroup.prioritize = "entire",
  alternative = "greater"
)
#> Available arms: control, experimental
#> Control arm: experimental
#> Treatment arms: control
#> Available outcomes: OS, PFS, OR
#> Subgroup prioritization: entire
#> Total tests per simulation: 9
#> Performing analysis for 50 simulations...
#> Processing simulation 2/50 (4.0%)
#> Processing simulation 4/50 (8.0%)
#> Processing simulation 6/50 (12.0%)
#> Processing simulation 8/50 (16.0%)
#> Processing simulation 10/50 (20.0%)
#> Processing simulation 12/50 (24.0%)
#> Processing simulation 14/50 (28.0%)
#> Processing simulation 16/50 (32.0%)
#> Processing simulation 18/50 (36.0%)
#> Processing simulation 20/50 (40.0%)
#> Processing simulation 22/50 (44.0%)
#> Processing simulation 24/50 (48.0%)
#> Processing simulation 26/50 (52.0%)
#> Processing simulation 28/50 (56.0%)
#> Processing simulation 30/50 (60.0%)
#> Processing simulation 32/50 (64.0%)
#> Processing simulation 34/50 (68.0%)
#> Processing simulation 36/50 (72.0%)
#> Processing simulation 38/50 (76.0%)
#> Processing simulation 40/50 (80.0%)
#> Processing simulation 42/50 (84.0%)
#> Processing simulation 44/50 (88.0%)
#> Processing simulation 46/50 (92.0%)
#> Processing simulation 48/50 (96.0%)
#> Processing simulation 50/50 (100.0%)
#> Event-driven analysis completed.
#> Total results generated: 450

# Summarize results by analysis timepoint and outcome.
# Guarded so that an empty result (no look reached) prints a message instead
# of an empty table.
if (nrow(event_analysis) > 0) {
  event_summary <- event_analysis %>%
    group_by(analysis_event, outcome) %>%
    summarise(
      n_comparisons = n(),
      # Simulation inputs (mst.*, tau) are specified in months, so
      # analysis_time is already in months; scaling by 12 would overstate
      # the timing twelve-fold. Re-render the vignette to refresh the
      # recorded output for this column.
      mean_analysis_time_months = round(mean(analysis_time), 1),
      significant_tests = sum(pvalue < 0.05, na.rm = TRUE),
      # Share of non-missing p-values below the 0.05 threshold
      power = round(mean(pvalue < 0.05, na.rm = TRUE), 3),
      .groups = 'drop'
    )

  print(event_summary)
} else {
  cat("No analysis results generated (insufficient events)\n")
}
#> # A tibble: 9 × 6
#>   analysis_event outcome n_comparisons mean_analysis_time_months significant_tests power
#>            <dbl> <chr>           <int>                     <dbl>             <int> <dbl>
#> 1             25 OR                 50                      216.                 0  0   
#> 2             25 OS                 50                      216.                 1  0.02
#> 3             25 PFS                50                      216.                 1  0.02
#> 4             50 OR                 50                      317.                 0  0   
#> 5             50 OS                 50                      317.                 0  0   
#> 6             50 PFS                50                      317.                 1  0.02
#> 7             75 OR                 50                      403.                 0  0   
#> 8             75 OS                 50                      403.                 0  0   
#> 9             75 PFS                50                      403.                 0  0

Example 5: Correlation Bounds Exploration

Explore feasible correlation ranges for different parameter combinations.

# Tabulate the feasible correlation range of each endpoint pair.
#
# Arguments:
#   mst_os, mst_pfs  median OS / PFS; converted to rates with log(2) / median
#                    before being handed to CorrBounds() as lambda.OS/lambda.PFS
#   p_or             response probability, handed to CorrBounds() as p.OR
#
# Returns a data.frame holding the three inputs followed by the lower and
# upper bound (rounded to 3 decimals) for OS-PFS, OS-OR and PFS-OR.
explore_bounds <- function(mst_os, mst_pfs, p_or) {
  hazard_os <- log(2) / mst_os
  hazard_pfs <- log(2) / mst_pfs

  # All requested correlations are zero; only the returned bounds are used
  limits <- CorrBounds(
    outcomes = c('OS', 'PFS', 'OR'),
    lambda.OS = hazard_os,
    lambda.PFS = hazard_pfs,
    p.OR = p_or,
    rho.OS.PFS = 0,
    rho.OS.OR = 0,
    rho.PFS.OR = 0
  )$bounds

  os_pfs <- limits$OS.PFS
  os_or <- limits$OS.OR
  pfs_or <- limits$PFS.OR

  data.frame(
    mst_OS = mst_os,
    mst_PFS = mst_pfs,
    p_OR = p_or,
    OS_PFS_lower = round(os_pfs$lower, 3),
    OS_PFS_upper = round(os_pfs$upper, 3),
    OS_OR_lower = round(os_or$lower, 3),
    OS_OR_upper = round(os_or$upper, 3),
    PFS_OR_lower = round(pfs_or$lower, 3),
    PFS_OR_upper = round(pfs_or$upper, 3)
  )
}

# Scenario grid: each row is one (median OS, median PFS, response rate) setting
param_combinations <- data.frame(
  mst_os = c(12, 18, 24),
  mst_pfs = c(6, 9, 12),
  p_or = c(0.3, 0.5, 0.7)
)

# Evaluate explore_bounds() once per scenario row and stack the results.
# seq_len() gives an empty index for a zero-row grid, whereas 1:nrow() would
# iterate over c(1, 0) and call explore_bounds() with missing values.
bounds_results <- do.call(
  rbind,
  lapply(seq_len(nrow(param_combinations)), function(i) {
    explore_bounds(
      param_combinations$mst_os[i],
      param_combinations$mst_pfs[i],
      param_combinations$p_or[i]
    )
  })
)

# Display bounds for different scenarios
print(bounds_results)
#>   mst_OS mst_PFS p_OR OS_PFS_lower OS_PFS_upper OS_OR_lower OS_OR_upper PFS_OR_lower PFS_OR_upper
#> 1     12       6  0.3       -0.645        0.936      -0.545       0.788       -0.545        0.788
#> 2     18       9  0.5       -0.645        0.936      -0.693       0.693       -0.693        0.693
#> 3     24      12  0.7       -0.645        0.936      -0.788       0.545       -0.788        0.545

Example 6: Validation and Diagnostics

Validate simulation results and perform diagnostic checks.

# Simulate a larger single-arm dataset to check against its specification
validation_data <- rCorrSurvBinary(
  # Simulation design
  nsim = 100,               # kept small for the vignette
  n = 200,
  outcomes = c('OS', 'PFS', 'OR'),
  tau = 24,
  # Marginal targets
  mst.OS = 18,
  mst.PFS = 10,
  p.OR = 0.4,
  # Pairwise correlations
  rho.OS.PFS = 0.6,
  rho.OS.OR = 0.3,
  rho.PFS.OR = 0.4,
  # Run control
  seed = 2024,
  validate.bounds = FALSE
)

# Put each specified ("theoretical") value next to its pooled empirical
# estimate. The theoretical constants repeat the arguments used to generate
# validation_data and must be kept in sync with that call.
# NOTE(review): the recorded output shows an empirical PFS median of 8.16
# against a specified 10 - confirm whether mst.PFS is expected to hold after
# the OS >= PFS constraint is applied.
marginal_check <- summarise(
  validation_data,
  # Median OS
  theoretical_median_OS = 18,
  empirical_median_OS = round(median(OS), 2),
  # Median PFS
  theoretical_median_PFS = 10,
  empirical_median_PFS = round(median(PFS), 2),
  # Response rate
  theoretical_response_rate = 0.4,
  empirical_response_rate = round(mean(OR), 3),
  # Pairwise correlations
  theoretical_cor_OS_PFS = 0.6,
  empirical_cor_OS_PFS = round(cor(OS, PFS), 3),
  theoretical_cor_OS_OR = 0.3,
  empirical_cor_OS_OR = round(cor(OS, OR), 3),
  theoretical_cor_PFS_OR = 0.4,
  empirical_cor_PFS_OR = round(cor(PFS, OR), 3)
)

print(marginal_check)
#> # A tibble: 1 × 12
#>   theoretical_median_OS empirical_median_OS theoretical_median_PFS empirical_median_PFS
#>                   <dbl>               <dbl>                  <dbl>                <dbl>
#> 1                    18                18.3                     10                 8.16
#> # ℹ 8 more variables: theoretical_response_rate <dbl>, empirical_response_rate <dbl>,
#> #   theoretical_cor_OS_PFS <dbl>, empirical_cor_OS_PFS <dbl>, theoretical_cor_OS_OR <dbl>,
#> #   empirical_cor_OS_OR <dbl>, theoretical_cor_PFS_OR <dbl>, empirical_cor_PFS_OR <dbl>

# Count simulated patients whose OS is shorter than their PFS; a valid
# dataset should contain none
constraint_check <- summarise(
  validation_data,
  violations = sum(OS < PFS),
  total_observations = n(),
  violation_rate = round(mean(OS < PFS), 4)
)

print(constraint_check)
#> # A tibble: 1 × 3
#>   violations total_observations violation_rate
#>        <int>              <int>          <dbl>
#> 1          0              20000              0

Best Practices and Tips

1. Parameter Selection

- Always validate correlation bounds before simulation.
- Consider the biological plausibility of the correlation values.
- Use realistic survival medians based on the disease context.

2. Sample Size Planning

- Account for correlation when calculating power.
- Consider event-driven analyses for survival endpoints.
- Plan interim analyses carefully to control the Type I error rate.

3. Simulation Efficiency

- Start with a small `nsim` for testing.
- Use seeds for reproducibility.
- Set `validate.bounds = FALSE` for large simulations after initial validation.

4. Validation

- Check that the marginal distributions match their specifications.
- Verify that the correlation structures are preserved.
- Ensure that biological constraints (OS >= PFS) are maintained.

This comprehensive set of examples demonstrates the flexibility and power of the MultiCorrSurvBinary package for clinical trial simulations.