Introduction to MultiCorrSurvBinary

Gosuke Homma

2025-06-16

Overview

The MultiCorrSurvBinary package provides comprehensive tools for generating correlated time-to-event and binary outcomes in clinical trial simulations. It is particularly useful when overall survival, progression-free survival, and objective response must be simulated jointly with specified marginal distributions and pairwise correlations, including multi-arm and subgroup settings and event-driven analyses.

Key Concepts

Outcomes Supported

The package supports three types of clinical trial outcomes (a short sketch after this list shows how their marginal parameters are specified):

  1. OS (Overall Survival): Time-to-event outcome following an exponential distribution
  2. PFS (Progression-Free Survival): Time-to-event outcome following an exponential distribution
  3. OR (Objective Response): Binary outcome following a Bernoulli distribution
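
These marginals are parameterized in the usual way: an exponential time-to-event outcome with median survival time mst has rate log(2)/mst, and OR is Bernoulli with success probability p.OR. The snippet below is a minimal illustration of that conversion in plain R (not package code), reusing the medians from the examples that follow.

# Median survival time -> exponential rate (median = log(2) / rate)
mst.OS <- 12
lambda.OS <- log(2) / mst.OS
qexp(0.5, rate = lambda.OS)               # recovers the median of 12

# Binary objective response: Bernoulli with success probability p.OR
p.OR <- 0.4
mean(rbinom(1e5, size = 1, prob = p.OR))  # approximately 0.4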

Correlation Structure

Correlations between outcomes are specified pairwise through the rho.OS.PFS, rho.OS.OR, and rho.PFS.OR parameters. The feasible range of each pairwise correlation depends on the marginal distributions, so the requested values should be checked with CorrBounds() (illustrated below).

OS >= PFS Constraint

When both OS and PFS are included, the biological constraint OS >= PFS is enforced with options to prioritize preserving either the OS or PFS marginal distribution.
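
The package handles this constraint internally; the fragment below is only a conceptual sketch of one common way to impose OS >= PFS on already-simulated times, and is not the package's actual algorithm. The function name enforce_constraint is hypothetical.

# Conceptual sketch only (NOT the package implementation): given simulated
# OS and PFS vectors, enforce OS >= PFS by truncating the non-prioritized one.
enforce_constraint <- function(OS, PFS, prioritize = c("OS", "PFS")) {
  prioritize <- match.arg(prioritize)
  if (prioritize == "OS") {
    PFS <- pmin(PFS, OS)   # keep the OS marginal intact, cap PFS at OS
  } else {
    OS <- pmax(OS, PFS)    # keep the PFS marginal intact, lift OS to at least PFS
  }
  data.frame(OS = OS, PFS = PFS)
}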

Basic Usage

Load the Package

library(MultiCorrSurvBinary)
library(dplyr)

Single Population Simulation

Generate correlated outcomes for a single population:

# Generate correlated OS, PFS, and OR for single population
result_single <- rCorrSurvBinary(
  nsim = 10,
  outcomes = c('OS', 'PFS', 'OR'),
  n = 100,
  mst.OS = 12,      # 12-month median OS
  mst.PFS = 6,      # 6-month median PFS
  p.OR = 0.4,       # 40% response rate
  rho.OS.PFS = 0.5, # Correlation between OS and PFS
  rho.OS.OR = 0.3,  # Correlation between OS and OR
  rho.PFS.OR = 0.4, # Correlation between PFS and OR
  tau = 24,         # 24-month accrual period
  seed = 123,
  validate.bounds = FALSE  # Skip validation for demo speed
)

# View first few rows
head(result_single)
#> # A tibble: 6 × 6
#>     sim patientID Accrual    OS    PFS    OR
#>   <int>     <int>   <dbl> <dbl>  <dbl> <dbl>
#> 1     1         1    6.90 15.5   6.19      0
#> 2     1         2   18.9  45.4   9.37      1
#> 3     1         3    9.82  1.68  1.68      0
#> 4     1         4   21.2  15.6   8.49      0
#> 5     1         5   22.6   4.70  0.958     0
#> 6     1         6    1.09 18.6  10.3       0
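
A couple of quick sanity checks can be run on the simulated data with base R and dplyr. Note that if the rho parameters are defined on a latent scale, the empirical Pearson correlations below will only roughly approximate them; treat this as a rough check, not a formal validation.

# The OS >= PFS constraint should hold for every patient
all(result_single$OS >= result_single$PFS)

# Average within-simulation correlations, for comparison with the rho inputs
result_single %>%
  group_by(sim) %>%
  summarise(
    cor_OS_PFS = cor(OS, PFS),
    cor_OS_OR  = cor(OS, OR),
    cor_PFS_OR = cor(PFS, OR),
    .groups = 'drop'
  ) %>%
  summarise(across(starts_with("cor"), mean))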

Correlation Bounds Validation

Validate that correlation parameters are within feasible bounds:

# Check correlation bounds
bounds_check <- CorrBounds(
  outcomes = c('OS', 'PFS', 'OR'),
  lambda.OS = log(2)/12,  # Exponential rate for 12-month median OS
  lambda.PFS = log(2)/6,  # Exponential rate for 6-month median PFS
  p.OR = 0.4,
  rho.OS.PFS = 0.5,
  rho.OS.OR = 0.3,
  rho.PFS.OR = 0.4
)

# Print validation results
if (bounds_check$valid) {
  cat("Correlation parameters are valid!\n")
} else {
  cat("Invalid correlations:\n")
  for (error in bounds_check$errors) {
    cat("-", error, "\n")
  }
}
#> Correlation parameters are valid!

# Show calculated bounds
for (bound_name in names(bounds_check$bounds)) {
  bound <- bounds_check$bounds[[bound_name]]
  cat(sprintf("%s bounds: [%.4f, %.4f]\n", 
              bound_name, bound$lower, bound$upper))
}
#> OS.PFS bounds: [-0.6449, 0.9365]
#> OS.OR bounds: [-0.6256, 0.7481]
#> PFS.OR bounds: [-0.6256, 0.7481]
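
Since the upper bound for OS.PFS is about 0.94 here, requesting a larger value should be flagged. The call below assumes, as the if/else branch above suggests, that CorrBounds() reports infeasible values through $valid and $errors rather than stopping with an error.

# Same marginals, but with rho.OS.PFS above its upper bound of ~0.9365
bounds_bad <- CorrBounds(
  outcomes = c('OS', 'PFS', 'OR'),
  lambda.OS = log(2)/12,
  lambda.PFS = log(2)/6,
  p.OR = 0.4,
  rho.OS.PFS = 0.95,   # infeasible for these marginals
  rho.OS.OR = 0.3,
  rho.PFS.OR = 0.4
)
bounds_bad$valid     # expected FALSE
bounds_bad$errors    # describes which pair is out of bounds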

Multiple Arms Simulation

Generate data for multiple treatment arms:

# Define parameters for two treatment arms
arm_params <- list(
  arm1 = list(
    mst.OS = 18, mst.PFS = 12, p.OR = 0.6, n = 100,
    rho.OS.PFS = 0.5, rho.OS.OR = 0.3, rho.PFS.OR = 0.4
  ),
  arm2 = list(
    mst.OS = 12, mst.PFS = 8, p.OR = 0.4, n = 100,
    rho.OS.PFS = 0.5, rho.OS.OR = 0.3, rho.PFS.OR = 0.4
  )
)

# Generate multi-arm data
result_multi <- rCorrSurvBinaryMultiArmSubgroup(
  nsim = 10,
  outcomes = c('OS', 'PFS', 'OR'),
  arm.params = arm_params,
  tau = 24,
  seed = 456,
  validate.bounds = FALSE  # Skip validation for demo speed
)

# Summary by arm
result_multi %>%
  group_by(ARM) %>%
  summarise(
    n_patients = n(),
    mean_OS = mean(OS, na.rm = TRUE),
    mean_PFS = mean(PFS, na.rm = TRUE),
    response_rate = mean(OR, na.rm = TRUE),
    .groups = 'drop'
  )
#> # A tibble: 2 × 5
#>   ARM   n_patients mean_OS mean_PFS response_rate
#>   <chr>      <int>   <dbl>    <dbl>         <dbl>
#> 1 arm1        1000    25.7    13.2          0.614
#> 2 arm2        1000    17.9     9.33         0.401
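
Because the OS and PFS columns hold simulated event times (no censoring is applied at this stage), their empirical medians should sit near the specified mst.OS and mst.PFS for each arm, and the response rate near p.OR. The summary below is a quick way to confirm that.

# Empirical medians and response rates by arm, for comparison with
# mst.OS, mst.PFS, and p.OR in arm_params
result_multi %>%
  group_by(ARM) %>%
  summarise(
    median_OS = median(OS, na.rm = TRUE),
    median_PFS = median(PFS, na.rm = TRUE),
    response_rate = mean(OR, na.rm = TRUE),
    .groups = 'drop'
  )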

Advanced Features

Subgroup Analysis

The package supports complex subgroup structures:

# Define parameters with subgroups
arm_params_subgroups <- list(
  arm1 = list(
    sub1 = list(
      mst.OS = 20, mst.PFS = 12, p.OR = 0.7, n = 50,
      rho.OS.PFS = 0.4, rho.OS.OR = 0.2, rho.PFS.OR = 0.3
    ),
    sub2 = list(
      mst.OS = 15, mst.PFS = 8, p.OR = 0.5, n = 25,
      rho.OS.PFS = 0.4, rho.OS.OR = 0.2, rho.PFS.OR = 0.3
    )
  ),
  arm2 = list(
    sub1 = list(
      mst.OS = 12, mst.PFS = 6, p.OR = 0.4, n = 50,
      rho.OS.PFS = 0.4, rho.OS.OR = 0.2, rho.PFS.OR = 0.3
    ),
    sub2 = list(
      mst.OS = 10, mst.PFS = 5, p.OR = 0.3, n = 25,
      rho.OS.PFS = 0.4, rho.OS.OR = 0.2, rho.PFS.OR = 0.3
    )
  )
)

# Generate subgroup data
result_subgroups <- rCorrSurvBinaryMultiArmSubgroup(
  nsim = 5,
  outcomes = c('OS', 'PFS', 'OR'),
  arm.params = arm_params_subgroups,
  tau = 18,
  seed = 789,
  validate.bounds = FALSE
)

# Summary by arm and subgroup
result_subgroups %>%
  group_by(ARM, SUBGROUP) %>%
  summarise(
    n_patients = n(),
    mean_OS = round(mean(OS, na.rm = TRUE), 2),
    mean_PFS = round(mean(PFS, na.rm = TRUE), 2),
    response_rate = round(mean(OR, na.rm = TRUE), 2),
    .groups = 'drop'
  )
#> # A tibble: 4 × 6
#>   ARM   SUBGROUP n_patients mean_OS mean_PFS response_rate
#>   <chr> <chr>         <int>   <dbl>    <dbl>         <dbl>
#> 1 arm1  sub1            250    27.8    12.4           0.67
#> 2 arm1  sub2            125    20.8     9.22          0.43
#> 3 arm2  sub1            250    17.8     7.05          0.38
#> 4 arm2  sub2            125    16.8     6.39          0.29
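
Because the subgroups have different sample sizes (50 vs 25 per arm here), arm-level ("entire" population) summaries weight sub1 twice as heavily as sub2. Collapsing over SUBGROUP gives those pooled figures directly:

# Pooled (entire-population) summaries per arm, collapsing over subgroups
result_subgroups %>%
  group_by(ARM) %>%
  summarise(
    n_patients = n(),
    mean_OS = round(mean(OS, na.rm = TRUE), 2),
    mean_PFS = round(mean(PFS, na.rm = TRUE), 2),
    response_rate = round(mean(OR, na.rm = TRUE), 2),
    .groups = 'drop'
  )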

Event-Driven Analysis

Perform sequential analyses at predefined event numbers:

# Perform event-driven analysis
analysis_results <- AnalysisCorrSurvBinary(
  data = result_multi,
  E = c(15, 30, 45),  # Event numbers for analysis
  prioritize = "OS",
  subgroup.prioritize = c("entire"),
  alternative = "greater"
)
#> Available arms: arm1, arm2
#> Control arm: arm2
#> Treatment arms: arm1
#> Available outcomes: OS, PFS, OR
#> Subgroup prioritization: entire
#> Total tests per simulation: 9
#> Performing analysis for 10 simulations...
#> Processing simulation 1/10 (10.0%)
#> Processing simulation 2/10 (20.0%)
#> Processing simulation 3/10 (30.0%)
#> Processing simulation 4/10 (40.0%)
#> Processing simulation 5/10 (50.0%)
#> Processing simulation 6/10 (60.0%)
#> Processing simulation 7/10 (70.0%)
#> Processing simulation 8/10 (80.0%)
#> Processing simulation 9/10 (90.0%)
#> Processing simulation 10/10 (100.0%)
#> Event-driven analysis completed.
#> Total results generated: 90

# Summary of analysis results
if (nrow(analysis_results) > 0) {
  analysis_results %>%
    group_by(analysis_event, outcome) %>%
    summarise(
      n_tests = n(),
      significant_tests = sum(pvalue < 0.05, na.rm = TRUE),
      mean_analysis_time = round(mean(analysis_time, na.rm = TRUE), 2),
      .groups = 'drop'
    )
} else {
  cat("No analysis results generated (insufficient events)\n")
}
#> # A tibble: 9 × 5
#>   analysis_event outcome n_tests significant_tests mean_analysis_time
#>            <dbl> <chr>     <int>             <int>              <dbl>
#> 1             15 OR           10                 4               9.79
#> 2             15 OS           10                 2               9.79
#> 3             15 PFS          10                 2               9.79
#> 4             30 OR           10                 5              14.4 
#> 5             30 OS           10                 2              14.4 
#> 6             30 PFS          10                 2              14.4 
#> 7             45 OR           10                 7              17.5 
#> 8             45 OS           10                 2              17.5 
#> 9             45 PFS          10                 2              17.5
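
The same result columns can be used to estimate the empirical rejection rate (a rough power estimate) per outcome and analysis event; with only 10 simulations this is purely illustrative.

# Proportion of simulations with p < 0.05, by analysis event and outcome
analysis_results %>%
  group_by(analysis_event, outcome) %>%
  summarise(
    rejection_rate = mean(pvalue < 0.05, na.rm = TRUE),
    .groups = 'drop'
  )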

Key Functions Summary

  1. rCorrSurvBinary(): generates correlated OS, PFS, and OR outcomes for a single population
  2. CorrBounds(): checks that pairwise correlation parameters fall within their feasible bounds
  3. rCorrSurvBinaryMultiArmSubgroup(): generates data across multiple treatment arms and subgroups
  4. AnalysisCorrSurvBinary(): performs event-driven analyses at predefined event numbers

Best Practices

  1. Always validate correlation bounds before running large simulations
  2. Use appropriate event numbers relative to sample sizes in event-driven analyses
  3. Set seeds for reproducible results
  4. Consider biological constraints when specifying correlation parameters
  5. Test with small nsim first before scaling up simulations
  6. Use validate.bounds = FALSE for large simulations after initial validation (a workflow sketch combining these practices follows below)
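
A minimal workflow sketch that ties these practices together, reusing the single-population parameters from earlier in this vignette (the nsim values are illustrative):

# 1. Validate correlation bounds once, up front
check <- CorrBounds(
  outcomes = c('OS', 'PFS', 'OR'),
  lambda.OS = log(2)/12, lambda.PFS = log(2)/6, p.OR = 0.4,
  rho.OS.PFS = 0.5, rho.OS.OR = 0.3, rho.PFS.OR = 0.4
)

if (check$valid) {
  # 2. Small pilot run with a fixed seed; bounds already checked, so skip re-validation
  pilot <- rCorrSurvBinary(
    nsim = 10, outcomes = c('OS', 'PFS', 'OR'), n = 100,
    mst.OS = 12, mst.PFS = 6, p.OR = 0.4,
    rho.OS.PFS = 0.5, rho.OS.OR = 0.3, rho.PFS.OR = 0.4,
    tau = 24, seed = 123, validate.bounds = FALSE
  )
  # 3. Inspect the pilot, then increase nsim for the production run
} else {
  print(check$errors)
}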

For more detailed examples and advanced usage, see the examples vignette.