This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.
Following information have to be available:
dplyr::group_indices())These information are collected in a numeric matrix.
Non-risk-adjusted example data:
head(gscusum_example_data)
#> # A tibble: 6 x 4
#>       t y      year block_identifier
#>   <int> <lgl> <dbl>            <int>
#> 1     1 FALSE  2016                1
#> 2     2 FALSE  2016                1
#> 3     3 FALSE  2016                1
#> 4     4 FALSE  2016                1
#> 5     5 FALSE  2016                1
#> 6     6 FALSE  2016                1Risk-adjusted example data:
Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,
failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])
n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])and control limits have to be estimated:
cusum_limit <- cusum_limit_sim(failure_probability,
                            n_patients,
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)
print(cusum_limit)
#> [1] 4.91128GSCUSUM charts are constructed on performance data from 2017.
gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.08151062   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.13900012   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.15609100   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17359918   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5        0 0.19099323   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19285629   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
  theme_bw()The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)
  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "CUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "GSCUSUM")
pLike in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,
and control limits are set:
racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016],
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)
print(racusum_limit)
#> [1] 2.403465GSCUSUM charts are constructed on performance data from 2017.
ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.07905548   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.12831284   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.16331018   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17408902   0   0   0 0.00000000 0.2889099 0.5778198 0.9820557
#> 5        0 0.19183142   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19487628   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limit = c(0,1))+
  theme_bw()The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)
  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "RACUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "RA-GSCUSUM")
p