GSCUSUM charts

head(gscusum_example_data) #> # A tibble: 6 x 4 #> t y year block_identifier #> <int> <lgl> <dbl> <int> #> 1 1 FALSE 2016 1 #> 2 2 FALSE 2016 1 #> 3 3 FALSE 2016 1 #> 4 4 FALSE 2016 1 #> 5 5 FALSE 2016 1 #> 6 6 FALSE 2016 1

head(ragscusum_example_data) #> # A tibble: 6 x 5 #> t y score year block_identifier #> <int> <lgl> <dbl> <dbl> <int> #> 1 1 FALSE 0.00829 2016 1 #> 2 2 FALSE 0.00237 2016 1 #> 3 3 FALSE 0.00926 2016 1 #> 4 4 FALSE 0.00394 2016 1 #> 5 5 FALSE 0.0241 2016 1 #> 6 6 FALSE 0.00557 2016 1

Non-risk-adjusted GSCUSUM chart

Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,

failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])

n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])

and control limits have to be estimated:


cusum_limit <- cusum_limit_sim(failure_probability,
                            n_patients,
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)


print(cusum_limit)
#> [1] 4.91128

GSCUSUM charts are constructed on performance data from 2017.


gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]

input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)


gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.

gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.08151062   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.13900012   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.15609100   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17359918   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5        0 0.19099323   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19285629   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557

gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))

col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)

ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
  theme_bw()

The complete run can be plotted with:

nblock <- max(gcs$block_identifier)

p <- ggplot(gcs)

for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)

  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)

}

p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "CUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "GSCUSUM")
p

racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016], odds_multiplier = 2, n_simulation = 1000, alpha = 0.05, seed = 2046) print(racusum_limit) #> [1] 2.403465

ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,] input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2) gcs <- gscusum(input_outcomes = input_outcomes, failure_probability = failure_probability, odds_multiplier = 2, limit = cusum_limit, max_num_shuffles = 1000, quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

gcs <- as.data.frame(gcs) names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max") head(gcs) #> sig_prob avg min q05 q25 median q75 q95 max #> 1 0 0.07905548 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278 #> 2 0 0.12831284 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557 #> 3 0 0.16331018 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557 #> 4 0 0.17408902 0 0 0 0.00000000 0.2889099 0.5778198 0.9820557 #> 5 0 0.19183142 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557 #> 6 0 0.19487628 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557

gcs$block_identifier <- input_outcomes[,2] gcs$t <- seq(1,nrow(gcs)) col1 <- "#f7ba02" col2 <- "#4063bc" palette <- rep(c(col1, col2), 300) ggplot() + geom_line(data = gcs, aes(x = t, y = sig_prob)) + geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) + scale_color_manual(guide=FALSE, values = palette) + scale_y_continuous(name = "Signal Probability", limit = c(0,1))+ theme_bw()

nblock <- max(gcs$block_identifier) p <- ggplot(gcs) for ( i in 1: nblock){ dblock <- gcs[gcs$block_identifier == i,] col <- ifelse(i %% 2 == 0,col2,col1) dblock_before <- dblock[1,] dblock_before$t <- dblock_before$t - .5 dblock_after <- dblock[nrow(dblock),] dblock_after$t <- dblock_after$t + .5 dblock_n <- rbind(dblock, dblock_before, dblock_after) p <- p + geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) + geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) + geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2) } p <- p + geom_line(data = gcs, aes(x = t, y = median)) + geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+ geom_hline(aes(yintercept = cusum_limit), linetype = 2) + theme_bw() + scale_y_continuous(name = "RACUSUM Distribution") + scale_x_continuous(name = "Sequence of Observations") + scale_fill_manual(values = palette, guide = FALSE) + labs(subtitle = "RA-GSCUSUM") p

GSCUSUM charts

Lena Hubig

2019-10-02

Overview

Data preparation

Non-risk-adjusted GSCUSUM chart

Risk-adjusted GSCUSUM chart