library(testthat)
library(dplyr)

# Tests for HLA_columns_to_GLstring(). Each test_that() block below covers one
# behaviour so that an error in one case cannot hide the results of the others.

test_that("HLA_columns_to_GLstring builds GL strings after stripping a prefix and suffix", {
  # Column names carry an "m" prefix and "cd" suffix around the locus name.
  # The expected strings below omit the "blank" and "-" entries entirely.
  typing_table <- data.frame(
    patient = c("patient1", "patient2", "patient3"),
    mA1cd = c("A*01:01", "A*02:01", "A*03:01"),
    mA2cd = c("A*11:01", "blank", "A*26:01"),
    mB1cd = c("B*07:02", "B*08:01", "B*15:01"),
    mB2cd = c("B*44:02", "B*40:01", "-"),
    mC1cd = c("C*03:04", "C*04:01", "C*05:01"),
    mC2cd = c("C*07:01", "C*07:02", "C*08:01"),
    stringsAsFactors = FALSE
  )

  result <- HLA_columns_to_GLstring(
    typing_table,
    HLA_typing_columns = c("mA1cd", "mA2cd", "mB1cd", "mB2cd", "mC1cd", "mC2cd"),
    prefix_to_remove = "m",
    suffix_to_remove = "cd"
  )

  expect_length(result, 3)
  # Compare the whole vector in order so a row mix-up is detected; a bare
  # membership check would pass even if strings landed on the wrong patient.
  expect_equal(
    result,
    c(
      "HLA-A*01:01+HLA-A*11:01^HLA-B*07:02+HLA-B*44:02^HLA-C*03:04+HLA-C*07:01",
      "HLA-A*02:01^HLA-B*08:01+HLA-B*40:01^HLA-C*04:01+HLA-C*07:02",
      "HLA-A*03:01+HLA-A*26:01^HLA-B*15:01^HLA-C*05:01+HLA-C*08:01"
    )
  )
})

test_that("HLA_columns_to_GLstring converts the HLA_typing_1 dataset", {
  # HLA_typing_1 is not defined in this file; presumably a dataset shipped
  # with the package under test.
  test_1 <- HLA_columns_to_GLstring(HLA_typing_1, c(A1:DPB1_2))
  result_1 <- c(
    "HLA-A*24:02+HLA-A*29:02^HLA-C*07:04+HLA-C*16:01^HLA-B*44:02+HLA-B*44:03^HLA-DRB5*01:01+HLA-DRB5*01:01^HLA-DRB1*15:01+HLA-DRB1*15:01^HLA-DQA1*01:02+HLA-DQA1*01:02^HLA-DQB1*06:02+HLA-DQB1*06:02^HLA-DPA1*01:03+HLA-DPA1*01:03^HLA-DPB1*03:01+HLA-DPB1*04:01",
    "HLA-A*02:01+HLA-A*11:05^HLA-C*07:01+HLA-C*07:02^HLA-B*07:02+HLA-B*08:01^HLA-DRB3*01:01^HLA-DRB4*01:03^HLA-DRB1*03:01+HLA-DRB1*04:01^HLA-DQA1*03:03+HLA-DQA1*05:01^HLA-DQB1*02:01+HLA-DQB1*03:01^HLA-DPA1*01:03+HLA-DPA1*01:03^HLA-DPB1*04:01+HLA-DPB1*04:01",
    "HLA-A*02:01+HLA-A*26:18^HLA-C*02:02+HLA-C*03:04^HLA-B*27:05+HLA-B*54:01^HLA-DRB3*02:02^HLA-DRB4*01:03^HLA-DRB1*04:04+HLA-DRB1*14:54^HLA-DQA1*01:04+HLA-DQA1*03:01^HLA-DQB1*03:02+HLA-DQB1*05:02^HLA-DPA1*01:03+HLA-DPA1*02:02^HLA-DPB1*02:01+HLA-DPB1*05:01",
    "HLA-A*29:02+HLA-A*30:02^HLA-C*06:02+HLA-C*07:01^HLA-B*08:01+HLA-B*13:02^HLA-DRB4*01:03+HLA-DRB4*01:03^HLA-DRB1*04:01+HLA-DRB1*07:01^HLA-DQA1*02:01+HLA-DQA1*03:01^HLA-DQB1*02:02+HLA-DQB1*03:02^HLA-DPA1*01:03+HLA-DPA1*02:01^HLA-DPB1*01:01+HLA-DPB1*16:01",
    "HLA-A*02:05+HLA-A*24:02^HLA-C*07:18+HLA-C*12:03^HLA-B*35:03+HLA-B*58:01^HLA-DRB3*02:02+HLA-DRB3*02:02^HLA-DRB1*03:01+HLA-DRB1*14:54^HLA-DQA1*01:04+HLA-DQA1*05:01^HLA-DQB1*02:01+HLA-DQB1*05:03^HLA-DPA1*01:03+HLA-DPA1*02:01^HLA-DPB1*10:01+HLA-DPB1*124:01",
    "HLA-A*01:01+HLA-A*24:02^HLA-C*07:01+HLA-C*14:02^HLA-B*49:01+HLA-B*51:01^HLA-DRB3*03:01^HLA-DRB1*08:01+HLA-DRB1*13:02^HLA-DQA1*01:02+HLA-DQA1*04:01^HLA-DQB1*04:02+HLA-DQB1*06:04^HLA-DPA1*01:03+HLA-DPA1*01:04^HLA-DPB1*04:01+HLA-DPB1*15:01",
    "HLA-A*03:01+HLA-A*03:01^HLA-C*03:03+HLA-C*16:01^HLA-B*15:01+HLA-B*51:01^HLA-DRB4*01:01^HLA-DRB1*01:01+HLA-DRB1*07:01^HLA-DQA1*01:01+HLA-DQA1*02:01^HLA-DQB1*02:02+HLA-DQB1*05:01^HLA-DPA1*01:03+HLA-DPA1*01:03^HLA-DPB1*04:01+HLA-DPB1*04:01",
    "HLA-A*01:01+HLA-A*32:01^HLA-C*06:02+HLA-C*07:02^HLA-B*08:01+HLA-B*37:01^HLA-DRB3*02:02^HLA-DRB5*01:01^HLA-DRB1*03:01+HLA-DRB1*15:01^HLA-DQA1*01:02+HLA-DQA1*05:01^HLA-DQB1*02:01+HLA-DQB1*06:02^HLA-DPA1*01:03+HLA-DPA1*02:01^HLA-DPB1*04:01+HLA-DPB1*14:01",
    "HLA-A*03:01+HLA-A*30:01^HLA-C*07:02+HLA-C*12:03^HLA-B*07:02+HLA-B*38:01^HLA-DRB3*01:01^HLA-DRB5*01:01^HLA-DRB1*03:01+HLA-DRB1*15:01^HLA-DQA1*01:02+HLA-DQA1*05:01^HLA-DQB1*02:01+HLA-DQB1*06:02^HLA-DPA1*01:03+HLA-DPA1*01:03^HLA-DPB1*04:01+HLA-DPB1*04:01",
    "HLA-A*02:05+HLA-A*11:01^HLA-C*07:18+HLA-C*16:02^HLA-B*51:01+HLA-B*58:01^HLA-DRB3*03:01^HLA-DRB5*01:01^HLA-DRB1*13:02+HLA-DRB1*15:01^HLA-DQA1*01:02+HLA-DQA1*01:03^HLA-DQB1*06:01+HLA-DQB1*06:09^HLA-DPA1*01:03+HLA-DPA1*01:03^HLA-DPB1*02:01+HLA-DPB1*104:01"
  )
  expect_equal(test_1, result_1)
})

test_that("HLA_columns_to_GLstring converts the HLA_typing_LIS dataset", {
  # HLA_typing_LIS is not defined in this file; presumably a package dataset
  # whose column names wrap the locus in "m" ... "cd.recipient".
  test_LIS <- HLA_columns_to_GLstring(
    HLA_typing_LIS,
    c(mA1Cd.recipient:mDPB12cd.recipient),
    prefix_to_remove = "m",
    suffix_to_remove = "cd.recipient"
  )
  result_LIS <- c(
    "HLA-A*24:02+HLA-A*02:01^HLA-B*40:02+HLA-B*40:01^HLA-C*03:04^HLA-DRB1*10:01+HLA-DRB1*15:01^HLA-DRB5*01:01^HLA-DQA1*01:05+HLA-DQA1*01:02^HLA-DQB1*05:01+HLA-DQB1*06:02^HLA-DPA1*01:03^HLA-DPB1*02:01+HLA-DPB1*04:01",
    "HLA-A*03:01+HLA-A*74:01^HLA-B*53:01+HLA-B*57:03^HLA-C*04:01+HLA-C*07:01^HLA-DRB1*11:01+HLA-DRB1*16:02^HLA-DRB3*02:02^HLA-DRB5*02:21^HLA-DQA1*01:02+HLA-DQA1*05:05^HLA-DQB1*03:19+HLA-DQB1*05:02^HLA-DPA1*02:01+HLA-DPA1*02:02^HLA-DPB1*01:01",
    "HLA-A*11:01+HLA-A*32:01^HLA-B*52:01^HLA-C*12:02^HLA-DRB1*15:02^HLA-DRB5*01:02^HLA-DQA1*01:03^HLA-DQB1*06:01^HLA-DPA1*01:03+HLA-DPA1*02:07^HLA-DPB1*02:01+HLA-DPB1*04:01",
    "HLA-A*03:01+HLA-A*30:02^HLA-B*14:02^HLA-C*08:02^HLA-DRB1*01:02+HLA-DRB1*13:02^HLA-DRB3*03:01^HLA-DQA1*01:01+HLA-DQA1*01:02^HLA-DQB1*05:01+HLA-DQB1*06:09^HLA-DPA1*02:01^HLA-DPB1*05:01+HLA-DPB1*17:01",
    "HLA-A*01:01+HLA-A*24:02^HLA-B*07:02^HLA-C*07:02^HLA-DRB1*01:01+HLA-DRB1*15:01^HLA-DRB5*01:01^HLA-DQA1*01:01+HLA-DQA1*01:02^HLA-DQB1*05:01+HLA-DQB1*06:02^HLA-DPA1*01:03+HLA-DPA1*02:01^HLA-DPB1*04:01+HLA-DPB1*11:01",
    "HLA-A*02:01+HLA-A*30:02^HLA-B*39:11+HLA-B*41:01^HLA-C*07:02+HLA-C*17:01^HLA-DRB1*03:01+HLA-DRB1*04:05^HLA-DRB3*02:02^HLA-DRB4*01:03^HLA-DQA1*05:01+HLA-DQA1*03:03^HLA-DQB1*02:01+HLA-DQB1*02:02^HLA-DPA1*01:03^HLA-DPB1*02:01+HLA-DPB1*04:01",
    "HLA-A*02:17+HLA-A*32:01^HLA-B*40:02^HLA-C*02:02+HLA-C*03:05^HLA-DRB1*04:11+HLA-DRB1*13:01^HLA-DRB3*02:02^HLA-DRB4*01:03^HLA-DQA1*03:01+HLA-DQA1*01:03^HLA-DQB1*03:02+HLA-DQB1*06:03^HLA-DPA1*01:03^HLA-DPB1*04:02",
    "HLA-A*11:01+HLA-A*23:01^HLA-B*15:17+HLA-B*44:03^HLA-C*07:01+HLA-C*16:01^HLA-DRB1*07:01^HLA-DRB4*01:01+HLA-DRB4*01:03^HLA-DQA1*02:01^HLA-DQB1*02:02^HLA-DPA1*01:03+HLA-DPA1*02:01^HLA-DPB1*04:01+HLA-DPB1*11:01",
    "HLA-A*03:01+HLA-A*68:02^HLA-B*08:01+HLA-B*15:03^HLA-C*02:10+HLA-C*03:04^HLA-DRB1*13:04+HLA-DRB1*07:01^HLA-DRB3*02:02^HLA-DRB4*01:03^HLA-DQA1*02:01+HLA-DQA1*05:05^HLA-DQB1*02:02+HLA-DQB1*03:19^HLA-DPA1*01:03+HLA-DPA1*02:02^HLA-DPB1*01:01+HLA-DPB1*02:01",
    "HLA-A*01:01+HLA-A*02:01^HLA-B*08:01+HLA-B*07:02^HLA-C*07:01+HLA-C*05:01^HLA-DRB1*03:01+HLA-DRB1*04:01^HLA-DRB3*01:01^HLA-DRB4*01:03^HLA-DQA1*05:01+HLA-DQA1*03:03^HLA-DQB1*02:01+HLA-DQB1*03:01^HLA-DPA1*01:03^HLA-DPB1*04:01+HLA-DPB1*06:01"
  )
  expect_equal(test_LIS, result_LIS)
})

test_that("HLA_columns_to_GLstring converts the Haplotype_frequencies dataset", {
  # Haplotype_frequencies is not defined in this file; presumably a package
  # dataset. Its columns are selected with a quoted range because the names
  # contain a hyphen.
  test_freq <- HLA_columns_to_GLstring(Haplotype_frequencies, c("HLA-A":"HLA-DPB1"))
  result_freq <- c(
    "HLA-A*24:02:01:01^HLA-C*03:04:01:01^HLA-B*40:01:02^HLA-DRB1*08:01:01^HLA-DQA1*04:01:01^HLA-DQB1*04:02:01^HLA-DPA1*01:03:01:04^HLA-DPB1*04:01:01:01",
    "HLA-A*03:01:01:05^HLA-C*06:02:01:01^HLA-B*47:01:01:03^HLA-DRB4*01:01:01:01^HLA-DRB1*07:01:01:01^HLA-DQA1*02:01:01:01^HLA-DQB1*02:02:01:01^HLA-DPA1*01:03:01:03^HLA-DPB1*06:01:01",
    "HLA-A*02:01:01:01^HLA-C*05:01:01:02^HLA-B*44:02:01:01^HLA-DRB3*01:01:02:01^HLA-DRB1*03:01:01:01^HLA-DQA1*05:01:01:02^HLA-DQB1*02:01:01^HLA-DPA1*01:03:01:01^HLA-DPB1*02:01:02",
    "HLA-A*32:01:01^HLA-C*02:02:02:01^HLA-B*40:02:01^HLA-DRB3*02:02:01:02^HLA-DRB1*11:01:01:01^HLA-DQA1*05:05:01:01^HLA-DQB1*03:01:01:03^HLA-DPA1*01:03:01:01^HLA-DPB1*02:01:02",
    "HLA-A*02:01:01:01^HLA-C*05:01:01:02^HLA-B*44:02:01:01^HLA-DRB5*01:01:01^HLA-DRB1*15:01:01:01^HLA-DQA1*01:02:01:01^HLA-DQB1*06:02:01^HLA-DPA1*01:03:01:02^HLA-DPB1*04:01:01:01",
    "HLA-A*02:01:01:01^HLA-C*05:01:01:02^HLA-B*44:02:01:01^HLA-DRB4*01:03:01:01^HLA-DRB1*04:01:01:01^HLA-DQA1*03:03:01:01^HLA-DQB1*03:01:01:01^HLA-DPA1*01:03:01:05^HLA-DPB1*04:02:01:02",
    "HLA-A*02:06:01:01^HLA-C*08:01:01^HLA-B*40:06:01:01^HLA-DRB4*01:03:02^HLA-DRB1*09:01:02^HLA-DQA1*03:02^HLA-DQB1*03:03:02:02^HLA-DPA1*02:01:01:02^HLA-DPB1*09:01:01",
    "HLA-A*24:02:01:01^HLA-C*07:02:01:01^HLA-B*39:05:01^HLA-DRB5*02:02^HLA-DRB1*16:02:01:02^HLA-DQA1*05:05:01:05^HLA-DQB1*03:01:01:01^HLA-DPA1*01:03:01:05^HLA-DPB1*04:02:01:02",
    "HLA-A*02:01:01:01^HLA-C*02:02:02:01^HLA-B*40:02:01^HLA-DRB3*02:02:01:02^HLA-DRB1*13:01:01:01^HLA-DQA1*01:03:01:02^HLA-DQB1*06:03:01^HLA-DPA1*01:03:01:01^HLA-DPB1*02:01:02",
    "HLA-A*24:02:01:01^HLA-C*07:04:01:01^HLA-B*44:02:01:03^HLA-DRB3*02:02:01:02^HLA-DRB1*11:01:01:01^HLA-DQA1*05:05:01:01^HLA-DQB1*03:01:01:03^HLA-DPA1*01:03:01:01^HLA-DPB1*02:01:02"
  )
  expect_equal(test_freq, result_freq)
})

test_that("HLA_columns_to_GLstring handles separate DRB3/4/5 columns in several value formats", {
  # The three inputs differ only in how much of the locus name is already
  # present in the cell values; all must yield the same GL strings.
  expected_separate <- c(
    "HLA-DRB3*03:01", "HLA-DRB4*04:02", "HLA-DRB3*03:03^HLA-DRB5*05:03"
  )

  # Values hold only the allele fields ("03:01").
  test_separate_DRB_no_prefixes <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    DRB3_1  = c("03:01", NA, "03:03"),
    DRB4_1  = c(NA, "04:02", NA),
    DRB5_1  = c(NA, NA, "05:03")
  )
  result_separate_no_prefixes <- test_separate_DRB_no_prefixes %>%
    mutate(GL = HLA_columns_to_GLstring(., HLA_typing_columns = DRB3_1:DRB5_1)) %>%
    pull(GL)
  expect_equal(result_separate_no_prefixes, expected_separate)

  # Values carry only the locus digit ("3*03:01").
  test_separate_DRB_numeric_only <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    DRB3_1  = c("3*03:01", NA, "3*03:03"),
    DRB4_1  = c(NA, "4*04:02", NA),
    DRB5_1  = c(NA, NA, "5*05:03")
  )
  result_separate_numeric_only <- test_separate_DRB_numeric_only %>%
    mutate(GL = HLA_columns_to_GLstring(., HLA_typing_columns = DRB3_1:DRB5_1)) %>%
    pull(GL)
  expect_equal(result_separate_numeric_only, expected_separate)

  # Values carry the full locus name ("DRB3*03:01").
  test_separate_DRB_full_prefix <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    DRB3_1  = c("DRB3*03:01", NA, "DRB3*03:03"),
    DRB4_1  = c(NA, "DRB4*04:02", NA),
    DRB5_1  = c(NA, NA, "DRB5*05:03")
  )
  result_separate_full_prefix <- test_separate_DRB_full_prefix %>%
    mutate(GL = HLA_columns_to_GLstring(., HLA_typing_columns = DRB3_1:DRB5_1)) %>%
    pull(GL)
  expect_equal(result_separate_full_prefix, expected_separate)
})

test_that("HLA_columns_to_GLstring handles combined DRB345 columns", {
  # Cells name the locus in full; alleles of different loci in the same row
  # are expected to be joined with "^", alleles of the same locus with "+".
  test_mixed_DRB_with_prefixes <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    DRB345_1 = c("DRB3*01:01", "DRB4*04:01", "DRB5*05:01"),
    DRB345_2 = c("DRB4*01:01", "DRB5*04:01", "DRB5*05:02")
  )
  result_mixed_with_prefixes <- test_mixed_DRB_with_prefixes %>%
    mutate(GL = HLA_columns_to_GLstring(., HLA_typing_columns = DRB345_1:DRB345_2)) %>%
    pull(GL)
  expect_equal(
    result_mixed_with_prefixes,
    c(
      "HLA-DRB3*01:01^HLA-DRB4*01:01", "HLA-DRB4*04:01^HLA-DRB5*04:01",
      "HLA-DRB5*05:01+HLA-DRB5*05:02"
    )
  )

  # Cells carry only the locus digit; a single column is selected.
  test_mixed_DRB_no_prefixes <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    DRB345_1 = c("3*01:01", "4*04:01", "5*05:01")
  )
  result_mixed_no_prefixes <- test_mixed_DRB_no_prefixes %>%
    mutate(GL = HLA_columns_to_GLstring(., HLA_typing_columns = DRB345_1)) %>%
    pull(GL)
  expect_equal(
    result_mixed_no_prefixes,
    c("HLA-DRB3*01:01", "HLA-DRB4*04:01", "HLA-DRB5*05:01")
  )
})

test_that("HLA_columns_to_GLstring converts serologic typing columns", {
  # Empty strings and NA are expected to be dropped; DRw columns are expected
  # to be reported under the "DR" name.
  test_sero <- tibble(
    patient = c("patient1", "patient2", "patient3"),
    A_1 = c("2", "3", "68"),
    A_2 = c("", "1", "69"),
    # All values are written as strings; a bare number here would only be
    # coerced to character by c() anyway.
    DR_1 = c("7", "4", "9"),
    DR_2 = c("6", "4", NA),
    DRw_1 = c(NA, NA, "52"),
    DRw_2 = c("51", "52", "53")
  )

  test_sero_code <- test_sero %>%
    HLA_columns_to_GLstring(HLA_typing_columns = A_1:DRw_2)
  test_sero_result <- c(
    "HLA-A2^HLA-DR7+HLA-DR6^HLA-DR51",
    "HLA-A3+HLA-A1^HLA-DR4+HLA-DR4^HLA-DR52",
    "HLA-A68+HLA-A69^HLA-DR9^HLA-DR52+HLA-DR53"
  )
  expect_equal(test_sero_code, test_sero_result)
})
