#' Prepares pedigree data for plotting and spatial representation
#'
#' @description Combines extended pedigree (obtained by [`org_fams()`] function)
#'  and sample metadata data for visual ([`ped_satplot()`]) and spatial
#'  ([`ped_spatial()`]) representation of the pedigree.
#'
#' @details
#' * `sampledata` has to include columns that contain information on:
#'   - unique identifier of each sample; character or numeric
#'   (default column name = `Sample`, see [`check_sampledata()`] function),
#'   - date of sample collection in `Date` format (default = `Date`),
#'   - assignment of sample to particular individual; character or numeric
#'   (default = `AnimalRef`, see [`check_sampledata()`] function),
#'   - sex of the animal coded as `F`, `M` or `NA`; character
#'   (default = `GeneticSex`, see [`check_sampledata()`] function),
#'   - longitude and latitude coordinates of sample collection location; numeric
#'   (default = `lng` and `lat`, see [`check_sampledata()`] function),
#'   - type of particular sample eg. scat, tissue, saliva; character
#'   (default = `SType`, see [`check_sampledata()`] function),
#'   - date of first and last sample of individual in `Date` format
#'       (default = `FirstSeen` and `LastSeen`, see [`anim_timespan()`] function),
#'   - value identifying if if the individual is dead; logical
#'       (default = `IsDead`, see [`anim_timespan()`] function).
#'
#'
#'
#' @param plot_fams Character string or numeric vector. FamID numbers from `fams` data generated by [`org_fams()`] function.
#'   If all families want to be plotted it is defined as character string "all". For a subset of families
#'   a numeric vector of FamIDs has to be specified. Defaults to "all".
#' @param all_fams Data frame. Family (`fams`) data generated by [`org_fams()`] function.
#' @param ped Data frame. Organized pedigree (`ped`) generated by [`org_fams()`] function.
#' @param sampledata Data frame. Metadata for all genetic samples that belong
#' to the individuals included in pedigree reconstruction analysis.
#' For description of `sampledata` structure and sample information needed for `plot_table()` see Details.
#' @param datacolumns Vector of column names included `sampledata` that are needed to produce
#' this functions output (see Details).
#' @param deadSample Single value or vector of different lethal sample types.
#' Defaults to c("Tissue").
#'
#' @return
#' Extended `sampledata` data frame that includes all columns defined in `datacolumns`
#' parameter and adds information needed for visual and spatial representation of
#' pedigree:
#'   * `plottingID`: Numeric. Identifier number for temporal pedigree plot
#'    [`ped_satplot()`]. In case of polygamous animals same individual
#'    can be included in more than one family.
#'   * `FamID`: Numeric. Identifier number of family that individual belongs to.
#'   * `hsGroup`: Numeric. Identifier number for the half-sib group of individual.
#'   * `rep`: Logical. Is individual reproductive in current family,
#'    (current family defined with `FamID` for a particular entry).
#'   * `later_rep`: Logical. Is individual reproductive in any other (later) families.
#'   * `isPolygamous`: Logical. Does the individual have more than one mate.
#'   * `dead`: Logical. Is individual dead.
#'   * `first_sample`: Logical. Is this particular sample the first sample of the individual.
#'   * `last_sample`: Logical. Is this particular sample the last sample of the individual.
#'   * `isReference`: Logical. Is this particular sample reference sample of individual.
#'
#' @export
#'
#' @examples
#'
#' # Prepare the data for usage with plot_table() function.
#' # Get animal timespan data using the anim_timespan() function.
#' animal_ts <- anim_timespan(wolf_samples$AnimalRef,
#'   wolf_samples$Date,
#'   wolf_samples$SType,
#'   dead = c("Tissue")
#' )
#' # Add animal timespan to the sampledata
#' sampledata <- merge(wolf_samples, animal_ts, by.x = "AnimalRef", by.y = "ID", all.x = TRUE)
#' # Define the path to the pedigree data file.
#' path <- paste0(system.file("extdata", package = "wpeR"), "/wpeR_samplePed")
#' # Retrieve the pedigree data from the get_colony function.
#' ped_colony <- get_colony(path, sampledata, rm_obsolete_parents = TRUE, out = "FamAgg")
#' # Organize families and expand pedigree data using the org_fams function.
#' org_tables <- org_fams(ped_colony, sampledata, output = "both")
#'
#' # Run the function
#' # Prepare data for plotting.
#' plot_table(plot_fams = "all",
#'   org_tables$fams,
#'   org_tables$ped,
#'   sampledata,
#'   deadSample = c("Tissue")
#' )
#'
#'
plot_table <- function(plot_fams = "all", all_fams, ped, sampledata,
                       datacolumns = c("Sample", "AnimalRef", "GeneticSex",
                                       "Date", "SType", "lat", "lng",
                                       "FirstSeen", "LastSeen", "IsDead"),
                       deadSample = c("Tissue")) {

  if (all(plot_fams == "all")) {
    sel_fams <- all_fams
  } else if (all(plot_fams %in% all_fams$FamID)) {
    sel_fams <- all_fams[which(all_fams$FamID %in% plot_fams),]
  } else {
    stop("FamID/s ", plot_fams[!(plot_fams %in% all_fams$FamID)], " not present in all_fams data frame")
  }

  outdata <- NULL
  # ID for plotting, since the same animal can be drawn in more families with polygamy
  plottingID <- 1

  # loop fills the outdata table with data from sampledata[datacolumn,],
  ## adds famID, hsGroup and creates plottingID, rep (if animal is reproductive),
  ## later_rep (if animal reproductive in later season),
  ## is Polygamous (if it has offspring with more than one other animal)
  # plottingID marks all the samples of the same animal
  # loop starts with reproductive males after that reproductive females and than other animals.
  ## Even though the loop is segmented it does the same things for all animals.
  for (i in seq_len(nrow(sel_fams))) {
    # Get reproductive animals, write in outdata
    if (!grepl("//*", sel_fams$father[i])) {
      fatherSamples <- sampledata[sampledata$AnimalRef == sel_fams$father[i], datacolumns]

      fatherSamples$plottingID <- rep(plottingID, nrow(fatherSamples))
      plottingID <- plottingID + 1

      fatherSamples$FamID <- rep(sel_fams$FamID[i], nrow(fatherSamples))
      fatherSamples$hsGroup <- rep(sel_fams$hsGroup[i], nrow(fatherSamples))
      fatherSamples$rep <- rep(TRUE, nrow(fatherSamples))
      # does the animal become reproductive later
      fatherSamples$later_rep <- rep(FALSE, nrow(fatherSamples))

      if (!is.na(sel_fams$DadHSgroup[i])) {
        fatherSamples$isPolygamous <- rep(TRUE, nrow(fatherSamples))
      } else {
        fatherSamples$isPolygamous <- rep(FALSE, nrow(fatherSamples))
      } # if Dad polygamous cluster is not NA, then TRUE

      outdata <- rbind(outdata, fatherSamples)
    }

    if (!grepl("#", sel_fams$mother[i])) {
      motherSamples <- sampledata[sampledata$AnimalRef == sel_fams$mother[i], datacolumns]

      motherSamples$plottingID <- rep(plottingID, nrow(motherSamples))
      plottingID <- plottingID + 1

      motherSamples$FamID <- rep(sel_fams$FamID[i], nrow(motherSamples))
      motherSamples$hsGroup <- rep(sel_fams$hsGroup[i], nrow(motherSamples))
      motherSamples$rep <- rep(TRUE, nrow(motherSamples))
      # does the animal become reproductive later
      motherSamples$later_rep <- rep(FALSE, nrow(motherSamples))

      if (!is.na(sel_fams$MomHSgroup[i])) {
        motherSamples$isPolygamous <- rep(TRUE, nrow(motherSamples))
      } else {
        motherSamples$isPolygamous <- rep(FALSE, nrow(motherSamples))
      } # if Mom polygamous cluster is not NA, then TRUE

      outdata <- rbind(outdata, motherSamples)
    }

    # Get other animals, write in outdata
    sub_ped <- ped[ped$FamID == sel_fams$FamID[i], ]

    for (j in seq_len(nrow(sub_ped))) {
      offspringSamples <- sampledata[sampledata$AnimalRef == sub_ped$id[j], datacolumns]

      offspringSamples$plottingID <- rep(plottingID, nrow(offspringSamples))
      plottingID <- plottingID + 1

      offspringSamples$FamID <- rep(sel_fams$FamID[i], nrow(offspringSamples))
      offspringSamples$hsGroup <- rep(sel_fams$hsGroup[i], nrow(offspringSamples))
      offspringSamples$rep <- rep(FALSE, nrow(offspringSamples))

      if (sum(grepl(sub_ped$id[j], all_fams$father) | grepl(sub_ped$id[j], all_fams$mother)) > 0) {
        offspringSamples$later_rep <- rep(TRUE, nrow(offspringSamples))
      } # the animal becomes reproductive later
      else {
        offspringSamples$later_rep <- rep(FALSE, nrow(offspringSamples))
      }

      offspringSamples$isPolygamous <- rep(FALSE, nrow(offspringSamples))

      outdata <- rbind(outdata, offspringSamples)
    }
  } # fams loop

  # mark mortality samples
  # based on deadSample vectors and sample type adds column isDead (TRUE/FALSE)
  outdata$dead <- outdata$SType %in% deadSample

  # mark first sample, mark last sample
  # new columns first_sample, last_sample, IsReference, at first all FALSE
  outdata$first_sample <- rep(FALSE, nrow(outdata))
  outdata$last_sample <- rep(FALSE, nrow(outdata))
  outdata$IsReference <- rep(FALSE, nrow(outdata))

  ## fills in columns created above
  for (i in 1:(nrow(outdata))) {
    # need this junk to catch NAs
    minDt <- min(outdata$Date[outdata$plottingID == outdata$plottingID[i]], na.rm = TRUE)
    maxDt <- max(outdata$Date[outdata$plottingID == outdata$plottingID[i]], na.rm = TRUE)

    if (!is.na(outdata$Date[i]) & minDt != Inf & maxDt != -Inf) {
      if (outdata$Date[i] == minDt) outdata$first_sample[i] <- TRUE
      if (outdata$Date[i] == maxDt) outdata$last_sample[i] <- TRUE
    }
    if (outdata$Sample[i] == outdata$AnimalRef[i]) outdata$IsReference[i] <- TRUE
  }

  # samples that have only NA's for data
  outdata$first_sample[outdata$first_sample == Inf] <- NA
  outdata$last_sample[outdata$last_sample == -Inf] <- NA

  unique_out <- unique(outdata$plottingID)

  # get rid of duplicates that occur when there are several "first" or "last" samples (collected on the same date)

  for (i in 1:length(unique_out)) {
    animal_samps_frst <- outdata$plottingID == unique_out[i] & outdata$first_sample == TRUE
    animal_samps_lst <- outdata$plottingID == unique_out[i] & outdata$last_sample == TRUE

    # if more than one first or last, retain only the flag on the first one
    if (sum(animal_samps_frst) > 1) {
      outdata$first_sample[outdata$plottingID == unique_out[i] & outdata$first_sample == TRUE][2:sum(animal_samps_frst)] <- FALSE
    }

    if (sum(animal_samps_lst) > 1) {
      outdata$last_sample[outdata$plottingID == unique_out[i] & outdata$last_sample == TRUE][2:sum(animal_samps_lst)] <- FALSE
    }
  }


  return(outdata)
}
