#' @title Information Criterion. Very close to the original BIC method, but this uses the integrated likelihood instead.
#' @description Evaluates a BIC-type criterion, \code{npars * log(n) - 2 * logLik},
#'   where \code{logLik} is the Gaussian (mixture) log-likelihood of the data
#'   evaluated at the posterior means of the Gibbs draws (a Laplace-Metropolis
#'   style approximation). The covariance determinant and inverse are obtained
#'   from the package's Woodbury helpers (\code{woodburydet},
#'   \code{woodburydet_k1}, \code{woodburyinverse}) for fast calculation.
#'
#' @param data.row  The dataset. Either a matrix with one observation per row,
#'   or a three-dimensional array, which is stacked into such a matrix with
#'   \code{apply(data.row, 2, c)}. The first \code{S * times} rows are used.
#' @param Gibbs  Gibbs sample derived from the \code{BCFM} function. The elements
#'   \code{B}, \code{mu}, \code{Omega}, \code{sigma2}, \code{Z} and (when
#'   \code{G > 1}) \code{probs} are read; the first dimension of each indexes
#'   the iterations.
#' @param model.attributes  Model attributes generated by
#'   \code{initialize.model.attributes}. The fields \code{S}, \code{times},
#'   \code{R}, \code{L} and \code{G} are read.
#' @param cluster.size Minimum proportion required for each cluster (default 0.05)
#' @param burnin  Number of burn-in iterations. If not specified, the first
#'   tenth of the iterations is used as the burn-in period
#'
#' @importFrom LaplacesDemon logit
#' 
#' @return  A single numeric value: \code{npars * log(n) - 2 * logLik}. It is
#'   \code{Inf} when, at any time point, the number of clusters whose share of
#'   modal allocations is at least \code{cluster.size} differs from \code{G}.
#' @export

IC <- function(data.row, Gibbs, model.attributes, cluster.size = 0.05, burnin = NA){
  n.iter <- dim(Gibbs$B)[1]
  S <- model.attributes$S
  n.times <- model.attributes$times
  n <- S * n.times
  r <- model.attributes$R
  k <- model.attributes$L
  G <- model.attributes$G
  npars <- r*k - sum(seq_len(k)) + G*k + k + (G - 1)*k*(k + 1)/2 + r + G - 1

  if (is.na(burnin)) {
    burnin <- round(n.iter / 10)
  }
  if (burnin >= n.iter) {
    stop("`burnin` must be smaller than the number of Gibbs iterations.",
         call. = FALSE)
  }
  turns <- seq(burnin + 1, n.iter)

  # Most frequent value of a vector (first one encountered wins ties).
  getmode <- function(v) {
    uniqv <- unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
  }

  # Modal cluster allocation of every subject at every time point, taken over
  # all stored iterations (burn-in draws included).
  sim.Z.mode <- apply(
    Gibbs$Z[, seq_len(S), seq_len(n.times), drop = FALSE],
    c(2, 3),
    getmode
  )

  # Reject the fit when, at some time point, the number of clusters reaching
  # the minimum proportion is not exactly G. Checked first so that rejected
  # fits skip the likelihood evaluation.
  for (tt in seq_len(n.times)) {
    cluster_proportions <- table(sim.Z.mode[, tt]) / S
    if (sum(cluster_proportions >= cluster.size) != G) {
      return(Inf)
    }
  }

  # Stack a 3-d data array into an (observations x variables) matrix.
  if (length(dim(data.row)) == 3) {
    data.row <- apply(data.row, 2, c)
  }
  obs <- as.matrix(data.row)[seq_len(n), , drop = FALSE]

  # Posterior means over the retained draws. drop = FALSE keeps every
  # dimension, so the same code serves G == 1, k == 1 and a single retained
  # draw: mu.mean is G x k, Omega.mean is G x k x k, B.mean is r x k.
  mu.mean <- apply(Gibbs$mu[turns, , , drop = FALSE], c(2, 3), mean)
  Omega.mean <- apply(Gibbs$Omega[turns, , , , drop = FALSE], c(2, 3, 4), mean)
  B.mean <- apply(Gibbs$B[turns, , , drop = FALSE], c(2, 3), mean)
  sigma2.mean <- apply(Gibbs$sigma2[turns, , drop = FALSE], 2, mean)

  # With a single cluster there are no mixing weights (log-weight 0).
  if (G > 1) {
    log.weights <- log(apply(Gibbs$probs[turns, , drop = FALSE], 2, mean))
  } else {
    log.weights <- 0
  }

  # log.dens[i, g]: log mixing weight plus Gaussian log-density of
  # observation i under cluster g.
  log.dens <- matrix(NA_real_, n, G)
  for (g in seq_len(G)) {
    Omega.g <- matrix(Omega.mean[g, , ], k, k)
    # The determinant and inverse depend only on the cluster, so they are
    # computed once per cluster instead of once per observation.
    if (k == 1) {
      Sigma.det <- woodburydet_k1(sigma2.mean, B.mean, Omega.g)
    } else {
      Sigma.det <- woodburydet(sigma2.mean, B.mean, Omega.g)
    }
    Sigma.inv <- woodburyinverse(sigma2.mean, B.mean, Omega.g)

    center <- as.vector(B.mean %*% mu.mean[g, ])
    resid <- sweep(obs, 2, center)
    # Row-wise quadratic forms resid_i' Sigma^{-1} resid_i.
    quad <- rowSums((resid %*% Sigma.inv) * resid)

    log.dens[, g] <- log.weights[g] - r/2*log(2*pi) -
      0.5*log(as.numeric(Sigma.det)) - 0.5*quad
  }

  # Log-sum-exp across clusters for each observation, then sum.
  row.max <- apply(log.dens, 1, max)
  int.like.tot <- sum(row.max + log(rowSums(exp(log.dens - row.max))))

  npars*log(n) - 2*int.like.tot
}