#' @details This function performs a z-test on ranked set sample data for both one-sample and two-sample mean comparison problems, using normal approximation. For a one-sample test, only `data1` is needed, provided as a data frame with columns `rank` and `y`. For a two-sample test, both `data1` and `data2` must be supplied, each as data frames with `rank` and `y` columns. The function computes the test statistic, confidence interval, and p-value based on the provided RSS data and specified parameters.
#' @title RSS z-test for one-sample and two-sample problems
#' @name rss.z.test
#' @description The rss.z.test function performs one- and two-sample z-tests on ranked set sample data using normal approximation, with options for specifying the confidence level, alternative hypothesis, and hypothesized mean or mean difference.
#'
#' @param data1 A numeric data frame of ranked set samples with columns `rank` for ranks and `y` for data values.
#' @param data2 An optional numeric data frame of ranked set samples with columns `rank` for ranks and `y` for data values (for two-sample problem).
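#' @param alpha A numeric value strictly between 0 and 1 specifying the significance level; the confidence interval is computed at level `1 - alpha` (e.g. `alpha = 0.05` gives a 95% interval).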
#' @param alternative A character string specifying the alternative hypothesis. Must be one of "two.sided" (default), "greater", or "less".
#' @param mu0 A numeric value indicating the hypothesized value of the mean (for a one-sample problem) or the difference in means (for a two-sample problem).
#'
#' @return A list with the following components:
#'   \item{RSS_mean}{The RSS mean estimate for a one-sample problem or a vector of RSS mean estimates for each group in a two-sample problem.}
#'   \item{CI}{The confidence interval for the population mean for a one-sample problem. For a two-sample problem, the confidence interval for the mean difference is returned under the name \code{CI_diff} instead.}
#'   \item{z}{The z-statistic for the test.}
#'   \item{p.value}{The p-value for the test.}
#'
#' @references
#'
#' Chen, Z., Bai Z., Sinha B. K. (2003). Ranked Set Sampling: Theory and Application. New York: Springer.
#'
#' S. Ahn, J. Lim, and X. Wang. (2014) The student’s t approximation to distributions of pivotal statistics from ranked set samples. Journal of the Korean Statistical Society, 43, 643–652.
#'
#' S. Ahn, X. Wang, C. Moon, and J. Lim. (2024) New scheme of empirical likelihood method for ranked set sampling: Applications to two one sample problems. International Statistical Review.
#'
#' @seealso
#' \code{\link{rss.simulation}}: used for simulating Ranked Set Samples (RSS), which can serve as input.
#'
#' \code{\link{rss.sampling}}: used for sampling Ranked Set Samples (RSS) from a population data set, providing input data.
#'
#' @importFrom methods is
#' @examples
#' ## Balanced RSS with a set size 3 and equal sample sizes of 6 for each stratum,
#' ## using imperfect ranking from a normal distribution with a mean of 0.
#' rss.data1=rss.simulation(H=3,nsamp=c(6,6,6),dist="normal", rho=0.8,delta=0)
#'
#' ## one-sample z-test
#' rss.z.test(data1=rss.data1, data2=NULL, alpha=0.05,
#' alternative="two.sided", mu0=0)
#'
#' ## Unbalanced RSS with a set size 3 and different sample sizes of 6, 8, and 10 for each stratum,
#' ## using imperfect ranking from a normal distribution with a mean of 0.
#' rss.data2<-rss.simulation(H=3,nsamp=c(6,8,10),dist="normal", rho=0.8,delta=0)
#'
#' ## two-sample z-test
#' rss.z.test(data1=rss.data1, data2=rss.data2, alpha=0.05,
#' alternative="two.sided", mu0=0)
#'
#' @export
rss.z.test <- function(data1, data2=NULL, alpha=0.05, alternative="two.sided", mu0=0)
{
  # Normal-approximation z-test for ranked set sample (RSS) data.
  #
  # data1, data2 : data frames with columns `rank` and `y`; when `data2` is
  #                NULL a one-sample test on `data1` is performed, otherwise a
  #                two-sample test on the difference (data1 minus data2).
  # alpha        : significance level in (0, 1); the interval uses
  #                qnorm(1 - alpha / 2) and is always two-sided, whatever
  #                `alternative` is.
  # alternative  : one of "two.sided", "less", "greater" (affects p.value only).
  # mu0          : hypothesized mean (one-sample) or mean difference (two-sample).
  #
  # Returns a list with RSS_mean, the interval (named `CI` for one sample and
  # `CI_diff` for two samples), the z statistic `z`, and `p.value`.
  # Stops with an error for an invalid `alternative`, an `alpha` outside
  # (0, 1), or input data lacking the `rank` / `y` columns.

  # ---- argument validation (same order as before: alternative, alpha, data) ----
  alternative.set <- c("two.sided", "less", "greater")
  # The length guard makes a vector or NULL `alternative` fail with the
  # intended message instead of a condition-length error inside `if`.
  if (length(alternative) != 1L || !alternative %in% alternative.set) {
    stop("Invalid alternative selected. Please choose from 'two.sided', 'less', or 'greater'.")
  }

  # Scalar `&&` short-circuits, so NA / non-numeric / vector alpha is rejected
  # with the documented message rather than an opaque `if` failure.
  alpha.ok <- is.numeric(alpha) && length(alpha) == 1L && !is.na(alpha) &&
    alpha > 0 && alpha < 1
  if (!alpha.ok) {
    stop("alpha is out of bound.", call. = FALSE)
  }

  two.sample <- !is.null(data2)
  required <- c("rank", "y")
  if (!two.sample) {
    if (!all(required %in% colnames(data1))) {
      stop("The input data must contain 'rank' and 'y' variables.")
    }
  } else {
    if (!all(required %in% colnames(data1))) {
      stop("The first input data must contain 'rank' and 'y' variables.")
    }
    if (!all(required %in% colnames(data2))) {
      stop("The second input data must contain 'rank' and 'y' variables.")
    }
  }

  # ---- per-sample summary ----
  # RSS mean estimate (average of the per-rank means) and its estimated
  # variance, sum(s_h^2 / n_h) / H^2, where H is the number of distinct ranks.
  # A rank with a single observation has an NA sample variance, which
  # propagates to the statistic, interval and p-value.
  rss.summary <- function(data) {
    H <- length(unique(data$rank))
    nsamp <- table(data$rank)
    varh <- tapply(data$y, data$rank, stats::var)
    list(mu = mean(tapply(data$y, data$rank, mean)),
         var = sum(varh / nsamp) / H^2)
  }

  s1 <- rss.summary(data1)
  if (two.sample) {
    s2 <- rss.summary(data2)
    estimate <- s1$mu - s2$mu
    rss.sd <- sqrt(s1$var + s2$var)
  } else {
    estimate <- s1$mu
    rss.sd <- sqrt(s1$var)
  }

  # ---- test statistic and confidence interval ----
  zstat <- (estimate - mu0) / rss.sd
  half.width <- stats::qnorm(1 - alpha / 2) * rss.sd
  CI <- c(estimate - half.width, estimate + half.width)

  # ---- p-value ----
  # Upper tails are taken directly with lower.tail = FALSE: `1 - pnorm(z)`
  # cancels to exactly 0 for large z, hiding very small p-values.
  pval <- switch(alternative,
    two.sided = 2 * stats::pnorm(abs(zstat), lower.tail = FALSE),
    less      = stats::pnorm(zstat),
    greater   = stats::pnorm(zstat, lower.tail = FALSE)
  )

  # Element names are kept as callers already receive them: `CI` for the
  # one-sample case and `CI_diff` for the two-sample case.
  if (two.sample) {
    list(RSS_mean = c(s1$mu, s2$mu), CI_diff = CI, z = zstat, p.value = pval)
  } else {
    list(RSS_mean = estimate, CI = CI, z = zstat, p.value = pval)
  }
}

