## $Id: ppw.R,v 1.47 2008/12/12 14:52:17 jracine Exp jracine $

## Original code in Matlab by A. Patton, R translation and
## modifications by C. Parmeter and J. Racine.
##
## We are grateful to Andrew Patton and Dimitris Politis for their
## assistance and feedback. Kindly report features, deficiencies, and
## improvements to racinej@mcmaster.ca.
##
## The citation is A. Patton, D.N. Politis, and H. White (2008,
## forthcoming), "CORRECTION TO `Automatic Block-Length Selection for
## the Dependent Bootstrap' by D.N. Politis and H. White". This is
## based on the article by Politis, D.N., and H. White (2004),
## "Automatic block-length selection for the dependent bootstrap."
## Econometric Reviews, vol. 23.
##
## INPUTS:  data, an n x k matrix.
##
## OUTPUTS: b.star, a 2 x k vector of optimal bootstrap block lengths
## for the stationary bootstrap and circular bootstrap (BstarSB,
## BstarCB).

#' @aliases lam
#' @title Lag window
#' @description  The function \code{lam} is used to construct a "flat-top" lag window 
#' for spectral estimation based on Politis, D.N. and J.P. Romano (1995),
#' "Bias-Corrected Nonparametric Spectral Estimation", Journal of Time Series 
#' Analysis, vol. 16, No. 1.
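#' @param s a numeric vector of points at which the window is evaluated, e.g. the normalized lags \code{kk/M} used by \code{b.star}.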
#' @details This function constructs a lag window used in spectral estimation.
#' More details about the lag window and its usage can be found in the referenced papers.
#' @return  A lag window for spectral estimation.
#' @author Original code in Matlab by A. Patton, R translation and modifications
#'  by C. Parmeter and J. Racine. We are grateful to Andrew Patton and Dimitris
#'   Politis for their assistance and feedback. Kindly report features, 
#'   deficiencies, and improvements to \email{racinej@mcmaster.ca}.
#' @references Patton, A., Politis, D.N. and  White, H. (2009).
#'  Correction to ``Automatic Block-Length Selection for the Dependent Bootstrap''
#'   by D. Politis and H. White, \emph{Econometric Reviews}, \bold{ 28}, 372-375.
#' @seealso \code{\link{blockboot}}, \code{\link{b.star}}
#' @keywords "Stationary time series" "Optimal block length" 
#' @export
#' @examples 
#' # Generate a sequence for testing
#' s <- seq(-1, 1, by = 0.1)
#' # Calculate the lag window using the lam function
#' lag_window <- lam(s)
#' # Plot the generated lag window
#' plot(lag_window, type="l")

lam <- function(s) {
  # Flat-top lag window, evaluated elementwise on |s|:
  #   1             for 0   <= |s| <  0.5
  #   2 * (1 - |s|) for 0.5 <= |s| <= 1
  #   0             for |s| > 1
  a <- abs(s)
  plateau <- (a >= 0) * (a < 0.5)
  taper <- 2 * (1 - a) * (a >= 0.5) * (a <= 1)
  plateau + taper
}

##  Note that an example for usage appears at the bottom of
## this file. If you use this function as input into a routine such as
## tsboot() in the boot library (Angelo Canty and Brian Ripley
## (2008). boot: Bootstrap R (S-Plus) Functions. R package version
## 1.2-34.) you ought to use the option round=TRUE.
#' @aliases b.star
#' @title Bootstrap Block Length Choice in the Stationary Case
#' @description  
#' This function calculates the optimal bootstrap block lengths for both the 
#' Stationary Bootstrap, Nonoverlapping Block Bootstrap, Circular Block Bootstrap
#' and Moving Block Bootstrap methods, based on the data provided.
#' @param data A time series or a matrix of time series data.
#' @param mmax An integer constant representing the maximum lag. By default 
#' determined by the data.
#' @param Bmax  An integer constant representing the maximum block length
#'   By default determined by the data.
#' @param round A boolean. The returned block lengths are always rounded to the nearest integer; when \code{round = TRUE} they are additionally forced to be at least 1.
#' @details 
#' A \code{b.star} computes optimal block lengths for bootstrapping time series data,
#' utilizing autocorrelation and autocovariance measures. The function incorporates
#' several parameters, including  \code{mmax}, and \code{Bmax}, to refine the
#' block length calculations. The method involves a detailed analysis of the data's
#' autocorrelation structure to identify the most suitable block lengths for
#' bootstrapping procedures.
#' @return A k x 2 matrix with one row per time series (column of \code{data}).
#' The first column gives the optimal block length for the Stationary Bootstrap
#' (or Nonoverlapping Block Bootstrap) and the second column the optimal block
#' length for the Circular Block Bootstrap (or Moving Block Bootstrap).
#' @author 
#' Original code in Matlab by A. Patton.  
#' R translation and modifications by C. Parmeter and J. Racine, \email{racinej@@mcmaster.ca}.  
#' We are grateful to Andrew Patton and Dimitris Politis for their assistance and feedback,  
#' and for allowing us to include this function in this package.
#' @examples
#' # Simulate an ARIMA process
#' X <- arima.sim(n = 200, model = list(ar = c(0.5, 0.4), na = 0.5))
#' # Calculate the optimal bootstrap block lengths
#' optimal_choice <- b.star(X)
#' print(optimal_choice)
#' @references 
#' Politis, D.N., and White, H. (2004). Automatic block-length selection for the dependent bootstrap. 
#' \emph{Econometric Reviews}, \bold{23}, 53-70. 
#' 
#' Patton, A., Politis, D.N., and White, H. (2009). Correction to `Automatic
#' Block-Length Selection for the Dependent Bootstrap' by D.N. Politis and H. White."
#' \emph{Econometric Reviews},  \bold{28},  372-375.
#' @seealso \code{\link{blockboot}}, \code{\link{lam}}.
#' @keywords blocks
#' @export




b.star <- function(data,
                   mmax = NULL,
                   Bmax = NULL,
                   round = FALSE) {

  ## Automatic block-length selection, one column of `data` at a time.
  ##
  ## Arguments:
  ##   data  vector, matrix or data frame; each column is one series.
  ##   mmax  largest lag considered (default ceiling(sqrt(n)) + Kn).
  ##   Bmax  upper bound on the block length
  ##         (default ceiling(min(3 * sqrt(n), n / 3))).
  ##   round if TRUE the block lengths are clamped to [1, Bmax] and
  ##         rounded; if FALSE they are only capped at Bmax.
  ##
  ## Value: a k x 2 matrix, cbind(BstarSB, BstarCB), one row per column
  ## of `data`. Both columns are passed through round() on return, so
  ## the output is integer-valued whatever the value of `round`; with
  ## round = FALSE a value below 0.5 can therefore be returned as 0.

  ## Convert the data object to a data frame to handle both vectors
  ## and matrices.

  data <- data.frame(data)
  n <- nrow(data)
  k <- ncol(data)

  ## Set Defaults. Note that in footnote c, page 59, for Kn Politis
  ## and White (2004) use max(5,log10(n)). Since this must be an
  ## integer we use ceiling(log10(n)).

  Kn <- max(5, ceiling(log10(n)))
  if (is.null(mmax)) mmax <- ceiling(sqrt(n)) + Kn
  if (is.null(Bmax)) Bmax <- ceiling(min(3 * sqrt(n), n / 3))

  ## The use of c*sqrt(log10(n)/n) for critical values is given in
  ## footnote c of Politis and White (2004, page 59). It depends only
  ## on n, so it is computed once, outside the loop over columns. The
  ## normal quantile is not stored in a variable called `c`, to avoid
  ## masking base::c.

  z.crit <- qnorm(0.975)
  rho.k.crit <- z.crit * sqrt(log10(n) / n)

  ## Starting lags j = 1, ..., mmax - Kn + 1 of the candidate runs of
  ## Kn consecutive autocorrelations. The sequence is empty (instead of
  ## counting downwards, as 1:0 would) when mmax < Kn; floor() keeps
  ## the same end point as `1:(mmax - Kn + 1)` for a fractional mmax.

  run.starts <- seq_len(max(floor(mmax - Kn + 1), 0))

  ## Create two vectors of length k in which we store results.

  BstarSB <- numeric(length = k)
  BstarCB <- numeric(length = k)

  ## Now we loop through each variable in data (i.e., column,
  ## data[,i]).

  for (i in seq_len(k)) {

    ## We first obtain the autocorrelations rho(1),...,rho(mmax) (we
    ## need to drop the first autocorrelation as it is rho(0), hence
    ## acf[-1]). Note that Patton uses sample correlations after
    ## dropping the first mmax observations, while we instead use the
    ## acf to obtain rho(k).

    rho.k <- acf(data[, i],
                 lag.max = mmax,
                 type = "correlation",
                 plot = FALSE)$acf[-1]

    ## Number of insignificant autocorrelations in each window of Kn
    ## consecutive lags starting at lag j.

    insignificant <- abs(rho.k) < rho.k.crit
    num.insignificant <- vapply(run.starts,
                                function(j) sum(insignificant[j:(j + Kn - 1)]),
                                integer(1))

    ## mhat (footnote c of Politis and White, 2004):
    ##  - the first lag that starts a run of Kn insignificant
    ##    autocorrelations, if such a run exists;
    ##  - otherwise the largest significant lag (with a single
    ##    significant lag this is that lag itself);
    ##  - otherwise, with no significant lag at all, one.

    if (any(num.insignificant == Kn)) {
      mhat <- which(num.insignificant == Kn)[1]
    } else if (any(abs(rho.k) > rho.k.crit)) {
      mhat <- max(which(abs(rho.k) > rho.k.crit))
    } else {
      mhat <- 1
    }

    ## Compute M = 2 * mhat, capped at mmax (mhat is at least one).

    M <- min(2 * mhat, mmax)

    ## We compute BstarSB and BstarCB using the formulas in the above
    ## references. Now we require the autocovariance R(k) for lags
    ## -M,...,M, including R(0); ccf() of the series with itself
    ## returns exactly these 2M+1 values. Note that Patton uses sample
    ## covariances after dropping the first mmax observations.

    kk <- seq(-M, M)

    R.k <- ccf(data[, i], data[, i],
               lag.max = M,
               type = "covariance",
               plot = FALSE)$acf

    ## Flat-top window weights, shared by Ghat and both D terms.

    lag.wt <- lam(kk / M)
    Ghat <- sum(lag.wt * abs(kk) * R.k)
    wt.sum <- sum(lag.wt * R.k)
    DCBhat <- 4 / 3 * wt.sum^2
    DSBhat <- 2 * wt.sum^2
    BstarSB[i] <- ((2 * Ghat^2) / DSBhat)^(1 / 3) * n^(1 / 3)
    BstarCB[i] <- ((2 * Ghat^2) / DCBhat)^(1 / 3) * n^(1 / 3)

  }

  ## The user can choose whether the values are clamped to be at least
  ## one or not. Every operation here is elementwise over the k
  ## series. (A previous version wrapped the circular-bootstrap value
  ## in the scalar max(1, round(BstarCB)), which for k > 1 replaced
  ## every in-range entry by the maximum over all columns; inside the
  ## inner ifelse() the value is already >= 1, so round() is enough.)

  if (round) {

    BstarSB <- ifelse(BstarSB > Bmax, Bmax, ifelse(BstarSB < 1, 1, round(BstarSB)))
    BstarCB <- ifelse(BstarCB > Bmax, Bmax, ifelse(BstarCB < 1, 1, round(BstarCB)))

  } else {

    BstarSB <- ifelse(BstarSB > Bmax, Bmax, BstarSB)
    BstarCB <- ifelse(BstarCB > Bmax, Bmax, BstarCB)

  }

  ## `round` the argument is a logical, so the calls below resolve to
  ## the function base::round (R skips non-function bindings when
  ## looking up a function name).

  return(cbind(round(BstarSB), round(BstarCB)))

}

##

# ------------------------------------------------------------------------
# 
# "bopt_circy(x, period, PLT=FALSE)" --
# 
# Optimal choice of bootstrap block length for 
# periodically correlated time series
# 
# ------------------------------------------------------------------------
#' @aliases bopt_circy
#' @title Optimal Bootstrap Block Length for Periodically Correlated Time Series.
#' @description Calculates the optimal block length for Generalized Seasonal 
#' Block Bootstrap (GSBB), Extension of Moving Block Bootstrap (EMBB), 
#' and their circular versions CGSBB and CEMBB for periodically 
#' correlated time series, in the problems of the overall mean 
#' and seasonal means estimation.
#' @param x A numeric vector representing a periodically correlated time series.
#' @param period An integer; period length of \code{x}.
#' @param PLT Logical. If \code{TRUE} the function plots the Mean Square Error (MSE) 
#'       of the bootstrap variance estimator for various block lengths. By default 
#'       it is equal to \code{FALSE}.      
#' @param parameter The possible bopt_circy parameters are: 
#'  * `"mean"`,
#'  * `"seasonal mean"`.
#' @param method A choice of the block bootstrap method: 
#'  * `"GSBB"` - Generalized Seasonal Block Bootstrap,
#'  * `"CGSBB"` - Circular version of GSBB,
#'  * `"EMBB"` - Extension of Moving Block Bootstrap,
#'  * `"CEMBB"` - Circular version of EMBB.
#' @param plot_range If \code{PLT=TRUE}, a parameter changing the range of the x-axis in the 
#' plot of MSE. By default \code{plot_range = 2*period}.
#' @details   
#' For each bootstrap method implemented here, 
#' the function \code{bopt_circy} computes the optimal block length
#' for periodically correlated (PC) time series. 
#' The optimal block length is obtained by minimization of the 
#' MSE of the bootstrap variance estimator (see Bertail and Dudek (2024)). 
#' For the `GSBB` and `CGSBB` the optimal block length has the form 
#' \eqn{k\cdot d \pm 1}, where \eqn{d} is a period length and \eqn{k} is a positive
#' integer. For the `EMBB` and `CEMBB` the optimal block length can be of any length.
#' @return 
#' Returns the optimal block length (integer) for the chosen block bootstrap method.  
#' @references Bertail, P. and Dudek, A. (2025). \emph{Bootstrap for 
#' Dependent Data, with an R package} (by Bernard Desgraupes and Karolina Marek) - submitted.
#' 
#' Bertail, P. and Dudek, A.E. (2024). Optimal choice of bootstrap block length 
#' for periodically correlated time series, \emph{ Bernoulli}, \bold{30}, 2521-2545.
#' 
#' @seealso \code{\link{blockboot.seasonal}}, \code{\link{embb.sample}}.
#' @keywords "Bootstrap" "Periodically correlated" "Optimal block length" "MSE"
#' @export
#' @examples 
#' # Generate a periodically correlated time series
#' n=200
#' b <- arima.sim(n = n, model = list(ar = c(0.5, 0.4), na = 0.5))
#' period <- 12 
#' x <- 5*cos(2 * pi /period * (1:n))+5*b * cos(2 * pi /period * (1:n))
#' # Calculate the optimal block length for GSBB
#' optimal_choice <- bopt_circy(x, period, parameter= "mean", method= "GSBB")
#' print(optimal_choice)
##

bopt_circy <- function(x, period, PLT = FALSE,
                       parameter = c("mean", "seasonal mean"),
                       method = c("EMBB", "CEMBB", "GSBB", "CGSBB"),
                       plot_range = NULL) {
  # Dispatches to the block-length routine for the requested parameter.
  #
  # `parameter` and `method` default to the full vectors of choices, so
  # both are reduced to their first element and validated here. The
  # routines called below compare `method` inside `if ()`, which needs a
  # single string (a condition of length > 1 is an error in R >= 4.2).
  # An unknown value now stops with the list of valid choices instead
  # of failing later.
  parameter <- match.arg(parameter[1], c("mean", "seasonal mean"))
  method <- match.arg(method[1], c("EMBB", "CEMBB", "GSBB", "CGSBB"))

  # Default x-axis range of the optional MSE plot: two periods.
  if (is.null(plot_range)) {
    plot_range <- 2 * period
  }

  switch(parameter,
    "mean" = bopt_circy_mean(x, period, PLT, method, plot_range),
    "seasonal mean" = bopt_circy_seasonal_mean(x, period, PLT, method,
                                               plot_range)
  )
}



# Block length for the overall mean of a periodically correlated series.
#
# Arguments:
#   x          the observed series (only its length and the values passed
#              on to seasonalACF() / acfCoeff() are used here).
#   period     period length.
#   PLT        if TRUE, plot the MSE curve and mark the selected length.
#   method     "EMBB"/"CEMBB" keep the computed length as is; any other
#              value is treated as GSBB/CGSBB and snapped to k*period +/- 1.
#   plot_range right end of the x-axis of the MSE plot.
#
# Returns the selected block length `nopt`.
bopt_circy_mean <- function(x, period, PLT = FALSE, method, plot_range) {

  # `method` may arrive as the caller's full default vector of choices.
  # Only its first entry is used, so the test below is a single
  # TRUE/FALSE (a condition of length > 1 is an error in R >= 4.2).
  method <- method[1]
  is_embb <- method %in% c("EMBB", "CEMBB")

  n <- length(x) # length of the time series

  # Truncation levels KK = ll, ..., lld at which the sums are evaluated.
  ll <- floor(n^(1/4)) + 2 * period
  lld <- floor(n^(1/2)) + 2 * period
  nld <- lld - ll + 1 # number of truncation levels

  # --- G term ---------------------------------------------------------
  # For each KK two weighted sums of seasonalACF() values are added to a
  # running total G1; run_avg[j] is that total divided by the number of
  # levels processed so far.
  G1 <- 0
  run_avg <- numeric(nld)
  for (KK in ll:lld) {
    wts <- as.vector(1:KK / period)

    # first sum: lags 1, ..., KK
    scal1 <- sum(t(seasonalACF(x, 1:KK, period)) %*% wts)

    # second sum: `KK - 1:KK` parses as KK - (1:KK), i.e. lags
    # KK-1, KK-2, ..., 0.
    # NOTE(review): the original comment calls this the "negative part of
    # the sum"; confirm that this lag set is the intended one.
    scal2 <- sum(t(seasonalACF(x, KK - 1:KK, period)) %*% wts)

    G1 <- G1 + scal1 + scal2
    run_avg[KK - ll + 1] <- G1 / (KK - ll + 1)
  }

  # Average the running averages from the last level down to level
  # floor(nld / 2) to stabilize the sum (an index of 0 is ignored by R).
  bb <- floor(nld / 2)
  G <- mean(run_avg[nld:bb])

  # --- D term ---------------------------------------------------------
  # res1 accumulates acfCoeff(x, i, period) / pi over i = 0, ..., KK - 1
  # and is NOT reset between levels, so at the end it holds the sum over
  # all KK of the truncated sums. Dividing by the number of levels gives
  # their average. (The original stored this total in the first column of
  # an otherwise all-zero period x nld matrix and took row means, which
  # is the same quantity.)
  res1 <- numeric(period)
  for (KK in ll:lld) {
    for (i in 0:(KK - 1)) {
      res1 <- res1 + acfCoeff(x, i, period) / pi
    }
  }
  res <- res1 / nld
  D <- sum(abs(res)^2) * 4 * ((pi * period)^2) / 3

  # --- selected block length --------------------------------------------
  nopt <- ceiling((2 * G^2 / D)^(1/3) * n^(1/3))
  if (!is_embb) {
    # GSBB / CGSBB: move to (multiple of period) + 1 when the remainder is
    # below half a period, otherwise to (next multiple of period) - 1.
    if (nopt %% period < period / 2) {
      nopt <- (nopt %/% period) * period + 1
    } else {
      nopt <- (nopt %/% period + 1) * period - 1
    }
  }

  # MSE of the bootstrap variance estimator as a function of block length b.
  MSE <- function(b, t = n, D1 = D, G1 = G) {
    b / t * D1 + G1^2 / b^2
  }

  if (PLT) {
    y_min <- MSE(nopt)
    # The plot() calls keep the original expressions as arguments because
    # the default axis labels are taken from them.
    if (is_embb) {
      plot(2:plot_range, MSE(2:plot_range))
    } else {
      kd1 <- seq(period + 1, plot_range, by = period)
      kdd1 <- seq(period - 1, plot_range, by = period)
      domain <- sort(c(kd1, kdd1)) # the points kd+1 and kd+d-1
      plot(domain, MSE(domain))
    }
    points(nopt, y_min, pch = 16, col = "red")
    text(nopt, y_min, labels = paste("Min MSE:", y_min), pos = 3, col = "blue")
  }

  return(nopt)
}

# Block length for the seasonal means of a periodically correlated series.
#
# Arguments:
#   x          the observed series (only its length and the values passed
#              on to seasonalACF() are used here).
#   period     period length.
#   PLT        if TRUE, plot the MSE curve and mark the selected length.
#   method     "EMBB"/"CEMBB" keep the computed length as is; any other
#              value is treated as GSBB/CGSBB and snapped to k*period +/- 1.
#   plot_range right end of the x-axis of the MSE plot.
#
# Returns the selected block length `nopt`.
bopt_circy_seasonal_mean <- function(x, period, PLT = FALSE, method,
                                     plot_range) {

  # `method` may arrive as the caller's full default vector of choices.
  # Only its first entry is used, so the test below is a single
  # TRUE/FALSE (a condition of length > 1 is an error in R >= 4.2).
  method <- method[1]
  is_embb <- method %in% c("EMBB", "CEMBB")

  n <- length(x) # length of the time series

  # Truncation levels KK = ll, ..., lld at which the sums are evaluated.
  ll <- floor(n^(1/3)) + 2 * period
  lld <- floor(n^(1/2)) + 2 * period
  nld <- lld - ll + 1 # number of truncation levels

  # One column per truncation level. The G and D columns share the first
  # sum and differ only in the weights of the second one:
  # floor(1:KK / period) for G, 1:KK / period for D.
  # NOTE(review): the routine for the overall mean uses the unfloored
  # weights in both places; confirm that the floor() here is intended.
  # NOTE(review): open question left by the original authors: why is a
  # range of truncation levels used for D here, while for the mean (and
  # in the paper) it was ceiling(4*n^{1/4})?
  #
  # The original ran two separate loops that called seasonalACF() with
  # identical arguments; they are merged here, which assumes seasonalACF()
  # returns the same value for the same arguments.
  G_cols <- matrix(0, period, nld)
  D_cols <- matrix(0, period, nld)
  for (KK in ll:lld) {
    j <- KK - ll + 1

    # first sum: lags 1, ..., KK
    SACF1 <- seasonalACF(x, 1:KK, period)
    scal1 <- t(SACF1) %*% as.vector(1:KK / period)

    # second sum: `KK - 1:KK` parses as KK - (1:KK), i.e. lags
    # KK-1, KK-2, ..., 0.
    SACF2 <- seasonalACF(x, KK - 1:KK, period)

    G_cols[, j] <- scal1 + t(SACF2) %*% as.vector(floor(1:KK / period))
    D_cols[, j] <- scal1 + t(SACF2) %*% as.vector(1:KK / period)
  }

  # Average over the later truncation levels. The original prepended an
  # all-zero placeholder column and kept columns floor((nld + 1) / 2)
  # to nld + 1 of that padded matrix; without the placeholder these are
  # the columns selected below. max(., 1) keeps the placeholder itself out
  # of the average, which the original included when nld < 3.
  # drop = FALSE keeps a matrix for apply() even with a single row/column.
  first <- max(floor((nld + 1) / 2) - 1, 1)
  keep <- first:nld

  G <- apply(G_cols[, keep, drop = FALSE], 1, mean)
  GsSQ <- sum(G^2)

  D1s <- apply(D_cols[, keep, drop = FALSE], 1, mean) * period
  DsSQ <- 4 / 3 * sum(D1s^2)

  # --- selected block length --------------------------------------------
  nopt <- ceiling((2 * GsSQ / DsSQ)^(1/3) * n^(1/3))
  if (!is_embb) {
    # GSBB / CGSBB: move to (multiple of period) + 1 when the remainder is
    # below half a period, otherwise to (next multiple of period) - 1.
    if (nopt %% period < period / 2) {
      nopt <- (nopt %/% period) * period + 1
    } else {
      nopt <- (nopt %/% period + 1) * period - 1
    }
  }

  # MSE of the bootstrap variance estimator as a function of block length b.
  MSE <- function(b, t = n, D1 = DsSQ, G1 = GsSQ) {
    b / t * D1 + G1 / b^2
  }

  if (PLT) {
    y_min <- MSE(nopt)
    # The plot() calls keep the original expressions as arguments because
    # the default axis labels are taken from them.
    if (is_embb) {
      plot(2:plot_range, MSE(2:plot_range))
    } else {
      kd1 <- seq(period + 1, plot_range, by = period)
      kdd1 <- seq(period - 1, plot_range, by = period)
      domain <- sort(c(kd1, kdd1)) # the points kd+1 and kd+d-1
      plot(domain, MSE(domain))
    }
    points(nopt, y_min, pch = 16, col = "red")
    text(nopt, y_min, labels = paste("Min MSE:", y_min), pos = 3, col = "blue")
  }

  return(nopt)
}
