.packageName <- "GeneticsDesign"
# Genetics power calculator for linear trend association studies
#
# Purcell's Power estimation method
# (http://pngu.mgh.harvard.edu/~purcell/gpc/cc2.html)

# Linear Tread Test
# http://linkage.rockefeller.edu/pawe3d/help/Linear-trend-test-ncp.html


# Given
# pA -- High risk allele frequency (A)
# pD -- disease prevalence
# RRAa -- Genotype relative risk Aa = RR(Aa|aa)=Pr(D|Aa)/Pr(D|aa)
# RRAA -- Genotype relative risk AA = RR(AA|aa)=Pr(D|AA)/Pr(D|aa)
# Dprime -- LD measure
# pB -- Marker allele frequency (B)
# nCase -- Number of cases
# ratio -- Control: case ratio = nControl/nCase
# alpha -- User-defined type I error rate
GPC.default<-function(pA, pD, RRAa, RRAA, Dprime, pB, 
                   nCase=500, ratio=1, alpha=0.05, quiet=FALSE)
{
  if(!(pA>0 && pA<1) || !(pB>0 && pB<1) || !(Dprime>=0 && Dprime<=1)
     || !(pD>0 && pD<1) || !(RRAa>1) || !(RRAA>1) ||
     !(nCase>1) || !(ratio>0) || !(alpha>0 && alpha<0.5))
  { cat("Some arguments are out of bounds!\n") 
    cat("0<pA<1\n") 
    cat("0<pD<1\n") 
    cat("1<RRAa\n") 
    cat("1<RRAA\n") 
    cat("0<=Dprime<=1\n") 
    cat("0<pB<1\n") 
    cat("1<nCase\n") 
    cat("0<ratio\n") 
    cat("0.05<alpha<0.5\n") 
    stop("Program ends due to input errors!\n")
  }
  # get penetrances Pr(D|aa), Pr(D|Aa), Pr(D|AA)
  pa<-1-pA
  pb<-1-pB
  denom<-RRAA*pA^2+RRAa*2*pA*pa+pa^2
  PrDgaa<-pD/denom

  PrDgAa<-RRAa*PrDgaa
  PrDgAA<-RRAA*PrDgaa

  pen<-c(PrDgaa, PrDgAa, PrDgAA)

  # estimate haplotype frequencies Pr(AB), Pr(Ab), Pr(aB), and Pr(ab)
  # based on pA, pB, and Dprime. We assume D>0
  myHapFreqs<-hapFreq(Dprime, pA, pB)

  # estimate the sampling probabilities Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  tmp<-samplingProb(myHapFreqs, pen, pD, pA, pB)
  PrBBgD<-tmp[1]
  PrBbgD<-tmp[2]
  PrbbgD<-tmp[3]
  PrBBgDbar<-tmp[4]
  PrBbgDbar<-tmp[5]
  PrbbgDbar<-tmp[6]

  PrDgBB<-tmp[7]
  PrDgBb<-tmp[8]
  PrDgbb<-tmp[9]

  PrBgD<-tmp[10]
  PrbgD<-tmp[11]
  PrBgDbar<-tmp[12]
  PrbgDbar<-tmp[13]

  n0c<-(nCase*PrbbgD)
  n1c<-(nCase*PrBbgD)
  n2c<-nCase-n0c-n1c

  nControl<-(nCase*ratio)

  n0n<-(nControl*PrbbgDbar)
  n1n<-(nControl*PrBbgDbar)
  n2n<-nControl-n0n-n1n

  nc.vec<-c(n2c, n1c, n0c)
  nn.vec<-c(n2n, n1n, n0n)

  mat<-data.frame(case=nc.vec, control=nn.vec, code=c(2,1,0))

  x.vec<-c(2, 1, 0)

  R<-sum(nc.vec)
  S<-sum(nn.vec)

  n0<-n0c+n0n
  n1<-n1c+n1n
  n2<-n2c+n2n
  n.vec<-c(n2, n1, n0)

  N<-R+S

  p1.vec<-c(PrBBgD, PrBbgD, PrbbgD)
  p0.vec<-c(PrBBgDbar, PrBbgDbar, PrbbgDbar)

  numer<-sum(x.vec*(p1.vec-p0.vec))
  numer<-numer^2
  part1<-sum(x.vec^2*(R*p0.vec+S*p1.vec))
  part2<-sum(x.vec*(R*p0.vec+S*p1.vec))
  denom<-part1-part2^2/N

  # non-centrality parameter for the linear trend test
  ncp<-R*S*numer/denom

  # format outputs
  # Case-control parameters
  mat.para<-matrix(0, nrow=7, ncol=1)
  rownames(mat.para)<-c("Number of cases", "Number of controls", 
    "High risk allele frequency (A)", "Prevalence",
    "Genotypic relative risk Aa", "Genotypic relative risk AA",
    "Genotypic risk for aa (baseline)")
  mat.para[1,1]<-nCase
  mat.para[2,1]<-nControl
  mat.para[3,1]<-pA
  mat.para[4,1]<-pD
  mat.para[5,1]<-RRAa
  mat.para[6,1]<-RRAA
  mat.para[7,1]<-PrDgaa

  # Marker locus B
  mat.B<-matrix(0,nrow=7, ncol=1)
  rownames(mat.B)<-c("High risk allele frequency (B)",
    "Linkage disequilibrium (D')", "Penetrance at marker genotype bb",
    "Penetrance at marker genotype Bb", "Penetrance at marker genotype BB",
    "Genotypic odds ratio Bb", "Genotypic odds ratio BB") 
  mat.B[1,1]<-pB
  mat.B[2,1]<-Dprime
  mat.B[3,1]<-PrDgbb
  mat.B[4,1]<-PrDgBb
  mat.B[5,1]<-PrDgBB
  # OR(Bb|bb)
  mat.B[6,1]<-PrBbgD*PrbbgDbar/(PrBbgDbar*PrbbgD)
  # OR(BB|bb)
  mat.B[7,1]<-PrBBgD*PrbbgDbar/(PrBBgDbar*PrbbgD)

  # Expected allele frequencies Pr(B|D), Pr(b|D), Pr(B|\bar{D}), Pr(b|\bar{D})
  mat.aFreq<-matrix(0,nrow=2,ncol=2)
  rownames(mat.aFreq)<-c("B","b")
  colnames(mat.aFreq)<-c("Case","Control")
  mat.aFreq[1,1]<-PrBgD
  mat.aFreq[1,2]<-PrBgDbar
  mat.aFreq[2,1]<-PrbgD
  mat.aFreq[2,2]<-PrbgDbar

  # Expected genotype frequencies
  # Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  # Pr(BB|\bar{D}), Pr(Bb|\bar{D}), Pr(bb|\bar{D})
  mat.gFreq<-matrix(0, nrow=3, ncol=2)
  rownames(mat.gFreq)<-c("BB","Bb", "bb")
  colnames(mat.gFreq)<-c("Case", "Control")
  mat.gFreq[1,1]<-PrBBgD
  mat.gFreq[1,2]<-PrBBgDbar
  mat.gFreq[2,1]<-PrBbgD
  mat.gFreq[2,2]<-PrBbgDbar
  mat.gFreq[3,1]<-PrbbgD
  mat.gFreq[3,2]<-PrbbgDbar

  alpha.vec<-c(0.1, 0.05, 0.01, 0.001, alpha)
  power.vec<-rep(0,5)
  for(i in 1:5)
  { a<-alpha.vec[i]
    const<-qchisq(1-a, df=1)
    power.vec[i]<-1-pchisq(const, df=1, ncp=ncp)
  }

  # Case-Control statistics
  mat.stat<-cbind(alpha.vec, power.vec)
  colnames(mat.stat)<-c("Alpha", "Power")
  rownames(mat.stat)<-rep("",5)
 
  res<-list(power=power.vec[5], ncp=ncp,
            mat.para=mat.para, mat.B=mat.B, mat.aFreq=mat.aFreq,
            mat.gFreq=mat.gFreq, mat.stat=mat.stat)

  if(quiet==FALSE)
  {
    cat("\n Case-control parameters>>\n");
    print(mat.para)
    cat("\n Marker locus B>>\n");
    print(mat.B)
    cat("\n Expected allele frequencies>>\n");
    print(mat.aFreq)
    cat("\n Expected genotype frequencies>>\n");
    print(mat.gFreq)
    cat("\n Case-control statistics>>\n");
    print(mat.stat)
    cat("\n power (alpha=",alpha, ")=", power.vec[5], " ncp=", ncp, "\n")
  }

  invisible(res)
}

GPC<-function(pA, pD, RRAa, RRAA, r2, pB, 
                   nCase=500, ratio=1, alpha=0.05, quiet=FALSE)
{
  if(!(pA>0 && pA<1) || !(pB>0 && pB<1) || !(r2>=0 && r2<=1)
     || !(pD>0 && pD<1) || !(RRAa>1) || !(RRAA>1) ||
     !(nCase>1) || !(ratio>0) || !(alpha>0 && alpha<0.5))
  { cat("Some arguments are out of bounds!\n") 
    cat("0<pA<1\n") 
    cat("0<pD<1\n") 
    cat("1<RRAa\n") 
    cat("1<RRAA\n") 
    cat("0<=r2<=1\n") 
    cat("0<pB<1\n") 
    cat("1<nCase\n") 
    cat("0<ratio\n") 
    cat("0.05<alpha<0.5\n") 
    stop("Program ends due to input errors!\n")
  }

  # estimate Dprime based on r2, pA, pB
  Dprime<-Dprime.fun2(r2, pA, pB)

  res<-GPC.default(pA, pD, RRAa, RRAA, Dprime, pB, 
                   nCase, ratio, alpha, quiet)

  invisible(res)
}

# estimate haplotype frequencies Pr(AB), Pr(Ab), Pr(aB), and Pr(ab) based
# on pA, pB, and Dprime. We assume D>0
hapFreq<-function(Dprime, pA, pB)
{
  pa<-1-pA
  pb<-1-pB
  dmax<-min(c(pA*pb, pa*pB))
  D<-Dprime * dmax

  PrAB<-pA*pB+D
  PraB<-pa*pB-D
  PrAb<-pA*pb-D
  Prab<-pa*pb+D

  return(c(PrAB, PraB, PrAb, Prab))
}


# estimate the sampling probabilities Pr(BB|D), Pr(Bb|D), Pr(bb|D)
samplingProb<-function(myHapFreqs, pen, pD, pA, pB)
{
  pa<-1-pA
  pb<-1-pB
  # penetrances Pr(D|aa), Pr(D|Aa), Pr(D|AA)
  PrDgaa<-pen[1]
  PrDgAa<-pen[2]
  PrDgAA<-pen[3]

  # haplotype frequencies Pr(AB), Pr(aB), Pr(Ab), Pr(ab)
  PrAB<-myHapFreqs[1]
  PraB<-myHapFreqs[2]
  PrAb<-myHapFreqs[3]
  Prab<-myHapFreqs[4]
  
  # sampling probabilities for cases
  # Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  numer<-PrDgAA*PrAB^2+PrDgAa*2*PrAB*PraB+PrDgaa*PraB^2
  PrBBgD<-numer/pD

  numer<-PrDgAA*2*PrAB*PrAb+PrDgAa*2*(PrAB*Prab+PrAb*PraB)+PrDgaa*2*PraB*Prab
  PrBbgD<-numer/pD

  numer<-PrDgAA*PrAb^2+PrDgAa*2*PrAb*Prab+PrDgaa*Prab^2
  PrbbgD<-numer/pD

  PrDgBB<-PrBBgD*pD/(pB^2)
  PrDgBb<-PrBbgD*pD/(2*pB*pb)
  PrDgbb<-PrbbgD*pD/(pb^2)

  # sampling probabilities for controls
  # Pr(BB|\bar{D}), Pr(Bb|\bar{D}), Pr(bb|\bar{D})
  PrBBgDbar<-(1-PrDgBB)*pB^2/(1-pD)
  PrBbgDbar<-(1-PrDgBb)*2*pB*pb/(1-pD)
  PrbbgDbar<-(1-PrDgbb)*pb^2/(1-pD)

  # Expected allele frequencies Pr(B|D), Pr(b|D), Pr(B|Dbar), Pr(b|Dbar)
  PrBgD<-PrBBgD+PrBbgD/2
  PrbgD<-PrbbgD+PrBbgD/2
  PrBgDbar<-PrBBgDbar+PrBbgDbar/2
  PrbgDbar<-PrbbgDbar+PrBbgDbar/2

  return(c(PrBBgD, PrBbgD, PrbbgD, PrBBgDbar, PrBbgDbar, PrbbgDbar,
           PrDgBB, PrDgBb, PrDgbb, PrBgD, PrbgD, PrBgDbar, PrbgDbar))
}


# r2 -- LD measure r^2 between SNP 1 and SNP 2
# pA -- frequency of minor allele (A) for SNP 1
# pB -- frequency of minor allele (B) for SNP 2
#
# D = pA.pB - pAB
# 
# dmax = min(pA.(1-pB),(1-pA).pB)
# 
# dmin = max(-pA.pB, -(1-pA).(1-pB))
# 
# if D < 0, D' = D/dmin else D' = D/dmax
# 
# r2 = D.D/(pA.pB.(1-pA).(1-pB) 
#
# D' = |r|*sqrt(pA*pB*(1-pA)*(1-pB)) / dmax if D > 0
# D' = -|r|*sqrt(pA*pB*(1-pA)*(1-pB)) / dmin if D < 0
# 
#
# suppose that D < 0
Dprime.fun1<-function(r2, pA, pB)
{ tmpr2<-r2.upp1(pA, pB)
  if(r2>tmpr2)
  { msg<-paste("r2 = ", r2, " > upper bound of r2 = ", tmpr2, ". r2 is changed to floor(tmpr2*100)/100!\n");
    warning(msg);
    r2<-floor(tmpr2*100)/100
  }
  numer<- - sqrt(r2*pA*(1-pA)*pB*(1-pB));
  dmin<- max(-pA*pB, -(1-pA)*(1-pB));
  res<-numer/dmin;
  return(res)
}

# suppose that D > 0
Dprime.fun2<-function(r2, pA, pB)
{ 
  tmpr2<-r2.upp2(pA, pB)
  if(r2>tmpr2)
  { msg<-paste("r2 = ", r2, " > upper bound of r2 = ", tmpr2, ". r2 is changed to floor(tmpr2*100)/100!\n");
    warning(msg);
    r2<-floor(tmpr2*100)/100
  }
  numer<- sqrt(r2*pA*(1-pA)*pB*(1-pB));
  dmax<- min(pA*(1-pB), pB*(1-pA));
  res<-numer/dmax;
  return(res)
}

# upper bound of r2 given pA, pB for D<0
r2.upp1<-function(pA, pB)
{
  numer<-pA*pB*(1-pA)*(1-pB)
  dmin<- max(-pA*pB, -(1-pA)*(1-pB));
  res<-numer/(dmin^2)
  return(1/res)
}


# upper bound of r2 given pA, pB for D>0
r2.upp2<-function(pA, pB)
{
  numer<-pA*pB*(1-pA)*(1-pB)
  dmax<- min(pA*(1-pB), pB*(1-pA));
  res<-numer/(dmax^2)
  res<-dmax^2/numer
  return(res)
}

# $Id: gregorius.R 114 2003-05-22 17:25:23Z warnesgr $
#
# Code contributed by David Duffy <davidD@qumr.edu.au>.
#
# Gregorius, H.-R. 1980. The probability of losing an allele when
# diploid genotypes are sampled.  Biometrics 36, 643-652.
#
# Formula from "Corollary 2" and "Corollary 3" of that paper
#
# N is the number of genotypes sampled,
# freq=frequency of least common allele to be detected by the study,
# missprob=the probability of missing at least one allele
#
# tol=smallest term in series to be accumulated
#
gregorius <- function(freq, N, missprob, tol=1.0e-10, maxN=1e4, maxiter=100,
                      showiter=FALSE)
{
   

  find.alpha <- function(N, freq, tol) #, showiter=FALSE)
    {
      n<- floor(1/freq)
      i<-1
      sgn<- -1
      term<-1.0
      res<-0.0
      while(abs(term)>tol && i<n) {
        sgn<- (-1) ^ (i+1)
        term<- exp( lchoose(n-1,i) +
                   log(exp(N*log(1-i*freq))+
                       exp(i+N*log(freq)+(N-1)*log(n-i))))
        res<-res+sgn*term
        i<-i+1

#        if(showiter)
#          {
#            cat("i=",i,"\n")
#            cat("sgn=",sgn,"\n")
#            cat("term=",term,"\n")
#            cat("res=",res,"\n")
#          }
      }

      max(min(res,1),0)
    }


  retval <- list()
  retval$call <- match.call()
    
  
  if(!missing(N) && missing(missprob) )
    {
      retval$method <- "Compute missprob given N and freq"
      retval$freq <- freq
      retval$N <- N
      retval$missprob <- find.alpha(N=N,freq=freq,tol=tol) 
    }
  else if(missing(N) && !missing(missprob) )
    {
      retval$method <- "Determine minimal N given missprob and freq"
      retval$freq <- freq
      val <- binsearch( function(N) find.alpha(N=N, freq=freq, tol=tol),
                       range=c(1, maxN), target=missprob, showiter=showiter,
                       maxiter=maxiter )
      if(length(val$where)==2)
        {
          retval$N <- val$where[2]
          retval$missprob <- val$value[2]
        }
      else
        {
          retval$N <- val$where[1]
          retval$missprob <- val$value[1] 
       }
    }
  else
    stop("Exactly two of N, freq, and missprob must be specified")

  return(retval)
}

# power calculation for case-control design (default: case:control = 1:1)
# Author: Michael Man
#   Date: May 5, 2004
#      N: total number of subjects
#  gamma: relative risk in multiplicative model;
#         not used in Dominant or Recessive model (assume A as protective allele)
#      p: frequency of A allele
#     kp: prevalence of disease
#  alpha: significance level
#     fc: fraction of cases
#     pi: probability of 'aa' genotype has the disease
#   minh: mode of inheritance
# reference: Long, A. D. and C. H. Langley (1997). Genetic analysis of complex traits. Science 275: 1328.
#            Agresti, A. (2002) Categorical Data Analysis. Second Edition, p243.
# ( modified from pbsize{gap} )
# requirement: It is recommended to use R 1.9.0 or above.  The function 'qchisq' in earlier version
#              has problem with large noncentrality parameter.


# under HWE                       AA       Aa       aa       
#   fHW = p(genotype)        = c( p^2,     2pq,     q^2 )     

# model specification 
#   f.mod = relative risk    = c(gamma^2,  gamma,    1 )    # multiplicative model
#   f.mod =                  = c(  0,        0,      1 )    #       dominant model
#   f.mod =                  = c(  0,        1,      1 )    #      recessive model

# conditional prob.
#   p(D|genotype) = f.mod*pi = c(gamma^2,  gamma,    1 )*pi

# population joint prob. (f.mod = 1 under Ho)
#   Case     p(D,     genotype) = p(genotype)*     p(D|genotype)  = fHW*   f.mod*pi
#   Control  p(D_not, genotype) = p(genotype)*(1 - p(D|genotype)) = fHW*(1-f.mod*pi)

# population conditional prob. (f.mod = 1 under Ho)
#   Case     p(genotype|D)     = p(D    , genotype)/P(D    ) = P(D    , genotype)/sum(P(D    , genotype)) = fHW*   f.mod*pi  /    sum(fHW*f.mod*pi)
#   Control  p(genotype|D_not) = p(D_not, genotype)/P(D_not) = P(D_not, genotype)/sum(P(D_not, genotype)) = fHW*(1-f.mod*pi) / (1-sum(fHW*f.mod*pi))

# sample or allocation probability
#   1:1 case-control design  p(D|Sample) = fc = 1/2
#   1:2 case-control design                fc = 1/3
#   a prospective design                   fc = sum(fHW*f.mod*pi)

# sample joint prob. (f.mod = 1 under Ho)
# for prospective design, this is the same as population joint prob. since 'fc' cancels out with 'sum(fHW*f.mod*pi)' 
#   Case     p(genotype,D    |sample) = p(genotype|D    )*     p(D|Sample)  =    fc *fHW*   f.mod*pi  /    sum(fHW*f.mod*pi)
#   Control  p(genotype,D_not|sample) = p(genotype|D_not)*(1 - p(D|Sample)) = (1-fc)*fHW*(1-f.mod*pi) / (1-sum(fHW*f.mod*pi))


power.casectrl <- function (N, gamma = 4.5, p = 0.15, kp=.1, alpha=.05, fc=0.5,
                            minh=c('multiplicative', 'dominant','recessive',
                                   'partialrecessive')
                            ) 
{
  minh <- match.arg(minh)
  if ( !all(gamma > 0, N > 0) ) stop('N and gamma must be greater than 0')
  if ( min(p, kp, alpha, fc) <= 0 | max(p, kp, alpha, fc) >=1 ) stop('p, kp, alpha, and fc must be between 0 and 1.') 
  f.mod <- switch(minh,
         multiplicative = c(gamma^2, gamma, 1),
      partialrecessive = c(gamma,       1, 1),           
         dominant       = c(      0,     0, 1),
         recessive      = c(      0,     1, 1)  ) 
  q <- 1 - p
  fhw <- c(p^2, 2*p*q, q^2)
  pi <- kp/sum(f.mod*fhw)
  if (pi <= 0 | pi >=1) {
    warning('The combination of p, kp, and gamma produces an unrealistic value of pi.')
    ret <- NA
  } else {
    fe  <- rbind(fhw, fhw)
    dimnames(fe) <- list(c("Case", "Control"), c("AA", "Aa", "aa"))
    f <- fe*rbind(f.mod*pi, 1-f.mod*pi)
    Pct <- apply(f, 1, sum)
    f2 <-  f *c(fc, 1-fc)/Pct   # normalize the frequencies for each row
    fe2 <- fe*c(fc, 1-fc)  
    fe2; apply(fe2, 1, sum); f2; apply(f2, 1, sum)  
    lambda <- sum((f2-fe2)^2/fe2)*N
    ret <- 1 - pchisq(qchisq(1-alpha, df=1), df=1, ncp=lambda, lower.tail=T)
  }
  ret
}

power.casectrl.plot <- function (N, gamma=1.6, p=1:9/10, kp=0.1, alpha=0.05, fc=0.5,
                                 minh=c('multiplicative', 'dominant','recessive'),
                                 Nsnp=1, vary=c('prevalence','SNPs'), ylim=c(0,1), PLOT=T, ... )
{
  minh <- match.arg(minh)
  vary <- match.arg(vary)
  if (length(p)<2) stop('Must have more than 1 value in p.')
  if (length(kp) > 1 & length(Nsnp) > 1) stop("Nsnps and kp can't be all > 1.")
  if (vary=='prevalence') {
    cmd <- expression(tapply(p, p, function(x, ...) power.casectrl(p=x,...), N=N, gamma=gamma, kp=kp[j], alpha=alpha/Nsnp, fc=fc, minh=minh))
    Xvary <- kp 
  } else if (vary=='SNPs') {
    cmd <- expression(tapply(p, p, function(x, ...) power.casectrl(p=x,...), N=N, gamma=gamma, kp=kp, alpha=alpha/Nsnp[j], fc=fc, minh=minh))
    Xvary <- Nsnp 
  }
  J <- length(Xvary)
  ret <- matrix(NA, nc=J, nr=length(p))
  colnames(ret) <- paste(vary, '=', Xvary)
  for (j in 1:J) ret[,j] <- eval(cmd)

  if (PLOT) {
    nc <- 1:ncol(ret)
    subt <- paste("( RR", gamma, "; total subjects", N,"; SNPs", Nsnp[1], "; prevalence", kp[1],
                   "; mode of inheritance:", minh, "; overall sig.level", alpha, ")" )
    matplot(p, ret, type="l", ylim=ylim, lty=1, col=nc, xlab="Allele Frequency", ylab="Power", sub=subt, ...)
    abline(h=c(.8), lty=1)
    legend( locator(1), colnames(ret), lty=1, col=nc )
  }
  ret
}

# power calculation for studies using baseline measure 
#   - simulation: continuous response, baseline and genotype as covariate (ANCOVA)
#   - can specify various modes of inheritance ('additive', 'dominant','recessive')
#   - use compound symmetry for covariance matrix 
# Author: Michael Man
#   Date: June 22, 2004
#      N: total number of subjects
#      p: frequency of A allele
#  alpha: significance level                     (used only in 'power.genotype.conti')
#    Rep: number of iterations to generate power (used only in 'power.genotype.conti')
#     pi: correlation coefficient
#    me1: mean of control group
#  delta: treatment/genotype effect
#  sd1/2: standard deviation of the control and treatment groups
#   minh: mode of inheritance
# genotype.delta: the effect due to individual genotype effect or overall effect
# Factor: whether treat 'Trt' as a factor in 'x'
#   TEST: debug
# reference:
#   Frison and Pocock (1992) "Repeated measures in clinical trials: analysis using mean summary statistics
#     and its implications for design" Statistics in Medicine 11:1685-1704
#   Vickers (2001) "The use of percentage change from baseline as an outcome in a controlled trial is
#     statistically inefficient: a simulation study" BMC Med Res Methodol. 2001; 1 (1): 6
# requirement: need library 'mvtnorm'

### power calculation
power.genotype.conti <- function(N, Rep=2000, alpha=.05, ...){
  pval <- sapply(rep(N, Rep), FUN=simu.genotype.conti, ...)
  retval <- list()
  retval$power <- mean(pval<=alpha)
  retval$ci    <- ci.binom(pval<=alpha)
  rownames(retval$ci) <- "power"
  retval$call  <- match.call()
  class(retval) <- "power.genoytpe.conti"
  retval
}

print.power.genoytpe.conti <- function(x, ...)
  {
    cat("\n")
    cat("Power calculation for a study including genotyping\n")
    cat("\n")
    cat("call:\n")
    print(x$call)
    cat("\n")
    cat("Estimated power:", x$power, "\n")
    cat("\n")
    cat("Simulation confidence region for estimated power:\n")
    cat("\n")
    print(x$ci)
  }


### simulation
simu.genotype.conti <- function (N, p=0.15, pi=0, me1=50, me2=me1, delta=-5, sd1=10, sd2=10, verbose=FALSE,
                                 minh=c('additive', 'dominant','recessive'), genotype.delta=TRUE, Factor=FALSE) 
{
  minh <- match.arg(minh)
  if ( min(N, sd1, sd2)<0 ) stop('N, sd1, and sd2 must be greater than 0')
  if ( p<=0 | abs(pi)<0 | p>=1 | abs(pi)>1 ) stop('p and abs(pi) must be between 0 and 1.') 
  f.mod <- switch(minh,
         dominant       = c(      0,     1, 1),
         additive       = c(      0 ,  0.5, 1),
         recessive      = c(      0,     0, 1)  ) 
  q <- 1 - p
  fhw <- c(q^2, 2*p*q, p^2)  # major allele first
  nhw <- round(N*fhw)
  if (sum(nhw)!=N) nhw[3] <- N-sum(nhw[1:2])
  covm <- matrix(c(1,    pi,    pi,    1   ), nr=2)*
          matrix(c(sd1^2, sd1*sd2, sd1*sd2, sd2^2), nr=2)
  if (!genotype.delta) delta <- delta/sum(fhw*f.mod)  # convert to overall delta due to all genotypes

  ## explicitly create x1:x3, t1:t3 to avoid R CMD check warning for R > 2.6.0
  x1 <- x2 <- x3 <- NULL
  t1 <- t2 <- t3 <- NULL
  
  for (i in 1:3) {
    if (nhw[i]!=0) {
      assign(paste('x',i,sep=''), rmvnorm(n=nhw[i], mean=c(me1,me2+f.mod[i]*delta), sigma=covm))
      assign(paste('t',i,sep=''), rep(f.mod[i],nhw[i]))
    } else {
      assign(paste('x',i,sep=''), NULL)
      assign(paste('t',i,sep=''), NULL)
    }
  }
  x <- data.frame(rbind(x1,x2,x3), Trt=c(t1,t2,t3))
  if (Factor) x$Trt <- as.factor(x$Trt)
  colnames(x) <- c('X', 'Y', 'Trt')
  mod <- lm(Y~X+Trt, data=x)   # ANCOVA
  if (verbose) {
    print( summary(mod) )
    plot(Y~X+Trt, data=x)
  }
  ret <- anova(mod, test='F')['Trt','Pr(>F)']
  ret
}




## contributed by Michael Man.
## TODO: Needs documentation before exporting.


### simple simulation for two group design
pw <- function(n1, n2=n1*(1-fc)/fc, fc=.5, pi=0, me1=50, me2=45, sd1=10, sd2=10, TEST=F){
  covm <- matrix(c(1,    pi,    pi,    1   ), nr=2)*
          matrix(c(sd1^2, sd1*sd2, sd1*sd2, sd2^2), nr=2)
  x1 <- rmvnorm(n=n1, mean=c(me1,me1), sigma=covm)
  x2 <- rmvnorm(n=n2, mean=c(me1,me2), sigma=covm)
  x <- data.frame(rbind(x1,x2), Trt=c(rep(0,n1), rep(1,n2)))
  colnames(x) <- c('X', 'Y', 'Trt')
  mod <- lm(Y~X+Trt, data=x)
  if (TEST) {
    print( summary(mod) )
    plot(Y~X+Trt, data=x)
  }
  ret <- anova(mod, test='F')['Trt','Pr(>F)']
  ret
}

