.packageName <- "GeneticsDesign"
# Genetics power calculator for linear trend association studies
#
# Purcell's Power estimation method
# (http://pngu.mgh.harvard.edu/~purcell/gpc/cc2.html)
#
# Linear Tread Test
# http://linkage.rockefeller.edu/pawe3d/help/Linear-trend-test-ncp.html
#
#
# Given
# pA -- High risk allele frequency (A)
# pD -- disease prevalence
# RRAa -- Genotype relative risk Aa = RR(Aa|aa)=Pr(D|Aa)/Pr(D|aa)
# RRAA -- Genotype relative risk AA = RR(AA|aa)=Pr(D|AA)/Pr(D|aa)
# Dprime -- LD measure
# pB -- Marker allele frequency (B)
# nCase -- Number of cases
# ratio -- Control: case ratio = nControl/nCase
# alpha -- User-defined type I error rate
GPC.default<-function(pA, pD, RRAa, RRAA, Dprime, pB, 
                   nCase=500, ratio=1, alpha=0.05, quiet=FALSE)
{
  if(!(pA>0 && pA<1))
    stop("Invalid value for pA, should fulfill 0<pA<1")
  if(!(pB>0 && pB<1))
    stop("Invalid value for pB, should fulfill 0<pB<1")
  if(!(pD>0 && pD<1))
    stop("Invalid value for pD, should fulfill 0<pD<1") 
  if(!(Dprime>=0 && Dprime<=1))
    stop("Invalid value for Dprime, should fulfill 0<=Dprime<=1")
  if(!(RRAa>1))
    stop("Invalid value for RRAa, should fulfill RRAa>1")
  if(!(RRAA>1))
    stop("Invalid value for RRAA, should fulfill RRAA>1")
  if(!(nCase>1))
    stop("Invalid value for nCase, should be > 1")
  if(!(ratio>0))
    stop("Invalid value for ratio, should be > 0")
  if(!(alpha>0 && alpha<0.5))
    stop("Invalid value for alpha, should fulfill 0<alpha<0.5")
  # get penetrances Pr(D|aa), Pr(D|Aa), Pr(D|AA)
  pa<-1-pA
  pb<-1-pB
  denom<-RRAA*pA^2+RRAa*2*pA*pa+pa^2
  PrDgaa<-pD/denom

  PrDgAa<-RRAa*PrDgaa
  PrDgAA<-RRAA*PrDgaa

  pen<-c(PrDgaa, PrDgAa, PrDgAA)

  # estimate haplotype frequencies Pr(AB), Pr(Ab), Pr(aB), and Pr(ab)
  # based on pA, pB, and Dprime. We assume D>0
  myHapFreqs<-hapFreq(Dprime, pA, pB)

  # estimate the sampling probabilities Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  tmp<-samplingProb(myHapFreqs, pen, pD, pA, pB)
  PrBBgD<-tmp[1]
  PrBbgD<-tmp[2]
  PrbbgD<-tmp[3]
  PrBBgDbar<-tmp[4]
  PrBbgDbar<-tmp[5]
  PrbbgDbar<-tmp[6]

  PrDgBB<-tmp[7]
  PrDgBb<-tmp[8]
  PrDgbb<-tmp[9]

  PrBgD<-tmp[10]
  PrbgD<-tmp[11]
  PrBgDbar<-tmp[12]
  PrbgDbar<-tmp[13]

  n0c<-(nCase*PrbbgD)
  n1c<-(nCase*PrBbgD)
  n2c<-nCase-n0c-n1c

  nControl<-(nCase*ratio)

  n0n<-(nControl*PrbbgDbar)
  n1n<-(nControl*PrBbgDbar)
  n2n<-nControl-n0n-n1n

  nc.vec<-c(n2c, n1c, n0c)
  nn.vec<-c(n2n, n1n, n0n)

  mat<-data.frame(case=nc.vec, control=nn.vec, code=c(2,1,0))

  x.vec<-c(2, 1, 0)

  R<-sum(nc.vec)
  S<-sum(nn.vec)

  n0<-n0c+n0n
  n1<-n1c+n1n
  n2<-n2c+n2n
  n.vec<-c(n2, n1, n0)

  N<-R+S

  p1.vec<-c(PrBBgD, PrBbgD, PrbbgD)
  p0.vec<-c(PrBBgDbar, PrBbgDbar, PrbbgDbar)

  numer<-sum(x.vec*(p1.vec-p0.vec))
  numer<-numer^2
  part1<-sum(x.vec^2*(R*p0.vec+S*p1.vec))
  part2<-sum(x.vec*(R*p0.vec+S*p1.vec))
  denom<-part1-part2^2/N

  # non-centrality parameter for the linear trend test
  ncp<-R*S*numer/denom

  # format outputs
  # Case-control parameters
  mat.para<-matrix(0, nrow=7, ncol=1)
  rownames(mat.para)<-c("Number of cases", "Number of controls", 
    "High risk allele frequency (A)", "Prevalence",
    "Genotypic relative risk Aa", "Genotypic relative risk AA",
    "Genotypic risk for aa (baseline)")
  mat.para[1,1]<-nCase
  mat.para[2,1]<-nControl
  mat.para[3,1]<-pA
  mat.para[4,1]<-pD
  mat.para[5,1]<-RRAa
  mat.para[6,1]<-RRAA
  mat.para[7,1]<-PrDgaa

  # Marker locus B
  mat.B<-matrix(0,nrow=7, ncol=1)
  rownames(mat.B)<-c("High risk allele frequency (B)",
    "Linkage disequilibrium (D')", "Penetrance at marker genotype bb",
    "Penetrance at marker genotype Bb", "Penetrance at marker genotype BB",
    "Genotypic odds ratio Bb", "Genotypic odds ratio BB") 
  mat.B[1,1]<-pB
  mat.B[2,1]<-Dprime
  mat.B[3,1]<-PrDgbb
  mat.B[4,1]<-PrDgBb
  mat.B[5,1]<-PrDgBB
  # OR(Bb|bb)
  mat.B[6,1]<-PrBbgD*PrbbgDbar/(PrBbgDbar*PrbbgD)
  # OR(BB|bb)
  mat.B[7,1]<-PrBBgD*PrbbgDbar/(PrBBgDbar*PrbbgD)

  # Expected allele frequencies Pr(B|D), Pr(b|D), Pr(B|\bar{D}), Pr(b|\bar{D})
  mat.aFreq<-matrix(0,nrow=2,ncol=2)
  rownames(mat.aFreq)<-c("B","b")
  colnames(mat.aFreq)<-c("Case","Control")
  mat.aFreq[1,1]<-PrBgD
  mat.aFreq[1,2]<-PrBgDbar
  mat.aFreq[2,1]<-PrbgD
  mat.aFreq[2,2]<-PrbgDbar

  # Expected genotype frequencies
  # Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  # Pr(BB|\bar{D}), Pr(Bb|\bar{D}), Pr(bb|\bar{D})
  mat.gFreq<-matrix(0, nrow=3, ncol=2)
  rownames(mat.gFreq)<-c("BB","Bb", "bb")
  colnames(mat.gFreq)<-c("Case", "Control")
  mat.gFreq[1,1]<-PrBBgD
  mat.gFreq[1,2]<-PrBBgDbar
  mat.gFreq[2,1]<-PrBbgD
  mat.gFreq[2,2]<-PrBbgDbar
  mat.gFreq[3,1]<-PrbbgD
  mat.gFreq[3,2]<-PrbbgDbar

  alpha.vec<-c(0.1, 0.05, 0.01, 0.001, alpha)
  power.vec<-rep(0,5)
  for(i in 1:5)
  { a<-alpha.vec[i]
    const<-qchisq(1-a, df=1)
    power.vec[i]<-1-pchisq(const, df=1, ncp=ncp)
  }

  # Case-Control statistics
  mat.stat<-cbind(alpha.vec, power.vec)
  colnames(mat.stat)<-c("Alpha", "Power")
  rownames(mat.stat)<-rep("",5)
 
  res<-list(power=power.vec[5], ncp=ncp,
            mat.para=mat.para, mat.B=mat.B, mat.aFreq=mat.aFreq,
            mat.gFreq=mat.gFreq, mat.stat=mat.stat)

  if(quiet==FALSE)
  {
    cat("\n Case-control parameters>>\n");
    print(mat.para)
    cat("\n Marker locus B>>\n");
    print(mat.B)
    cat("\n Expected allele frequencies>>\n");
    print(mat.aFreq)
    cat("\n Expected genotype frequencies>>\n");
    print(mat.gFreq)
    cat("\n Case-control statistics>>\n");
    print(mat.stat)
    cat("\n power (alpha=",alpha, ")=", power.vec[5], " ncp=", ncp, "\n")
  }

  invisible(res)
}

GPC<-function(pA, pD, RRAa, RRAA, r2, pB, 
                   nCase=500, ratio=1, alpha=0.05, quiet=FALSE)
{
  if(!(pA>0 && pA<1))
    stop("Invalid value for pA, should fulfill 0<pA<1")
  if(!(pB>0 && pB<1))
    stop("Invalid value for pB, should fulfill 0<pB<1")
  if(!(pD>0 && pD<1))
    stop("Invalid value for pD, should fulfill 0<pD<1") 
  if(!(RRAa>1))
    stop("Invalid value for RRAa, should fulfill RRAa>1")
  if(!(RRAA>1))
    stop("Invalid value for RRAA, should fulfill RRAA>1")
  if(!(nCase>1))
    stop("Invalid value for nCase, should be > 1")
  if(!(ratio>0))
    stop("Invalid value for ratio, should be > 0")
  if(!(alpha>0 && alpha<0.5))
    stop("Invalid value for alpha, should fulfill 0<alpha<0.5")
  if(!(r2>=0 && r2<=1))
    stop("Invalid value for r2, should fulfill 0<=r2<=1") 

  # estimate Dprime based on r2, pA, pB
  Dprime<-Dprime.fun2(r2, pA, pB)

  res<-GPC.default(pA, pD, RRAa, RRAA, Dprime, pB, 
                   nCase, ratio, alpha, quiet)

  invisible(res)
}

# estimate haplotype frequencies Pr(AB), Pr(Ab), Pr(aB), and Pr(ab) based
# on pA, pB, and Dprime. We assume D>0
hapFreq<-function(Dprime, pA, pB)
{
  pa<-1-pA
  pb<-1-pB
  dmax<-min(c(pA*pb, pa*pB))
  D<-Dprime * dmax

  PrAB<-pA*pB+D
  PraB<-pa*pB-D
  PrAb<-pA*pb-D
  Prab<-pa*pb+D

  return(c(PrAB, PraB, PrAb, Prab))
}


# estimate the sampling probabilities Pr(BB|D), Pr(Bb|D), Pr(bb|D)
samplingProb<-function(myHapFreqs, pen, pD, pA, pB)
{
  pa<-1-pA
  pb<-1-pB
  # penetrances Pr(D|aa), Pr(D|Aa), Pr(D|AA)
  PrDgaa<-pen[1]
  PrDgAa<-pen[2]
  PrDgAA<-pen[3]

  # haplotype frequencies Pr(AB), Pr(aB), Pr(Ab), Pr(ab)
  PrAB<-myHapFreqs[1]
  PraB<-myHapFreqs[2]
  PrAb<-myHapFreqs[3]
  Prab<-myHapFreqs[4]
  
  # sampling probabilities for cases
  # Pr(BB|D), Pr(Bb|D), Pr(bb|D)
  numer<-PrDgAA*PrAB^2+PrDgAa*2*PrAB*PraB+PrDgaa*PraB^2
  PrBBgD<-numer/pD

  numer<-PrDgAA*2*PrAB*PrAb+PrDgAa*2*(PrAB*Prab+PrAb*PraB)+PrDgaa*2*PraB*Prab
  PrBbgD<-numer/pD

  numer<-PrDgAA*PrAb^2+PrDgAa*2*PrAb*Prab+PrDgaa*Prab^2
  PrbbgD<-numer/pD

  PrDgBB<-PrBBgD*pD/(pB^2)
  PrDgBb<-PrBbgD*pD/(2*pB*pb)
  PrDgbb<-PrbbgD*pD/(pb^2)

  # sampling probabilities for controls
  # Pr(BB|\bar{D}), Pr(Bb|\bar{D}), Pr(bb|\bar{D})
  PrBBgDbar<-(1-PrDgBB)*pB^2/(1-pD)
  PrBbgDbar<-(1-PrDgBb)*2*pB*pb/(1-pD)
  PrbbgDbar<-(1-PrDgbb)*pb^2/(1-pD)

  # Expected allele frequencies Pr(B|D), Pr(b|D), Pr(B|Dbar), Pr(b|Dbar)
  PrBgD<-PrBBgD+PrBbgD/2
  PrbgD<-PrbbgD+PrBbgD/2
  PrBgDbar<-PrBBgDbar+PrBbgDbar/2
  PrbgDbar<-PrbbgDbar+PrBbgDbar/2

  return(c(PrBBgD, PrBbgD, PrbbgD, PrBBgDbar, PrBbgDbar, PrbbgDbar,
           PrDgBB, PrDgBb, PrDgbb, PrBgD, PrbgD, PrBgDbar, PrbgDbar))
}


# r2 -- LD measure r^2 between SNP 1 and SNP 2
# pA -- frequency of minor allele (A) for SNP 1
# pB -- frequency of minor allele (B) for SNP 2
#
# D = pA.pB - pAB
# 
# dmax = min(pA.(1-pB),(1-pA).pB)
# 
# dmin = max(-pA.pB, -(1-pA).(1-pB))
# 
# if D < 0, D' = D/dmin else D' = D/dmax
# 
# r2 = D.D/(pA.pB.(1-pA).(1-pB) 
#
# D' = |r|*sqrt(pA*pB*(1-pA)*(1-pB)) / dmax if D > 0
# D' = -|r|*sqrt(pA*pB*(1-pA)*(1-pB)) / dmin if D < 0
# 
#
# suppose that D < 0
Dprime.fun1<-function(r2, pA, pB)
{ tmpr2<-r2.upp1(pA, pB)
  if(r2>tmpr2)
  { msg<-paste("r2 = ", r2, " > upper bound of r2 = ", tmpr2, ". r2 is changed to floor(tmpr2*100)/100!\n");
    warning(msg);
    r2<-floor(tmpr2*100)/100
  }
  numer<- - sqrt(r2*pA*(1-pA)*pB*(1-pB));
  dmin<- max(-pA*pB, -(1-pA)*(1-pB));
  res<-numer/dmin;
  return(res)
}

# suppose that D > 0
Dprime.fun2<-function(r2, pA, pB)
{ 
  tmpr2<-r2.upp2(pA, pB)
  if(r2>tmpr2)
  { msg<-paste("r2 = ", r2, " > upper bound of r2 = ", tmpr2, ". r2 is changed to floor(tmpr2*100)/100!\n");
    warning(msg);
    r2<-floor(tmpr2*100)/100
  }
  numer<- sqrt(r2*pA*(1-pA)*pB*(1-pB));
  dmax<- min(pA*(1-pB), pB*(1-pA));
  res<-numer/dmax;
  return(res)
}

# upper bound of r2 given pA, pB for D<0
r2.upp1<-function(pA, pB)
{
  numer<-pA*pB*(1-pA)*(1-pB)
  dmin<- max(-pA*pB, -(1-pA)*(1-pB));
  res<-numer/(dmin^2)
  return(1/res)
}


# upper bound of r2 given pA, pB for D>0
r2.upp2<-function(pA, pB)
{
  numer<-pA*pB*(1-pA)*(1-pB)
  dmax<- min(pA*(1-pB), pB*(1-pA));
  res<-numer/(dmax^2)
  res<-dmax^2/numer
  return(res)
}

GeneticPower.Quantitative.Factor <- function(N=1000,
                                             delta=1,
                                             freq=0.15,
                                             minh=c("additive","dominant","recessive"),
                                             sigma=1,
                                             OtherParms=0,
                                             alpha=0.05,
                                             numtests=1,
                                             moi=NULL,
                                             rsquared=NULL) 
{
## N = total samples in the analysis
## delta = mean(bb) - mean (AA), where 'b' is the disease allele, 'A' is the reference allele
## freq = allele frequency of disease allele 'b'
## minh = mode of inheritance:  "recessive", "additive", "dominant" same as moi=0,0.5, and 1.0, respectively
##        defaults to "additive" if no moi specified eith
## sigma = standard deviation of the response phenotype
## OtherParms = the number of additional parameters (really, DOF) in the model that will reduce your overall DOF
## alpha = the desired significance level
## numtests = the number of tests to be corrected by Bonferroni adjustment beforee achieving 'alpha'
## moi = mode of inheritance: 0 for recessive, 0.5 for additive, 1.0 for dominant, or anywhere in between,
##       this OVER-RIDES minh...useful for modeling i-between moi's...
## rsquared = fraction of total sum-of-squares explained by fit. OVER-RIDES delta AND sigma.

    alphy <- 1-(1-alpha)^(1/numtests);

    if (is.null(moi)) {
      minh <- match.arg(minh) # can use abbreviated names
      moi <- switch(minh,
                    additive   = 0.5,
                    dominant   = 1.0,
                    recessive  = 0  ) # define the mode of inheritance
    }
  
    N1 <- N*(1-freq)^2;
    N2 <- 2*N*freq*(1-freq);
    N3 <- N*freq^2;
     
    mu <- c(0,moi,1)*delta; 
    mu.bar <- (mu[1]*N1+mu[2]*N2+mu[3]*N3)/N;

    lambda <- ifelse(is.null(rsquared),
                     (N1*(mu[1]-mu.bar)^2+N2*(mu[2]-mu.bar)^2+N3*(mu[3]-mu.bar)^2)/(sigma^2),
                     (N-3)*rsquared/(1-rsquared)
                     );

    power <- pf(qf(1-alphy,df1=2,df2=(N-3-OtherParms)),ncp=lambda,df1=2,df2=(N-3-OtherParms),lower.tail=F)

    return(power)
}
GeneticPower.Quantitative.Numeric <- function(N=1000,
                                              delta=1,
                                              freq=0.15,
                                              minh=c('additive','dominant','recessive'),
                                              sigma=1,
                                              OtherParms=0,
                                              alpha=0.05,
                                              numtests=1,
                                              moi=NULL,
                                              rsquared=NULL)
{
## N = total samples in the analysis
## delta = mean(bb) - mean (AA), where 'b' is the disease allele, 'A' is the reference allele
## freq = allele frequency of disease allele 'b'
## minh = mode of inheritance:  "recessive", "additive", "dominant" same as moi=0,0.5,and 1.0, respectively.
##        defaults to "additive" if no moi specified either
## sigma = standard deviation of the response phenotype
## OtherParms = the number of additional parameters (really, DOF) in the model that will reduce your overall DOF
## alpha = the desired significance level
## numtests = the number of tests to be corrected by Bonferroni adjustment beforee achieving 'alpha'
## moi = mode of inheritance: 0 for recessive, 0.5 for additive, 1.0 for dominant, or anywhere in between,
##       this OVER-RIDES minh...useful for modeling in-between moi's...
## rsquared = fraction of total sum-of-squares explained by fit. OVER-RIDES delta AND sigma.

    alphy <- 1-(1-alpha)^(1/numtests);

    if (is.null(moi)) {
      minh <- match.arg(minh) # can use abbreviated names
      moi <- switch(minh,
                    additive   = 0.5,
                    dominant   = 1.0,
                    recessive  = 0  ) # define the mode of inheritance
    }
     
    N1 <- N*(1-freq)^2;
    N2 <- 2*N*freq*(1-freq);
    N3 <- N*freq^2;
    
    mu <- c(0,moi,1)*delta; 

    if (is.null(rsquared)) {
      
      xbar <- (N2 + N3*2)/N;
      ybar <- (N2*mu[2] + N3*mu[3])/N;
      
      slope <- (N1*xbar*ybar + (1-xbar)*N2*(mu[2]-ybar) + (2-xbar)*N3*(mu[3]-ybar))/(N1*xbar^2 + N2*(1-xbar)^2 + N3*(2-xbar)^2);
      a <- ybar - slope*xbar;
      
      fits <- a + slope*c(0,1,2);
      
      eNs <- c(N1,N2,N3);
      
      Rsqtop <- sum(eNs*(fits-ybar)^2);
      
      Rsqbottom <- Rsqtop + sum((eNs-1)*sigma^2 + eNs*(mu - fits) + eNs*(mu-fits)^2);
      
      NewRsquared <- Rsqtop/Rsqbottom;
      
    } else {
      
      NewRsquared <- rsquared;
    } 
    
    power <- pf(qf(1-alphy,df1=1,df2=(N-2-OtherParms)),ncp=(NewRsquared*(N-2)/((1-NewRsquared))),df1=1,df2=(N-2-OtherParms),lower.tail=F);

    return(power);
}




# $Id: gregorius.R 114 2003-05-22 17:25:23Z warnesgr $
#
# Code contributed by David Duffy <davidD@qumr.edu.au>.
#
# Gregorius, H.-R. 1980. The probability of losing an allele when
# diploid genotypes are sampled.  Biometrics 36, 643-652.
#
# Formula from "Corollary 2" and "Corollary 3" of that paper
#
# N is the number of genotypes sampled,
# freq=frequency of least common allele to be detected by the study,
# missprob=the probability of missing at least one allele
#
# tol=smallest term in series to be accumulated
#
gregorius <- function(freq, N, missprob, tol=1.0e-10, maxN=1e4, maxiter=100,
                      showiter=FALSE)
{
   

  find.alpha <- function(N, freq, tol) #, showiter=FALSE)
    {
      n<- floor(1/freq)
      i<-1
      sgn<- -1
      term<-1.0
      res<-0.0
      while(abs(term)>tol && i<n) {
        sgn<- (-1) ^ (i+1)
        term<- exp( lchoose(n-1,i) +
                   log(exp(N*log(1-i*freq))+
                       exp(i+N*log(freq)+(N-1)*log(n-i))))
        res<-res+sgn*term
        i<-i+1

#        if(showiter)
#          {
#            cat("i=",i,"\n")
#            cat("sgn=",sgn,"\n")
#            cat("term=",term,"\n")
#            cat("res=",res,"\n")
#          }
      }

      max(min(res,1),0)
    }


  retval <- list()
  retval$call <- match.call()
    
  
  if(!missing(N) && missing(missprob) )
    {
      retval$method <- "Compute missprob given N and freq"
      retval$freq <- freq
      retval$N <- N
      retval$missprob <- find.alpha(N=N,freq=freq,tol=tol) 
    }
  else if(missing(N) && !missing(missprob) )
    {
      retval$method <- "Determine minimal N given missprob and freq"
      retval$freq <- freq
      val <- binsearch( function(N) find.alpha(N=N, freq=freq, tol=tol),
                       range=c(1, maxN), target=missprob, showiter=showiter,
                       maxiter=maxiter )
      if(length(val$where)==2)
        {
          retval$N <- val$where[2]
          retval$missprob <- val$value[2]
        }
      else
        {
          retval$N <- val$where[1]
          retval$missprob <- val$value[1] 
       }
    }
  else
    stop("Exactly two of N, freq, and missprob must be specified")

  return(retval)
}

power.casectrl <- function (...)
{
  .Deprecated("'GPC', 'GeneticPower.Quantitative.Factor', or 'GeneticPower.Quantitative.Numeric")
}
power.casectrl.plot <- function (N, gamma=1.6, p=1:9/10, kp=0.1, alpha=0.05, fc=0.5,
                                 minh=c('multiplicative', 'dominant','recessive'),
                                 Nsnp=1, vary=c('prevalence','SNPs'), ylim=c(0,1), PLOT=T, ... )
{
  minh <- match.arg(minh)
  vary <- match.arg(vary)
  if (length(p)<2) stop('Must have more than 1 value in p.')
  if (length(kp) > 1 & length(Nsnp) > 1) stop("Nsnps and kp can't be all > 1.")
  if (vary=='prevalence') {
    cmd <- expression(tapply(p, p, function(x, ...) power.casectrl(p=x,...), N=N, gamma=gamma, kp=kp[j], alpha=alpha/Nsnp, fc=fc, minh=minh))
    Xvary <- kp 
  } else if (vary=='SNPs') {
    cmd <- expression(tapply(p, p, function(x, ...) power.casectrl(p=x,...), N=N, gamma=gamma, kp=kp, alpha=alpha/Nsnp[j], fc=fc, minh=minh))
    Xvary <- Nsnp 
  }
  J <- length(Xvary)
  ret <- matrix(NA, nc=J, nr=length(p))
  colnames(ret) <- paste(vary, '=', Xvary)
  for (j in 1:J) ret[,j] <- eval(cmd)

  if (PLOT) {
    nc <- 1:ncol(ret)
    subt <- paste("( RR", gamma, "; total subjects", N,"; SNPs", Nsnp[1], "; prevalence", kp[1],
                   "; mode of inheritance:", minh, "; overall sig.level", alpha, ")" )
    matplot(p, ret, type="l", ylim=ylim, lty=1, col=nc, xlab="Allele Frequency", ylab="Power", sub=subt, ...)
    abline(h=c(.8), lty=1)
    legend( locator(1), colnames(ret), lty=1, col=nc )
  }
  ret
}

# power calculation for studies using baseline measure 
#   - simulation: continuous response, baseline and genotype as covariate (ANCOVA)
#   - can specify various modes of inheritance ('additive', 'dominant','recessive')
#   - use compound symmetry for covariance matrix 
# Author: Michael Man
#   Date: June 22, 2004
#      N: total number of subjects
#      p: frequency of A allele
#  alpha: significance level                     (used only in 'power.genotype.conti')
#    Rep: number of iterations to generate power (used only in 'power.genotype.conti')
#     pi: correlation coefficient
#    me1: mean of control group
#  delta: treatment/genotype effect
#  sd1/2: standard deviation of the control and treatment groups
#   minh: mode of inheritance
# genotype.delta: the effect due to individual genotype effect or overall effect
# Factor: whether treat 'Trt' as a factor in 'x'
#   TEST: debug
# reference:
#   Frison and Pocock (1992) "Repeated measures in clinical trials: analysis using mean summary statistics
#     and its implications for design" Statistics in Medicine 11:1685-1704
#   Vickers (2001) "The use of percentage change from baseline as an outcome in a controlled trial is
#     statistically inefficient: a simulation study" BMC Med Res Methodol. 2001; 1 (1): 6
# requirement: need library 'mvtnorm'

### power calculation
power.genotype.conti <- function(N, Rep=2000, alpha=.05, ...){
  pval <- sapply(rep(N, Rep), FUN=simu.genotype.conti, ...)
  retval <- list()
  retval$power <- mean(pval<=alpha)
  retval$ci    <- ci.binom(pval<=alpha)
  rownames(retval$ci) <- "power"
  retval$call  <- match.call()
  class(retval) <- "power.genoytpe.conti"
  retval
}

print.power.genoytpe.conti <- function(x, ...)
  {
    cat("\n")
    cat("Power calculation for a study including genotyping\n")
    cat("\n")
    cat("call:\n")
    print(x$call)
    cat("\n")
    cat("Estimated power:", x$power, "\n")
    cat("\n")
    cat("Simulation confidence region for estimated power:\n")
    cat("\n")
    print(x$ci)
  }


### simulation
simu.genotype.conti <- function (N, p=0.15, pi=0, me1=50, me2=me1, delta=-5, sd1=10, sd2=10, verbose=FALSE,
                                 minh=c('additive', 'dominant','recessive'), genotype.delta=TRUE, Factor=FALSE) 
{
  minh <- match.arg(minh)
  if ( min(N, sd1, sd2)<0 ) stop('N, sd1, and sd2 must be greater than 0')
  if ( p<=0 | abs(pi)<0 | p>=1 | abs(pi)>1 ) stop('p and abs(pi) must be between 0 and 1.') 
  f.mod <- switch(minh,
         dominant       = c(      0,     1, 1),
         additive       = c(      0 ,  0.5, 1),
         recessive      = c(      0,     0, 1)  ) 
  q <- 1 - p
  fhw <- c(q^2, 2*p*q, p^2)  # major allele first
  nhw <- round(N*fhw)
  if (sum(nhw)!=N) nhw[3] <- N-sum(nhw[1:2])
  covm <- matrix(c(1,    pi,    pi,    1   ), nr=2)*
          matrix(c(sd1^2, sd1*sd2, sd1*sd2, sd2^2), nr=2)
  if (!genotype.delta) delta <- delta/sum(fhw*f.mod)  # convert to overall delta due to all genotypes

  ## explicitly create x1:x3, t1:t3 to avoid R CMD check warning for R > 2.6.0
  x1 <- x2 <- x3 <- NULL
  t1 <- t2 <- t3 <- NULL
  
  for (i in 1:3) {
    if (nhw[i]!=0) {
      assign(paste('x',i,sep=''), rmvnorm(n=nhw[i], mean=c(me1,me2+f.mod[i]*delta), sigma=covm))
      assign(paste('t',i,sep=''), rep(f.mod[i],nhw[i]))
    } else {
      assign(paste('x',i,sep=''), NULL)
      assign(paste('t',i,sep=''), NULL)
    }
  }
  x <- data.frame(rbind(x1,x2,x3), Trt=c(t1,t2,t3))
  if (Factor) x$Trt <- as.factor(x$Trt)
  colnames(x) <- c('X', 'Y', 'Trt')
  mod <- lm(Y~X+Trt, data=x)   # ANCOVA
  if (verbose) {
    print( summary(mod) )
    plot(Y~X+Trt, data=x)
  }
  ret <- anova(mod, test='F')['Trt','Pr(>F)']
  ret
}




## contributed by Michael Man.
## TODO: Needs documentation before exporting.


### simple simulation for two group design
pw <- function(n1, n2=n1*(1-fc)/fc, fc=.5, pi=0, me1=50, me2=45, sd1=10, sd2=10, TEST=F){
  covm <- matrix(c(1,    pi,    pi,    1   ), nr=2)*
          matrix(c(sd1^2, sd1*sd2, sd1*sd2, sd2^2), nr=2)
  x1 <- rmvnorm(n=n1, mean=c(me1,me1), sigma=covm)
  x2 <- rmvnorm(n=n2, mean=c(me1,me2), sigma=covm)
  x <- data.frame(rbind(x1,x2), Trt=c(rep(0,n1), rep(1,n2)))
  colnames(x) <- c('X', 'Y', 'Trt')
  mod <- lm(Y~X+Trt, data=x)
  if (TEST) {
    print( summary(mod) )
    plot(Y~X+Trt, data=x)
  }
  ret <- anova(mod, test='F')['Trt','Pr(>F)']
  ret
}

