.packageName <- "gaga"
checkfit.gagafit <- function(gg.fit,x,groups,type='data',logexpr=FALSE,xlab,ylab,main,lty,lwd,...) {
# Plots to check fit of GaGa and MiGaGa models.

if (is(x, "exprSet") | is(x, "ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, exprSet, data.frame or matrix") }

if (ncol(x) != length(groups)) stop('The length of argument groups does not match with the number of columns in x')
if ((type!='data') && (type!='shape') && (type!='mean') && (type!='shapemean')) stop('The argument type is not valid')

xpred <- simnewsamples(gg.fit=gg.fit,groupsnew=groups,x=x,groups=groups)
if (type=='data') {
  if (logexpr) {
    xnewpdf <- density(log2(unlist(exprs(xpred)))); if (is.list(x)) xpdf <- density(log2(unlist(x))) else xpdf <- density(log2(x))
  } else {
    xnewpdf <- density(unlist(exprs(xpred))); if (is.list(x)) xpdf <- density(unlist(x)) else xpdf <- density(x)
  }
  if (missing(xlab)) xlab <- 'Expression values'; if (missing(ylab)) ylab <- 'Density'; if (missing(main)) main <- ''
  plot(xpdf,type='l',xlab=xlab,ylab=ylab,main=main,...); lines(xnewpdf,lty=2,lwd=2); legend(max(xpdf$x),max(xpdf$y),c('Observed data','Posterior predictive'),lty=1:2,lwd=1:2,xjust=1,yjust=1) 
} else if (type=='shape') {
  colsel <- (1+ncol(fData(xpred))/3):(2*ncol(fData(xpred))/3)
  aest <- rowMeans(x)^2/apply(x,1,'var'); apdf <- density(unlist(fData(xpred)[,colsel]))
  if (missing(xlab)) xlab <- 'alpha parameters (shape)'; if (missing(ylab)) ylab <- 'Density'; if (missing(main)) main <- ''
  plot(apdf,xlab=xlab,ylab=ylab,main=main,lty=1,...); lines(density(aest),lty=2)
  legend(quantile(aest,probs=.95),max(apdf$y),c('Model-based','Moments estimate'),lty=1:2,xjust=0,yjust=1)
} else if (type=='mean') {
  colsel <- (1+2*ncol(fData(xpred))/3):ncol(fData(xpred))
  lest <- rowMeans(x); lpdf <- density(unlist(fData(xpred)[,colsel]))
  if (missing(xlab)) xlab <- 'mean parameters'; if (missing(ylab)) ylab <- 'Density'; if (missing(main)) main <- ''
  plot(lpdf,xlab=xlab,ylab=ylab,main=main,lty=1,...); lines(density(lest),lty=2)
  legend(max(lest),max(lpdf$y),c('Model-based','Moments estimate'),lty=1:2,xjust=1,yjust=1)
} else if (type=='shapemean') {
  colsel1 <- (1+ncol(fData(xpred))/3):(2*ncol(fData(xpred))/3)
  colsel2 <- (1+2*ncol(fData(xpred))/3):ncol(fData(xpred))
  aest <- rowMeans(x)^2/apply(x,1,'var'); lest <- rowMeans(x)
  if (missing(xlab)) xlab <- 'mean parameters'; if (missing(ylab)) ylab <- 'alpha parameters (shape)'; if (missing(main)) main <- ''
  plot(unlist(fData(xpred)[,colsel2]),unlist(fData(xpred)[,colsel1]),xlab=xlab,ylab=ylab,main=main,...); points(lest,aest,col=2)
}

}
checkfit <- function(gg.fit,x,groups,type='data',logexpr=FALSE,xlab,ylab,main,lty,lwd,...) { UseMethod("checkfit") }
classpred.gagafit <- function(gg.fit,xnew,x,groups,prgroups,ngene=100) {
# Classify a new sample (array) into the group with highest posterior probability

gapprox <- TRUE
sel <- (1:nrow(x))[order(gg.fit$pp[,1])][1:ngene]
if (!is.vector(xnew)) stop('xnew must be a vector')
if (!is.numeric(sel)) stop('sel must contain numerical indexes')
if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") }
if (ncol(x)!=length(groups)) stop('Argument groups must have length equal to number of columns in argument x')
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha)
probclus <- as.double(par$probclus); probpat <- as.double(par$probpat)
equalcv <- as.integer(gg.fit$equalcv)
nclust <- as.integer(gg.fit$nclust)
patterns <- gg.fit$patterns
if (nrow(patterns)!=length(probpat)) stop('Argument patterns must be equal to the length of gg.fit@probEst')

groupsr <- groups2int(groups,patterns); K <- as.integer(max(groupsr)+1)
if (missing(prgroups)) prgroups <- rep(1/K,K)
prgroups <- as.double(prgroups)
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
npat <- as.integer(nrow(patterns))
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
sumx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
if (gg.fit$equalcv) {
  prodx <- double(nrow(x))
} else {
  prodx <- double(nrow(x)*sum(ngrouppat))
}
usesumx <- as.integer(0); gapprox <- as.integer(gapprox)

xnew <- as.double(xnew[sel])
sel <- sel-1  #in C vectors start at 0
d <- integer(1); posgroups <- double(K)

z <- .C("sampleclas_ggC",d=d,posgroups=posgroups,xnew,as.integer(length(sel)),as.integer(sel),as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),groupsr,K,prgroups,probclus,probpat,a0,nu,balpha,nualpha,equalcv,nclust,npat,as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,usesumx,gapprox)

return(list(d=z$d+1,posgroups=z$posgroups))

}
classpred <- function(gg.fit,xnew,x,groups,prgroups,ngene=100) { UseMethod("classpred") }
dcgamma <- function(x,a,b,c,d,r,s,newton=TRUE) {
# Density of a conjugate gamma shape distribution evaluated at x

if ((sum(a<0)>0) | (b<0) | (d<0) | (r<0) | (sum(s<0)>0)) stop('Parameters a,b,d,r,s must be >=0')
if ((sum(a)+sum(s))==0) { 
  if (b<=0) stop('Non-valid parameters. b must be > 0')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+sum(.5*a-1.5)+1<=0) stop('Non-valid parameters. b+sum(.5*a-1.5)+1 must be > 0')
  if (c+sum(a*log(s/a))<=0) stop('Non-valid parameters. c+sum(a*log(s/a)) must be > 0')
}
if (length(a)!=length(s)) stop('Arguments a and s must be vectors of the same length')

x <- as.double(x); n <- as.integer(length(x)); a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d)
r <- as.double(r);s <- as.double(s); y <- double(n); newton <- as.integer(newton)
normk <- as.double(-1)            #indicates the C routine to calculate the normalization constant
z <- .C("dcgammaC",y=y,normk,x,n,a,b,c,d,r,s,as.integer(length(a)),newton)
return(z$y)
}
findgenes.gagafit <- function(gg.fit,x,groups,fdrmax=.05,parametric=TRUE,B=500) {

centers <- 1; v <- gg.fit$pp
if (!is.matrix(v)) stop('gg.fit$pp must be a matrix containing posterior probabilities of each expression pattern')
cf <- as.double(2)

if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") }

patterns <- gg.fit$patterns
groupsr <- groups2int(groups,patterns); K <- as.integer(max(groupsr)+1)
if (length(groups) != ncol(x)) stop('groups must have length equal to the number of columns in x')
if (K==1) stop('At least two different groups must be specified')
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha); nualpha <- as.double(par$nualpha); probclus <- as.double(par$probclus); probpat <- as.double(par$probpat)
cluslist <- as.integer(c((0:(length(probclus)-1)),-1))
if (B<10) { warning('B was set to less than 10, too small a number of permutations. Increased to B=10'); B <- 10 }

sumx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
sumxpred <- double(nrow(x)*sum(ngrouppat)); nobsxpred <- double(sum(ngrouppat))
if (gg.fit$equalcv) {
  prodx <- prodxpred <- double(nrow(x))
} else {
  prodx <- prodxpred <- double(nrow(x)*sum(ngrouppat))
}
gapprox <- 1

nsel <- nrow(v); sel <- as.integer((1:nsel)-1)
util <- as.integer(1)
u <- double(1); d <- integer(nrow(v)); fdr <- fnr <- power <- double(1); threshold <- double(1)
z <- .C("utgene_parC",u=u,d=d,fdr=fdr,fnr=fnr,power=power,threshold=threshold,util=util,as.double(cf),nsel,sel,as.double(t(v)),as.integer(ncol(v)),as.double(fdrmax))
fdr <- fdrest <- fdrpar <- z$fdr

if (parametric==FALSE) {
  fdrseq <- as.double(seq(fdrmax/1000,min(fdrmax*2,1),length=1000))
  fdrest <- double(length(fdrseq))
  cat("Finding clusters of z-scores for bootstrap... ")
  m <- rowMeans(x); s <- sqrt((rowMeans(x^2)-rowMeans(x)^2)*ncol(x)/(ncol(x)-1))
  zscore <- (x-m)/s
  if (centers>1) {
    nquant <- min(10,ncol(x))
    qx <- t(matrix(unlist(apply(zscore,1,'quantile',probs=seq(0,1,length=nquant))),nrow=nquant))
    zcluster <- kmeans(qx,centers=centers,iter.max=50)
    zclustsize <- as.integer(table(zcluster$cluster))
    index <- as.integer(order(zcluster$cluster)-1)
  } else if ((centers==1) | (centers==0)) {
    zclustsize <- nrow(x)
    index <- as.integer(0:(nrow(x)-1))
  } else { stop('centers must be an integer >=0') }
  znclust <- as.integer(centers); niter <- 10

  cat("Done\nStarting",B,"bootstrap iterations...\n")
  znp <- .C("expected_fp",efp=fdrest,fdrseq,as.integer(length(fdrseq)),as.integer(B),as.integer(niter),as.double(t(zscore)),as.double(m),as.double(s),index,znclust,zclustsize,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groupsr),as.integer(ncol(patterns)),as.double(a0),as.double(nu),as.double(balpha),as.double(nualpha),as.integer(gg.fit$equalcv),as.integer(length(probclus)),cluslist,as.double(t(probclus)),as.double(t(probpat)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,sumxpred,prodxpred,nobsxpred,as.integer(gapprox))
  fdrest <- znp$efp
  fdrpar <- fdrseq[abs(fdrest-fdrmax)==min(abs(fdrest-fdrmax))][1]
  fdr <- fdrest[abs(fdrest-fdrmax)==min(abs(fdrest-fdrmax))][1]
  if (fdr>1.1*fdrmax) {
    warning('estimated FDR too large. Try decreasing fdrmax')
    #fdrpar <- 0
  }
  z <- .C("utgene_parC",u=u,d=d,fdr=fdr,fnr=fnr,power=power,threshold=threshold,util=util,as.double(cf),nsel,sel,as.double(t(v)),as.integer(ncol(v)),as.double(fdrpar))
  fdrest <- data.frame(fdrseq=fdrseq,fdrest=fdrest)
}

return(list(efp=z$u,d=z$d,fdr=fdr,fdrpar=fdrpar,fdrest=fdrest,fnr=z$fnr,power=z$power,threshold=z$threshold))

}
findgenes <- function(gg.fit,x,groups,fdrmax=.05,parametric=TRUE,B=500) { UseMethod("findgenes") }
fitGG <- function(x,groups,patterns,equalcv=TRUE,nclust=1,method='quickEM',B,priorpar,parini,trace=TRUE) {

#Input processing: check errors, format and set missing parameters to default
if (missing(B)) { if (method=='SA') { B <- 200 } else if (method=='MH' | method=='Gibbs') { B <- 1000 } else { B <- 10 } }
gapprox <- TRUE
if (is(x, "exprSet") | is(x, "ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, exprSet, data.frame or matrix") }
if (min(x)<0) stop("x can only have positive values")
if (sum(is.na(x))>0) stop("x cannot have any NA values")
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if ((method=='quickEM') && (nrow(x)>10^4)) x <- x[sample(1:nrow(x),10^4),]  #limit nb genes to 10,000 for speed

K <- length(unique.default(groups))
if (missing(patterns)) { patterns <- rbind(rep(0,K),0:(K-1)); colnames(patterns) <- unique.default(groups) }
groupsr <- groups2int(groups,patterns)

if (length(table(groupsr))!=ncol(patterns)) stop('patterns must have the same number of columns as the number of distinct groups')
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
if (sum(is.na(patterns))>0) stop('patterns cannot have any NA values')
if (sum(is.nan(patterns))>0) stop('patterns cannot have any NaN values')
if (sum(is.infinite(patterns))>0) stop('patterns cannot have any Inf values')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
class(patterns) <- 'gagahyp'
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
nclust <- as.integer(nclust)
npat <- as.integer(nrow(patterns))

# Initialize parameters by method of moments
if (nclust==1) probclusini <- 1
if (missing(parini)) {
  if (trace) cat('Initializing parameters...')
  aest <- rowMeans(x)^2/((rowMeans(x^2)-rowMeans(x)^2)*ncol(x)/(ncol(x)-1)); lest <- 1/rowMeans(x)
  sel <- (aest<quantile(aest,probs=.99,na.rm=TRUE)) & (lest<quantile(lest,probs=.99,na.rm=TRUE))
  aest <- aest[sel]; lest <- lest[sel]
  balphaini <- as.double(mean(aest)^2/var(aest,na.rm=TRUE)); nualphaini <- as.double(mean(aest,na.rm=TRUE))
  if (nclust==1) {
    a0ini <- as.double(mean(lest)^2/var(lest,na.rm=TRUE)); nuini <- as.double(mean(lest,na.rm=TRUE))
    probclusini <- as.double(1)
  } else {
    clusini <- kmeans(x=lest,centers=nclust)$cluster
    a0ini <- as.double(tapply(lest,clusini,'mean')^2/tapply(lest,clusini,'var'))
    nuini <- as.double(tapply(lest,clusini,'mean'))
    probclusini <- as.double(table(clusini)/length(clusini))
  }
  probpatini <- rep(1/nrow(patterns),nrow(patterns))
  if (trace) cat(' Done.\n')
} else {
  if (is.null(parini$a0) | is.null(parini$nu) | is.null(parini$balpha) | is.null(parini$nualpha) | is.null(parini$probpat)) stop('some components of parini are empty')
  a0ini <- parini$a0; nuini <- parini$nu; balphaini <- parini$balpha; nualphaini <- parini$nualpha
  if (nclust==1) { probclusini <- 1 } else { if (is.null(parini$probclus)) { stop('component probclus of parini is empty') } else { probclusini <- parini$probclus/sum(parini$probclus) } }
  probpatini <- parini$probpat/sum(parini$probpat)
}


if (method=='EM' | method=='quickEM') {

  if (method=='quickEM') B <- 1
  balpha <- nualpha <- double(1); alpha0 <- nu <- probclus <- double(nclust); prob <- double(npat)
  lhood <- double(1); trace <- as.integer(trace)
  a0ini <- as.double(a0ini); nuini <- as.double(nuini); balphaini <- as.double(balphaini); nualphaini <- as.double(nualphaini)
  probclusini <- as.double(probclusini); probpatini <- as.double(probpatini)
  groupsr <- as.integer(groupsr); K <- as.integer(K); equalcv <- as.integer(equalcv)
  nclust <- as.integer(nclust); npat <- as.integer(npat); ngrouppat <- as.integer(ngrouppat)
  gapprox <- as.integer(gapprox); trace <- as.integer(trace)
  
  z <- .C("fitEM_ggC",alpha0=alpha0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,prob=prob,lhood=lhood,as.integer(B),a0ini,nuini,balphaini,nualphaini,probclusini,probpatini,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),groupsr,K,equalcv,nclust,npat,as.integer(t(patterns)),ngrouppat,gapprox,trace)
  
  parest <- c(alpha0=z$alpha0,nu=z$nu,balpha=z$balpha,nualpha=z$nualpha,probclus=z$probclus,probpat=z$prob)
  gg.fit <- list(parest=parest,mcmc=as.mcmc(NA),lhood=z$lhood,equalcv=equalcv,nclust=nclust,patterns=patterns,method=method)
  class(gg.fit) <- 'gagafit'
  return(gg.fit)

} else if (method=='MH' | method=='Gibbs' | method=='SA') {

  if (trace) cat('Refining initial estimates...')
  eps <- 1; i <- 1
  while ((eps>.001) && (i<=B)) {
    probnew <- colMeans(ppGG(x,groups,a0ini,nuini,balphaini,nualphaini,equalcv,probclusini,probpatini,patterns)$pp)
    eps <- max(abs(probnew-probpatini))
    probpatini <- probnew
    i <- i+1
  }
  if (trace) cat(' Done.\n')

  if (missing(priorpar)) {
    a.alpha0 <-  .0016; b.alpha0 <-  .0001; a.nu <-  .016; b.nu <-  .0016
    a.balpha <-  .004; b.balpha <- .001; a.nualpha <- .004; b.nualpha <- 20
    p.probclus <- as.double(rep(.1,nclust))
    p.probpat <- as.double(rep(.1,npat))
  } else {
    if (is.null(priorpar$a.alpha0) | (is.null(priorpar$b.alpha0)) | (is.null(priorpar$a.nu)) | (is.null(priorpar$b.nu)) | (is.null(priorpar$a.balpha)) | (is.null(priorpar$b.balpha)) | (is.null(priorpar$a.nualpha)) | (is.null(priorpar$b.nualpha))) stop('Some components of priorpar have not been specified')
    a.alpha0 <- priorpar$a.alpha0; b.alpha0 <- priorpar$b.alpha0
    a.nu <- priorpar$a.nu; b.nu <- priorpar$b.nu
    a.balpha <- priorpar$a.balpha; b.balpha <- priorpar$b.balpha
    a.nualpha <- priorpar$a.nualpha; b.nualpha <- priorpar$b.nualpha
    if (nclust>1) { p.probclus <- 1 } else { if (is.null(priorpar$p.probclus)) stop('component p.probclus of priorpar has not been specified') else p.probclus <- priorpar$p.probclus }
    p.probpat <- priorpar$p.probpat
  }

# Call MCMC sampling routine
  balpha <- nualpha <- double(B); alpha0 <- nu <- probclus <- double(B*nclust); prob <- double(B*npat)
  lhood <- double(B); trace <- as.integer(trace)

  if (method=='Gibbs') {
    z <- .C("fit_ggC",alpha0=alpha0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,prob=prob,lhood=lhood,as.integer(B),as.double(a.alpha0),as.double(b.alpha0),as.double(a.nu),as.double(b.nu),as.double(a.balpha),as.double(b.balpha),as.double(a.nualpha),as.double(b.nualpha),as.double(p.probclus),as.double(p.probpat),a0ini,nuini,balphaini,nualphaini,probclusini,probpatini,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groupsr),K,as.integer(equalcv),nclust,npat,as.integer(t(patterns)),ngrouppat,as.integer(gapprox),trace)
  } else if (method=='MH' | method=='SA') {
    acprop <- as.double(0)
    h.alpha0 <- h.nu <- h.balpha <- h.nualpha <- h.rho <- h.prob <- as.double(0)
    if (method=='MH') { cmethod <- as.integer(1); Bgibbs <- as.integer(50) } else { cmethod <- as.integer(0); Bgibbs <- as.integer(20) }
    z <- .C("fitMH_ggC",acprop=acprop,alpha0=alpha0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,prob=prob,lhood=lhood,as.integer(B),as.double(a.alpha0),as.double(b.alpha0),as.double(a.nu),as.double(b.nu),as.double(a.balpha),as.double(b.balpha),as.double(a.nualpha),as.double(b.nualpha),as.double(p.probclus),as.double(p.probpat),a0ini,nuini,balphaini,nualphaini,probclusini,probpatini,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groupsr),K,as.integer(equalcv),nclust,npat,as.integer(t(patterns)),ngrouppat,as.integer(gapprox),trace,cmethod,Bgibbs,h.alpha0,h.nu,h.balpha,h.nualpha,h.rho,h.prob)
  }
  if (trace) cat('Done.\n')

  gg.fit <- list(parest=NA,mcmc=as.mcmc(data.frame(alpha0=matrix(z$alpha0,nrow=B,ncol=nclust,byrow=TRUE),nu=matrix(z$nu,nrow=B,ncol=nclust,byrow=TRUE),balpha=z$balpha,nualpha=z$nualpha,probclus=matrix(z$probclus,nrow=B,ncol=nclust,byrow=TRUE),probpat=matrix(z$prob,nrow=B,byrow=TRUE))),lhood=z$lhood,equalcv=equalcv,nclust=nclust,patterns=patterns,method=method)
  class(gg.fit) <- 'gagafit'
  return(gg.fit)

}

}
geneclus.gagafit <- function(gg.fit,method='posprob') {

  d <- integer(nrow(gg.fit$pp)); ppat <- double(nrow(gg.fit$pp))
  nsel <- as.integer(nrow(gg.fit$pp)); sel <- as.integer((1:nsel)-1)
  npat <- as.integer(nrow(gg.fit$patterns))
  probpat <- as.double(getpar(gg.fit)$probpat)
  if (method=='likelihood') { gg.fit$pp <- t(t(gg.fit$pp)/probpat); gg.fit$pp <- gg.fit$pp/rowSums(gg.fit$pp) }
  v <- as.double(t(gg.fit$pp))

  z <- .C("geneclus",d=d,ppat=ppat,nsel,sel,v,npat)
  z <- list(d=z$d,posprob=z$ppat,patterns=gg.fit$patterns)
  class(z) <- 'gagaclus'
  return(z)

}
geneclus <- function(gg.fit,method='posprob') { UseMethod("geneclus") }
getpar.gagafit <- function(gg.fit) {
#returns parameter estimates from a 'gagafit' object in a named list
if (sum(is.na(gg.fit$parest)>0)) stop('Parameter estimates not available. Use function parest first.')
nclust <- gg.fit$nclust
return(list(a0=gg.fit$parest[1:nclust],nu=gg.fit$parest[(nclust+1):(2*nclust)],balpha=gg.fit$parest[2*nclust+1],nualpha=gg.fit$parest[2*nclust+2],probclus=gg.fit$parest[(2*nclust+3):(3*nclust+2)],probpat=gg.fit$parest[-1:-(3*nclust+2)]))
}
getpar <- function(gg.fit) { UseMethod("getpar") }
groups2int <- function(groups,patterns) {
#check that names in groups and patterns match
if (is.null(colnames(patterns))) stop('You must specify colnames(patterns)')
if (sum(unique.default(groups)[order(unique.default(groups))]==colnames(patterns)[order(colnames(patterns))])<ncol(patterns)) stop('Group names in colnames(patterns) do no match group names indicated in groups')

#convert groups to integer vector
groupsr <- integer(length(groups))
for (i in 1:ncol(patterns)) { groupsr[groups==colnames(patterns)[i]] <- i-1 }
groupsr <- as.integer(groupsr)
return(groupsr)
}
mcgamma <- function(a,b,c,d,r,s,newton=TRUE) {
# Moments for a conjugate gamma shape distribution */

if ((sum(a<0)>0) | (b<0) | (d<0) | (r<0) | (sum(s<0)>0)) stop('Parameters a,b,d,r,s must be >=0')
if ((sum(a)+sum(s))==0) { 
  if (b<=0) stop('Non-valid parameters. b must be > 0')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+sum(.5*a-1.5)+1<=0) stop('Non-valid parameters. b+sum(.5*a-1.5)+1 must be > 0')
  if (c+sum(a*log(s/a))<=0) stop('Non-valid parameters. c+sum(a*log(s/a)) must be > 0')
}
if (length(a)!=length(s)) stop('Arguments a and s must be vectors of the same length')

a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d); r <- as.double(r); s <- as.double(s)
newton <- as.integer(newton)
normk <- m <- as.double(-1); v <- double(1)
z <- .C("mcgammaC",normk=normk,m=m,v=v,a,b,c,d,r,s,as.integer(length(a)),newton)
return(list(m=z$m,v=z$v,normk=z$normk))

}
parest.gagafit <- function(gg.fit,x,groups,burnin,alpha=.05) {

if (missing(x)) stop('argument x must be specified')
if (missing(groups)) stop('argument groups must be specified')
if (is(x, "exprSet") | is(x, "ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, exprSet, data.frame or matrix") }

if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
groupsr <- groups2int(groups,gg.fit$patterns)

nclust <- gg.fit$nclust
if (gg.fit$method=='EM' | gg.fit$method=='quickEM') {
  a0 <- getpar(gg.fit)$a0; nu <- getpar(gg.fit)$nu
  balpha <- getpar(gg.fit)$balpha; nualpha <- getpar(gg.fit)$nualpha
  probclus <- getpar(gg.fit)$probclus; probpat <- getpar(gg.fit)$probpat
  ci<-list(a0=NA,nu=NA,balpha=NA,nualpha=NA,probclus=NA,probpat=NA)
  pp <- ppGG(x,groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,equalcv=gg.fit$equalcv,probclus=probclus,probpat=probpat,patterns=gg.fit$patterns)
  dic <- NA
} else if (gg.fit$method=='SA') {
  posmax <- (gg.fit$lhood==max(gg.fit$lhood))
  if (nclust>1) {
    a0 <- gg.fit$mcmc[posmax,1:nclust][1,]
    nu <- gg.fit$mcmc[posmax,(nclust+1):(2*nclust)][1,]
    balpha <- gg.fit$mcmc[posmax,2*nclust+1][1]
    nualpha <- gg.fit$mcmc[posmax,2*nclust+2][1]
    probclus <- gg.fit$mcmc[posmax,(2*nclust+3):(3*nclust+2)][1,]
    probpat <- gg.fit$mcmc[posmax,-1:-(3*nclust+2)][1,]
  } else {
    a0 <- gg.fit$mcmc[posmax,1:nclust][1]
    nu <- gg.fit$mcmc[posmax,(nclust+1):(2*nclust)][1]
    balpha <- gg.fit$mcmc[posmax,(2*nclust+1):(3*nclust)][1]
    nualpha <- gg.fit$mcmc[posmax,(3*nclust+1):(4*nclust)][1]
    probclus <- gg.fit$mcmc[posmax,(4*nclust+1):(5*nclust)][1]
    probpat <- gg.fit$mcmc[posmax,-1:-(5*nclust)][1,]
  }
  gg.fit$parest <- c(a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat)
  ci<-list(a0=NA,nu=NA,balpha=NA,nualpha=NA,probclus=NA,probpat=NA)
  pp <- ppGG(x,groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,equalcv=gg.fit$equalcv,probclus=probclus,probpat=probpat,patterns=gg.fit$patterns)
  dic <- NA
} else if (gg.fit$method=='MH' | gg.fit$method=='Gibbs') {
  if (missing(burnin)) {
    warning('burnin not specified, discarding 25% of the MCMC samples')
    gg.fit$mcmc <- mcmc(gg.fit$mcmc[-1:-(.25*nrow(gg.fit$mcmc)),])
    gg.fit$lhood <- gg.fit$lhood[-1:-(.25*nrow(gg.fit$mcmc))]
    lhood <- mean(gg.fit$lhood)
  } else {
    if (burnin>=nrow(gg.fit$mcmc)) stop('burnin must be smaller than the number of MCMC samples')
    gg.fit$mcmc <- mcmc(gg.fit$mcmc[-1:-burnin,])
    gg.fit$lhood <- gg.fit$lhood[-1:-burnin]
    lhood <- mean(gg.fit$lhood)
  }

  if (nclust>1) {
    a0 <- colMeans(gg.fit$mcmc[,1:nclust])
    nu <- colMeans(gg.fit$mcmc[,(nclust+1):(2*nclust)])
    balpha <- mean(gg.fit$mcmc[,2*nclust+1])
    nualpha <- mean(gg.fit$mcmc[,2*nclust+2])
    probclus <- colMeans(gg.fit$mcmc[,(2*nclust+3):(3*nclust+2)])
    probpat <- colMeans(gg.fit$mcmc[,-1:-(3*nclust+2)])
    a0.ci <- apply(gg.fit$mcmc[,1:nclust],2,quantile,probs=c(alpha/2,1-alpha/2))
    nu.ci <- apply(gg.fit$mcmc[,(nclust+1):(2*nclust)],2,quantile,probs=c(alpha/2,1-alpha/2))
    balpha.ci <- quantile(gg.fit$mcmc[,2*nclust+1],probs=c(alpha/2,1-alpha/2))
    nualpha.ci <- quantile(gg.fit$mcmc[,2*nclust+2],probs=c(alpha/2,1-alpha/2))
    probclus.ci <- apply(gg.fit$mcmc[,(2*nclust+3):(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
    probpat.ci <- apply(gg.fit$mcmc[,-1:-(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
  } else {
    a0 <- mean(gg.fit$mcmc[,1:nclust])
    nu <- mean(gg.fit$mcmc[,(nclust+1):(2*nclust)])
    balpha <- mean(gg.fit$mcmc[,(2*nclust+1):(3*nclust)])
    nualpha <- mean(gg.fit$mcmc[,(3*nclust+1):(4*nclust)])
    probclus <- mean(gg.fit$mcmc[,(4*nclust+1):(5*nclust)])
    probpat <- colMeans(gg.fit$mcmc[,-1:-(5*nclust)])
    a0.ci <- quantile(gg.fit$mcmc[,1:nclust],probs=c(alpha/2,1-alpha/2))
    nu.ci <- quantile(gg.fit$mcmc[,(nclust+1):(2*nclust)],probs=c(alpha/2,1-alpha/2))
    balpha.ci <- quantile(gg.fit$mcmc[,2*nclust+1],probs=c(alpha/2,1-alpha/2))
    nualpha.ci <- quantile(gg.fit$mcmc[,2*nclust+2],probs=c(alpha/2,1-alpha/2))
    probclus.ci <- c(1,1)
    probpat.ci <- apply(gg.fit$mcmc[,-1:-(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
  }

  gg.fit$parest <- c(a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat)
  ci<-list(a0=a0.ci,nu=nu.ci,balpha=balpha.ci,nualpha=nualpha.ci,probclus=probclus.ci,probpat=probpat.ci)
  pp <- ppGG(x,groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,equalcv=gg.fit$equalcv,probclus=probclus,probpat=probpat,patterns=gg.fit$patterns)
  dic <- -2*(2*lhood-pp$lhood)
}

gg.fit$ci <- ci; gg.fit$pp <- pp$pp; gg.fit$dic <- dic
return(gg.fit)
}
parest <- function(gg.fit,x,groups,burnin,alpha=.05) { UseMethod("parest") }
posmeansGG.gagafit <- function(gg.fit,x,groups,sel,underpattern) {
# Posterior means for each gene, assuming that pattern==underpattern holds for genes

gapprox <- TRUE
if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") } 

patterns <- gg.fit$patterns
v <- gg.fit$pp

groupsr <- groups2int(groups,patterns); K <- as.integer(max(groupsr)+1)

if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (!is.matrix(v)) stop('Argument v must be a matrix')
if (ncol(v)!=nrow(patterns)) stop('Argument v must have as many columns as rows has patterns')
if (nrow(x)!=nrow(v)) stop('Arguments x and v must have the same number of rows')

if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
par <- getpar(gg.fit)
alpha0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha)
nclust <- as.integer(gg.fit$nclust); rho <- as.double(par$probclus)
sumx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
if (gg.fit$equalcv) {
  prodx <- double(nrow(x))
} else {
  prodx <- double(nrow(x)*sum(ngrouppat))
}
gapprox <- as.integer(gapprox)

if (missing(underpattern)) {
  underpattern <- nrow(gg.fit$patterns)-1
} else if (underpattern>nrow(gg.fit$patterns)) {
  stop('The specified pattern number is not valid')
}
underpattern <- as.integer(underpattern)
cat(' Computing posterior means under expression pattern',underpattern,'...\n')

if (missing(sel)) sel <- 1:nrow(x)
if (is.logical(sel)) sel <- (1:nrow(x))[sel]
sel <- as.integer(sel-1) #in C indices start at 0
nsel <- length(sel)
posmeans <- double(nsel*K)

z <- .C("compute_sumxC",sumx=t(sumx),prodx=t(prodx),nobsx=nobsx,as.integer(gg.fit$equalcv),nsel,sel,as.integer(sum(ngrouppat)),ncol(x),as.double(t(x)),groupsr,K,as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,as.integer(1))
sumx <- matrix(z$sumx,nrow=nrow(x),byrow=TRUE); prodx <- matrix(z$prodx,nrow=nrow(x),byrow=TRUE)
nobsx <- z$nobsx

z <- .C("posmeans_ggC",posmeans=posmeans,underpattern,K,nsel,sel,alpha0,nu,balpha,nualpha,as.integer(gg.fit$equalcv),nclust,rho,as.double(t(v)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,t(sumx),t(prodx),nobsx,gapprox)

x <- data.frame(matrix(z$posmeans,nrow=nsel,byrow=TRUE))
for (i in 1:K) { names(x)[i] <- as.character(groups[groupsr==i-1][1]) }
return(x)

}
posmeansGG <- function(gg.fit,x,groups,sel,underpattern) { UseMethod("posmeansGG") }
powclasspred.gagafit <- function(gg.fit,x,groups,prgroups,v0thre=1,ngene=100,B=100) {
# Estimate expected probability that a future sample is correctly classified.

gapprox <- TRUE
patterns <- gg.fit$patterns
if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") }
if (ncol(x)!=length(groups)) stop('Argument groups must have length equal to number of columns in argument x')
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha); probclus <- as.double(par$probclus); probpat <- as.double(par$probpat); nclust <- as.integer(gg.fit$nclust)
if (nrow(patterns)!=length(probpat)) stop('Argument patterns must be equal to the length of gg.fit$prob')
if ((missing(genelimit)) & (v0thre==1)) warning("You selected to use all genes. It's recommended to narrow the selection with the arguments v0thre and genelimit")
if (missing(genelimit)) { genelimit <- nrow(x); }

genelimit <- as.integer(genelimit); v0thre <- as.double(v0thre)
usesel <- as.integer(0); nsel <- as.integer(0); sel <- integer(nrow(x))

npat <- as.integer(nrow(patterns))
groupsr <- groups2int(groups,patterns); K <- as.integer(max(groupsr)+1)
if (missing(prgroups)) prgroups <- rep(1/K,K)
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
ncolsumx <- as.integer(sum(ngrouppat))
sumx <- double(nrow(x)*ncolsumx); nobsx <- double(ncolsumx)
if (gg.fit$equalcv) {
  prodx <- double(nrow(x))
} else {
  prodx <- double(nrow(x)*sum(ngrouppat))
}
usesumx <- as.integer(0); gapprox <- as.integer(gapprox)

ccall <- seccall <- double(1); ccgroup <- double(K); ngroup <- integer(K); preceps <- as.double(0)

v <- ppGG(x=x,groups=groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,equalcv=gg.fit$equalcv,probclus=probclus,probpat=probpat,patterns=patterns)$pp

z <- .C("utsample_ggC",ccall=ccall,seccall=seccall,ccgroup=ccgroup,ngroup=ngroup,as.integer(B),preceps,genelimit,v0thre,nsel,sel,usesel,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groupsr),as.double(t(v)),K,as.double(prgroups),a0,nu,balpha,nualpha,as.integer(gg.fit$equalcv),nclust,probclus,probpat,npat,as.integer(t(patterns)),ngrouppat,ncolsumx,sumx,prodx,nobsx,usesumx,gapprox)

ccgroup <- z$ccgroup/z$ngroup
return(list(ccall=z$ccall,seccall=z$seccall,ccgroup=ccgroup,segroup=ccgroup*(1-ccgroup)/sqrt(z$ngroup)))

}
powclasspred <- function(gg.fit,x,groups,prgroups,v0thre=1,ngene=100,B=100) { UseMethod("powclasspred") }
ppGG <- function(x,groups,a0,nu,balpha,nualpha,equalcv,probclus,probpat,patterns) {
# Posterior probabilities of DE in GaGa and MiGaGa models given data and hyper-param estimate

gapprox <- TRUE
if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") }

groupsr <- groups2int(groups,patterns)
K <- as.integer(max(groupsr)+1)
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (missing(a0)) stop('a0 must be specified')
if (missing(nu)) stop('nu must be specified')
if (missing(balpha)) stop('balpha must be specified')
if (missing(nualpha)) stop('nualpha must be specified')
if (!is.vector(probclus)) stop('probclus must be a vector')
if (!is.vector(probpat)) stop('probpat must be a vector')
if ((length(a0)!=length(nu)) || (length(a0)!=length(probclus))) stop('a0,nu and probclus must have the same length')
if (length(balpha)>1 || length(nualpha)>1) stop('balpha and nualpha must be vectors of length 1')

if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
v <- double(nrow(x)*nrow(patterns)); lhood <- double(1)
usesumx <- as.integer(0)
sumx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
sumxpred <- double(nrow(x)*sum(ngrouppat));  nobsxpred <- double(sum(ngrouppat));
if (as.logical(equalcv)) {
  prodx <- double(nrow(x));
  prodxpred <- double(nrow(x));
} else {
  prodx <- double(nrow(x)*sum(ngrouppat));
  prodxpred <- double(nrow(x)*sum(ngrouppat));
}
nsel <- nrow(x); sel <- as.integer(0:(nsel-1))
cluslist <- as.integer(c((0:(length(probclus)-1)),-1))
z <- .C("pp_ggC",v=v,lhood=lhood,nsel,sel,as.integer(ncol(x)),as.double(t(x)),groupsr,as.integer(ncol(patterns)),as.double(a0),as.double(nu),as.double(balpha),as.double(nualpha),as.integer(equalcv),as.integer(length(probclus)),cluslist,as.double(t(probclus)),as.double(t(probpat)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,sumxpred,prodxpred,nobsxpred,usesumx,as.integer(gapprox))
v <- matrix(z$v,nrow=nrow(x),byrow=TRUE)

return(list(pp=v,lhood=z$lhood))

}
print.gagaclus <- function(x,...) {
  print(x$patterns,table(factor(x$d,levels=0:nrow(x$patterns))))
}
print.gagafit <- function(x,...) {

if (x$nclust==1) cat("GaGa hierarchical model.") else cat("MiGaGa hierarchical model (",round(x$nclust)," clusters.",sep="")
if (x$method=='EM') cat(" Fit via Expectation-Maximization\n") else if (x$method=='quickEM') cat("Fit via quick Expectation-Maximization\n") else if (x$method=='Gibbs') cat(" Fit via Gibbs sampling (",nrow(x$mcmc)," iterations kept)\n",sep="") else if (x$method=='MH') cat(" Fit via Metropolis-Hastings sampling (",nrow(x$mcmc)," iterations kept)\n",sep="") else if (x$method=='SA') cat(" Fit via Simulated Annealing (",nrow(x$mcmc)," iterations)\n",sep="")
if (x$equalcv) cat("Assumed constant CV across groups \n") else cat("Assumed varying CV across groups\n")

if (is.null(x$pp)) {
  cat("  ",ncol(x$patterns)," groups, ",nrow(x$patterns)," hypotheses (expression patterns)\n\n",sep="") } else {
  cat("  ",nrow(x$pp)," genes, ",ncol(x$patterns)," groups, ",nrow(x$patterns)," hypotheses (expression patterns)\n\n",sep="")
}
cat("The expression patterns are\n")
if (sum(is.na(x$parest))==0) { probpat <- getpar(x)$probpat } else { probpat <- NA }
print(x$patterns,probpat)
cat("\n")
if (!is.na(x$parest[1])) {
  cat("Hyper-parameter estimates\n\n")
  par <- getpar(x)
  cat("  ",names(x$parest)[1:(2+2*x$nclust)],"\n")
  cat("  ",round(par$a0,3),round(par$nu,3),round(par$balpha,3),round(par$nualpha,3),"\n\n")
  cat("  ",names(x$parest)[(2+2*x$nclust+1):(2+3*x$nclust)],"\n")
  cat("  ",round(par$probclus,3),"\n\n")
} else {
  cat("Hyper-parameter estimates not computed yet. Run function parest.\n")
}
if (is.null(x$pp)) { cat("Posterior probabilities not computed yet. Run function parest.\n") }
}
print.gagahyp <- function(x,probpat=NA,...) {
  groups <- colnames(x)
  for (i in 1:nrow(x)) {
    cnt <- length(table(x[i,]))
    cat("  Pattern ",i-1," ",sep="")
    if (sum(is.na(probpat))==0) {
      if (is.integer(probpat)) {
        cat("(",probpat[i]," genes): ",sep='') 
      } else {
        cat("(",round(100*probpat[i],1),"% genes): ",sep='')
      }
    } else { cat(": ") }
    for (j in 0:(cnt-1)) {
      if (j<(cnt-1)) { eq <- c(rep(" =",sum(x[i,]==j)-1),"!=") } else { eq <- c(rep("=",sum(x[i,]==j)-1),"\n") }
      cat(paste(groups[x[i,]==j],eq))
    }
  }
}
rcgamma <- function(n,a,b,c,d,r,s,newton=TRUE) {
# Random draws from a conjugate gamma shape distribution by approximating it with a Gamma

if ((sum(a<0)>0) | (b<0) | (d<0) | (r<0) | (sum(s<0)>0)) stop('Parameters a,b,d,r,s must be >=0')
if ((sum(a)+sum(s))==0) { 
  if (b<=0) stop('Non-valid parameters. b must be > 0')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+sum(.5*a-1.5)+1<=0) stop('Non-valid parameters. b+sum(.5*a-1.5)+1 must be > 0')
  if (c+sum(a*log(s/a))<=0) stop('Non-valid parameters. c+sum(a*log(s/a)) must be > 0')
}
if (length(a)!=length(s)) stop('Arguments a and s must be vectors of the same length')

n <- as.integer(n); a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d); r <- as.double(r); s <- as.double(s)
newton <- as.integer(newton)

x <- double(n)
z <- .C("rcgammaC",x=x,n,a,b,c,d,r,s,as.integer(length(a)),newton)
return(z$x)

}
simGG <- function(n,m,p.de=.1,a0,nu,balpha,nualpha,equalcv=TRUE,probclus=1,a=NA,l=NA,useal=FALSE) {
  # Simulates data from the GaGa model with several groups

  if (n<=0) stop("Number of genes must be positive")
  if (sum(m<0)>0) stop("Number of observations per group must be positive")
  if (!missing(a)) { if (length(m)!=ncol(a)) stop("length(m) must be equal to ncol(a)") }
  if (!missing(l)) { if (length(m)!=ncol(l)) stop("length(m) must be equal to ncol(l)") }
  
  if (useal==FALSE) {
    if (p.de<0 & p.de>1) stop("proportion of differentially expressed genes must be between 0 and 1")
    if (balpha<=0 | nualpha<=0) stop("balpha and nualpha must be >0")
    if (min(a0)<=0 | min(nu)<=0) stop(cat("a0 and nu must be >0 but a0=",a0,", nu=",nu,"was specified \n"))

    a0 <- rep(a0,round(probclus*n)); nu <- rep(nu,round(probclus*n))
    balpha <- rep(balpha,length(a0)); nualpha <- rep(nualpha,length(nu))
 
    a <- l <- matrix(NA,nrow=n,ncol=length(m))
    a[,1] <- rgamma(n,balpha,balpha/nualpha); l[,1] <- 1/rgamma(n,a0,a0/nu)
    if (ncol(a)>1) {
      if ((round((1-p.de)*n)+1)<=n) {   #generate parameter values for DE genes
        sel <- sample(1:n,round(p.de*n),replace=FALSE)
        for (i in 2:length(m)) {
          l[,i] <- l[,1]; a[,i] <- a[,1]
          if (!equalcv) a[sel,i] <- rgamma(round(p.de*n),balpha[sel],balpha[sel]/nualpha[sel])
          l[sel,i] <- 1/rgamma(round(p.de*n),a0[sel],a0[sel]/nu[sel])
        }
      }
    }
  }
  x <- matrix(rgamma(m[1]*n,a[,1],a[,1]/l[,1]),nrow=n,ncol=m[1])
  i <- 2
  while (i<=length(m)) {
    x <- cbind(x,matrix(rgamma(m[i]*n,a[,i],a[,i]/l[,i]),nrow=n,ncol=m[i]))
    i <- i+1
  }

  metadata <- data.frame(labelDescription='Group that each array belongs to',row.names='group')
  group <- paste('group',1:length(m))
  pheno <- new("AnnotatedDataFrame", data=data.frame(group=rep(group,each=m)), dimLabels=c("rowNames", "columnNames"), varMetadata=metadata)
  sampleNames(pheno) <- paste("Array",1:nrow(pheno))
  metadata <- data.frame(labelDescription=c(paste('alpha parameter for array',1:length(m)),paste('mean parameter for array',1:length(m))),row.names=c(paste('alpha',1:length(m),sep='.'),paste('mean',1:length(m),sep='.')))
  fdata <- new("AnnotatedDataFrame", data=data.frame(a,l),varMetadata=metadata)
  sampleNames(fdata) <- paste("Gene ",1:n)
  varLabels(fdata) <- c(paste('alpha',1:length(m),sep='.'),paste('mean',1:length(m),sep='.'))
  experimentData <- new("MIAME", title = "Dataset simulated with simGG", abstract = "This dataset contains simulated expression data from a GaGa model. The expression data can be accessed via exprs(object), and the parameter values used to generate the data through fData(object)")
  x <- new("ExpressionSet", phenoData=pheno, featureData=fdata, exprs = x, experimentData = experimentData)
  
  return(x)
}
simnewsamples.gagafit <- function(gg.fit,groupsnew,sel,x,groups) {
# Simulate parameters from the posterior of a GaGa or MiGaGa model, and new observations from the posterior predictive

gapprox <- TRUE
if (is(x, "exprSet") | is(x,"ExpressionSet")) {
  if (is.character(groups) && length(groups)==1) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an exprSet, data.frame or matrix") } 

patterns <- gg.fit$patterns
v <- gg.fit$pp

groupsr <- groups2int(groups,patterns); K <- as.integer(max(groupsr)+1)
groupsnewr <- groups2int(groupsnew,patterns)
#groupsnewr <- integer(length(groupsnew))
#for (i in 1:ncol(patterns)) { groupsnewr[groupsnew == colnames(patterns)[i]] <- i - 1 }
#groupsnewr <- as.integer(groupsnewr)

if ((max(groupsnewr)>max(groupsr)) | (min(groupsnewr)<min(groupsr))) stop('Groups indicated in groupsnew do not match with those indicated in groups')
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (!is.matrix(v)) stop('Argument v must be a matrix')
if (ncol(v)!=nrow(patterns)) stop('Argument v must have as many columns as rows has patterns')
if (nrow(x)!=nrow(v)) stop('Arguments x and v must have the same number of rows')

if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
par <- getpar(gg.fit)
alpha0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha)
nclust <- as.integer(gg.fit$nclust); rho <- as.double(par$probclus)
sumx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
if (gg.fit$equalcv) {
  prodx <- double(nrow(x))
} else {
  prodx <- double(nrow(x)*sum(ngrouppat))
}
gapprox <- as.integer(gapprox)

if (missing(sel)) sel <- 1:nrow(x)
if (is.logical(sel)) sel <- (1:nrow(x))[sel]
sel <- as.integer(sel-1) #in C indices start at 0
nsel <- length(sel); nsamples <- length(groupsnewr)
xnew <- anew <- lnew <- double(nsel*nsamples); dnew <- integer(nsel*nsamples)

z <- .C("compute_sumxC",sumx=t(sumx),prodx=t(prodx),nobsx=nobsx,as.integer(gg.fit$equalcv),nsel,sel,as.integer(sum(ngrouppat)),ncol(x),as.double(t(x)),groupsr,K,as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,as.integer(1))
sumx <- matrix(z$sumx,nrow=nrow(x),byrow=TRUE); prodx <- matrix(z$prodx,nrow=nrow(x),byrow=TRUE)
nobsx <- z$nobsx

z <- .C("simnewsamples_ggC",xnew=xnew,dnew=dnew,anew=anew,lnew=lnew,nsamples,groupsnewr,K,nsel,sel,alpha0,nu,balpha,nualpha,as.integer(gg.fit$equalcv),nclust,rho,as.double(t(v)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,t(sumx),t(prodx),nobsx,gapprox)

metadata <- data.frame(labelDescription='Group that each array belongs to',row.names='group')
pheno <- new("AnnotatedDataFrame", data=data.frame(group=groupsnew), dimLabels=c("rowNames", "columnNames"), varMetadata=metadata)
sampleNames(pheno) <- paste("Array",1:nrow(pheno))

metadata <- data.frame(labelDescription=c(paste('Expression patterns for array',1:length(groupsnewr)),paste('alpha parameters for array',1:length(groupsnewr)),paste('mean parameters for array',1:length(groupsnewr))),row.names=c(paste('d',1:length(groupsnewr),sep='.'),paste('alpha',1:length(groupsnewr),sep='.'),paste('mean',1:length(groupsnewr),sep='.')))
fdata <- new("AnnotatedDataFrame", data=data.frame(matrix(z$dnew,nrow=nsel,byrow=TRUE),matrix(z$anew,nrow=nsel,byrow=TRUE),matrix(z$lnew,nrow=nsel,byrow=TRUE)),varMetadata=metadata)
sampleNames(fdata) <- paste("Gene",1:nrow(fdata))
varLabels(fdata) <- c(paste('d',1:length(groupsnewr),sep='.'),paste('alpha',1:length(groupsnewr),sep='.'),paste('mean',1:length(groupsnewr),sep='.'))

experimentData <- new("MIAME", title = "Dataset simulated with simnewsamples", abstract = "This dataset contains expression data simulated from the posterior predictive distribution of a GaGa or MiGaGa. The expression data can be accessed via exprs(object), and the parameter values used to generate the data through fData(object)")

x <- data.frame(matrix(z$xnew,nrow=nsel,byrow=TRUE))
names(x) <- paste('Array',1:nrow(pheno)); rownames(x) <- paste('Gene',1:nrow(fdata))
x <- new("ExpressionSet", phenoData=pheno, featureData=fdata, exprs = x, experimentData = experimentData)
return(x)

}
simnewsamples <- function(gg.fit,groupsnew,sel,x,groups) { UseMethod("simnewsamples") }
