\name{ebam}
\alias{ebam}
\title{Empirical Bayes Analysis of Microarrays}
\description{
    Performs an Empirical Bayes Analysis of Microarrays for a specified value of the
    fudge factor \eqn{a_0}{a0}. Modified versions of the t statistics are used.
}
\usage{
    ebam(a0.out,data,a0=NA,p0=NA,delta=NA,stable=TRUE,number.int=139,local.bin=.1,
    col.accession=NA,col.gene.name=NA,q.values=TRUE,R.fold=TRUE,R.dataset=data,na.rm=FALSE,
    file.out=NA)
}

\arguments{
    \item{a0.out}{the object to which the output of a previous analysis with \code{find.a0}
        was assigned.}
    \item{data}{the data set that should be analyzed. Each row of this data set must
        correspond to a gene. It has to be the same data set that was used in 
        \code{find.a0}.}
    \item{a0}{the fudge factor. If \code{NA}, the value suggested by \code{find.a0}
        will be used.}
    \item{p0}{prior probability that a gene is not differentially expressed. If not specified
        (i.e. \code{NA}), it will automatically be computed.}
    \item{delta}{a gene will be called differentially expressed, if its posterior
        probability of being differentially expressed is larger than or equal to
        \code{delta}. By default, the same \code{delta} is used as in \code{find.a0}.}
    \item{stable}{if \code{TRUE} (default), \eqn{p_0}{p0} will be computed by the algorithm of
        Storey and Tibshirani (2003). If \code{FALSE}, the (unstable) estimate will be computed
        that ensures that the posterior probability of being differentially expressed
        is always nonnegative.}
    \item{number.int}{the number of equally spaced intervals that is used in the 
        logistic regression for the estimation of the ratio of the null density to the mixture
        density.}
    \item{local.bin}{specifies the interval used in the estimation of the local FDR for the
        expression score \eqn{z}. By default, this interval is \eqn{[z-0.1,z+0.1]}.}
    \item{col.accession}{the column of \code{data} containing the accession numbers of
        the genes. If specified, the accession numbers of the significant genes
        will be added to the output.}
    \item{col.gene.name}{the column of \code{data} that contains the names of the genes.
        If specified, the names of the significant genes will be added to the output.}
    \item{q.values}{if \code{TRUE} (default), the q-value for each gene will be computed.}
    \item{R.fold}{if \code{TRUE} (default), the fold change for each differentially
        expressed gene will be computed.}
    \item{R.dataset}{the data set used in the computation of the fold change. This data
        set can be a transformed version of \code{data}.}
    \item{na.rm}{if \code{FALSE} (default), the fold change of genes with at least one
        missing value will be set to \code{NA}. If \code{TRUE}, missing values will be
        replaced by the genewise mean.}
    \item{file.out}{if specified, general information like the number of significant 
        genes and the estimated FDR and gene-specific information like the expression
        scores, the q-values, the R fold etc. of the differentially expressed genes
        are stored in this file.}
}


\value{a plot of the expression scores against their posterior probability of
    being differentially expressed, and (optional) a file containing general information
    like the estimated FDR and the number of differentially expressed genes and 
    gene-specific information about the differentially expressed genes like their names,
    their expression scores, q values and their fold changes.

    \item{FDR}{vector containing the estimated \eqn{p_0}{p0}, the number of significant genes,
        the number of falsely called genes and the estimated FDR.}
    \item{ebam.out}{table containing gene-specific information about the differentially
        expressed genes.}
    \item{row.sig.genes}{vector consisting of the row numbers that belong to the differentially
        expressed genes.}
    \item{\dots}{further elements of the output.}
}

\note{The number of false positives is computed as \eqn{p_0}{p0} times the number of falsely
    called genes.}

\seealso{
    \code{\link{find.a0}}   \code{\link{ebam.wilc}}
}

\references{
    Efron, B., Tibshirani, R., Storey, J.D., and Tusher, V. (2001). Empirical Bayes Analysis
    of a Microarray Experiment, \emph{JASA}, 96, 1151-1160.

    Storey, J.D., and Tibshirani, R. (2003). Statistical significance for genome-wide
    experiments, \emph{Technical Report}, Department of Statistics, Stanford University.

    Schwender, H. (2003). Assessing the false discovery rate in a statistical analysis of
    gene expression data, Chapter 7, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{ebam.wilc}
\alias{ebam.wilc}
\title{Empirical Bayes Analysis using Wilcoxon Rank Sums}
\description{Performs an Empirical Bayes Analysis of Microarrays by using Wilcoxon Rank Sums
as expression scores for the genes.}
\usage{
    ebam.wilc(data,x,y,paired=FALSE,delta=.9,p0=NA,stable.p0=TRUE,use.offset=TRUE,use.weights=TRUE,
    ties.rand=TRUE,zero.rand=TRUE,ns.df=5,col.accession=NA,col.gene.name=NA,R.fold=TRUE,
    R.dataset=data,file.out=NA,rand=NA,na.rm=FALSE)
}

\arguments{
    \item{data}{the data set that should be analyzed. Every row of this data set must
         correspond to a gene.}
    \item{x}{vector of the columns of \code{data} that correspond to the treatment group. In the
        paired case, \eqn{(x[i],y[i])} build a pair. If, e.g., the first \eqn{n_1}{n1} columns of 
        \code{data} build the treatment group, \code{x=1:\eqn{n_1}{n1}}.}
    \item{y}{vector of the columns of \code{data} that correspond to the control group. In the
        paired case, \eqn{(x[i], y[i])} are an observation pair.}
    \item{paired}{paired (\code{TRUE}) or unpaired (\code{FALSE}) data. Default is \code{FALSE}.}
    \item{delta}{a gene will be called significant, if its posterior probability of
        being differentially expressed is larger than or equal to \code{delta}.}
    \item{p0}{prior probability that a gene is not differentially expressed. If not specified,
        it will automatically be computed.}
    \item{stable.p0}{if \code{TRUE} (default), \eqn{p_0}{p0} will be computed by the algorithm
        of Storey and Tibshirani (2003). If \code{FALSE}, the (unstable) estimate will
        be computed that ensures that the posterior probability of being differentially
        expressed is always nonnegative.}
    \item{use.offset}{if \code{TRUE} (default), an offset will be used in the Poisson regression
        for the estimation of the density of the expression scores of all genes.}
    \item{use.weights}{if \code{TRUE} (default), weights are used in the natural cubic spline
        fit for the estimation of \eqn{p_0}{p0}.}
    \item{ties.rand}{if \code{TRUE} (default), non-integer expression scores will be randomly
        assigned to the next lower or upper integer. Otherwise, they are assigned to
        the integer that is closer to the mean.}
    \item{zero.rand}{if \code{TRUE} (default), the sign of each Zero in the computation of
        the Wilcoxon signed rank sums will be randomly assigned. If \code{FALSE}, the
        sign of the Zeros will be set to '--'.}
    \item{ns.df}{the number of degrees of freedom used in the Poisson regression for the 
        estimation of the mixture density of the expression scores of all genes.}
    \item{col.accession}{the column of \code{data} containing the accession numbers of
        the genes. If specified, the accession numbers of the significant genes
        will be added to the output.}
    \item{col.gene.name}{the column of \code{data} that contains the names of the genes.
        If specified, the names of the significant genes will be added to the output.}
    \item{R.fold}{if \code{TRUE} (default), the fold change for each differentially
        expressed gene will be computed.}
    \item{R.dataset}{the data set used in the computation of the fold change. This data
        set can be a transformed version of \code{data}.}
    \item{file.out}{if specified, general information like the number of significant 
        genes and the estimated FDR and gene-specific information like the expression
        scores, the q-values, the R fold etc. of the differentially expressed genes
        are stored in this file.}
    \item{rand}{if specified, the random number generator will be set in a reproducible state.}
    \item{na.rm}{if \code{FALSE} (default), the fold change of genes with at least one
        missing value will be set to \code{NA}. If \code{TRUE}, missing values will be
        replaced by the genewise mean.}
}

\value{a plot of the expression scores vs.\ their posterior probability of being differentially
    expressed, and (optionally) a file containing general information like the FDR and the
    number of differentially expressed genes and gene-specific information on the differentially
    expressed genes like their names, their q-values and their fold change.

    \item{nsig}{number of significant genes.}
    \item{fdr}{estimated FDR.}
    \item{ebam.output}{table containing gene-specific information on the differentially
        expressed genes.}
    \item{row.sig.genes}{vector consisting of the row numbers that belong to the differentially
        expressed genes.}
    \item{\dots}{further elements of the output.}
}

\seealso{
    \code{\link{ebam}}
}

\references{

    Efron, B., Storey, J.D., Tibshirani, R.\ (2001). Microarrays, empirical Bayes methods, and
    the false discovery rate, \emph{Technical Report}, Department of Statistics, Stanford
    University.

    Storey, J.D., and Tibshirani, R. (2003). Statistical significance for genome-wide
    experiments, \emph{Technical Report}, Department of Statistics, Stanford University.

    Schwender, H. (2003). Assessing the false discovery rate in a statistical analysis of
    gene expression data, Chapter 8, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{find.a0}
\alias{find.a0}
\title{Computation of the Fudge Factor}
\description{
    Provides the required information for obtaining the optimal choice of the fudge
    factor in the Empirical Bayes Analysis of Microarrays that uses the modified
    t statistics.
}
\usage{
    find.a0(data,x,y,paired=FALSE,mat.samp=NULL,B=100,balanced=FALSE,na.rm=FALSE,delta=0.9,
    alpha=(0:9)/10,include.0=TRUE,p0=NA,stable=TRUE,number.int=139,rand=NA,plot.legend=TRUE)
}

\arguments{
    \item{data}{the data set that should be analyzed. Every row of this data set must
        correspond to a gene.}
    \item{x}{vector of the columns of \code{data} that correspond to the treatment group.
        In the paired case, \eqn{(x[i],y[i])} build a pair. If, e.g., the first \eqn{n_1}{n1}
        columns contain the gene expression values of the treatment group, \code{x=1:\eqn{n_1}{n1}}.}
    \item{y}{vector of the columns of \code{data} that correspond to the control group. In the
        paired case, \eqn{(x[i], y[i])} are an observation pair.}
    \item{paired}{paired (\code{TRUE}) or unpaired (\code{FALSE}) data. Default is \code{FALSE}.}
    \item{mat.samp}{a permutation matrix. If specified, this matrix will be used, even if
        \code{rand} and \code{B} are specified.}
    \item{B}{number of permutations used in the calculation of the null density.}
    \item{balanced}{if \code{TRUE}, only balanced permutations will be used. Default is
        \code{FALSE}.}
    \item{na.rm}{if \code{FALSE} (default), the expression score of genes with one or more
        missing values will be set to \code{NA}. If \code{TRUE}, the missing values
        will be replaced by the genewise mean of the non-missing values.}
    \item{delta}{a gene will be called differentially expressed, if its posterior
        probability of being differentially expressed is larger than or equal to
        \code{delta}.}
    \item{alpha}{a vector of possible values for the fudge factor \eqn{a_0}{a0} in terms of quantiles of the
        standard deviations of the genes.}
    \item{include.0}{if \code{TRUE} (default), \eqn{a_0=0}{a0=0} will also be a possible choice
        of the fudge factor.}
    \item{p0}{the prior probability that a gene is not differentially expressed. If not specified,
        it will automatically be computed.}
    \item{stable}{if \code{TRUE} (default), \eqn{p_0}{p0} will be computed by the algorithm of
        Storey and Tibshirani (2003). If \code{FALSE}, the (unstable) estimate that
        ensures that the posterior probability of being differentially expressed is
        always non-negative is computed.}
    \item{number.int}{number of equally spaced intervals between the minimum and the maximum
        of the expression scores \eqn{z} that are used in the logistic regression for estimating
        the ratio of the null density to the mixture density.}
    \item{rand}{if specified, the random number generator will be put in a reproducible
        state.}
    \item{plot.legend}{if \code{TRUE} (default), a legend will be added to the plot of the
        expression scores vs.\ their logit-transformed posterior probability.}
}
\note{The results of \code{find.a0} must be assigned to an object for the further analysis
    with \code{ebam}.}

\value{a list of the numbers of genes called differentially expressed by the EBAM analysis
    for several choices of \eqn{a_0}{a0}, and the plot of the expression scores vs.\ their
    corresponding logit-transformed posterior probability of being significant.

    \item{sig.a0}{vector containing the number of differentially expressed genes for the
        specified set of values for \eqn{a_0}{a0}.}
    \item{a0}{the optimal choice of the fudge factor using the criterion of Efron et al. (2001)
        that the \eqn{a_0}{a0} should be used which leads to the most differentially
        expressed genes.}
}
\seealso{
    \code{\link{ebam}}  \code{\link{ebam.wilc}}
}

\references{
    Efron, B., Tibshirani, R., Storey, J.D., and Tusher, V. (2001). Empirical Bayes Analysis
    of a Microarray Experiment, \emph{JASA}, 96, 1151-1160.

    Storey, J.D., and Tibshirani, R. (2003). Statistical significance for genome-wide
    experiments, \emph{Technical Report}, Department of Statistics, Stanford University.

    Schwender, H. (2003). Assessing the false discovery rate in a statistical analysis of
    gene expression data, Chapter 7, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{quantiles}
\alias{quantiles}
\title{Empirical Quantiles}
\description{Computes the desired quantiles of a sample and returns them.}
\usage{quantiles(x,prob)}

\arguments{
    \item{x}{vector of data}
    \item{prob}{vector of desired probability levels. Values must be
        between 0 and 1 inclusive.}
}

\value{
    a vector containing the desired quantiles of the sample
}

\details{
Consider a sample containing \eqn{n} observations. If \eqn{\alpha n} is a non-integer, then
the \eqn{\alpha} quantile will be given by the \eqn{k}th smallest observation \eqn{x(k)}, where
\eqn{k} is the smallest integer larger than \eqn{\alpha n}. If \eqn{\alpha n} is an integer, then
the \eqn{\alpha} quantile will be computed by \eqn{0.5(x(k)+x(k+1))}.}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{univar}

\eof
\name{sam}
\alias{sam}
\title{Significance Analysis of Microarrays}
\description{Performs a Significance Analysis of Microarrays (SAM) for
    a set of positive thresholds. It is possible to do either a one class or a two class
    SAM analysis.}
\usage{sam(data,x,y=NULL,paired=FALSE,mat.samp=NULL,B=100,balanced=FALSE,
    na.rm=FALSE,s0=NA,alpha.s0=seq(0,1,.05),include.s0=TRUE,factor.s0=
    1.4826,p0=NA,lambda.p0=1,vec.lambda.p0=(0:95)/100,delta.fdr=
    (1:10)/5,med.fdr=TRUE,graphic.fdr=TRUE,thres.fdr=seq(0.5,2,.5),
    pty.fdr=TRUE,help.fdr=TRUE,ngenes=NA,iteration=3,initial.delta=
    c(0.1,seq(.2,2,.2),4),rand=NA)}
\arguments{
    \item{data}{the data set that should be analyzed. Every row of this data set
        must correspond to a gene.}
    \item{x}{vector of the columns of the data set that correspond to the treatment group (in the two
        class case) or to the biological samples that should be analyzed (in the one class case).
        In the paired (two class) case \eqn{(x[i],y[i])} build a pair. If, e.g., the first \eqn{n_1}{n1}
        columns contain the gene expression values of the treatment group, \code{x=1:n1}.}
    \item{y}{vector of the columns of the data set that correspond to the control group (in the two class
        case). If a one class analysis is done, \code{y} will be set to \code{NULL} (default).  In the paired
        (two class) case \eqn{(x[i], y[i])} are an observation pair.}
    \item{paired}{paired (\code{TRUE}) or unpaired (\code{FALSE}) data. Default is \code{FALSE}.}
    \item{mat.samp}{a permutation matrix. If specified, this matrix will be used,
        even if \code{rand} and \code{B} are specified.}
    \item{B}{number of permutations used in the calculation of the null density.
        Default is \code{B=100}.}
    \item{balanced}{if \code{TRUE}, balanced permutations will be used. Default is \code{FALSE}.}
    \item{na.rm}{if \code{FALSE} (default), the expression scores \eqn{d} of genes with one or more
         missing values will be set to \code{NA}. If \code{TRUE}, the missing
         values will be replaced by the genewise mean of the non-missing values.}
    \item{s0}{the fudge factor. If \code{NA} (default), the fudge factor \eqn{s_0}{s0} will be computed
         automatically.}
    \item{alpha.s0}{the possible values of the fudge factor \eqn{s_0}{s0} in terms of quantiles of the
        standard deviations of the genes.}
    \item{include.s0}{if \code{TRUE} (default), \eqn{s_0=0}{s0=0} is a possible choice for the
        fudge factor.}
    \item{factor.s0}{constant with which the MAD is multiplied in the computation of
         the fudge factor.}
    \item{p0}{the probability that a gene is not differentially expressed. If not
        specified (default), it will be computed.}
    \item{lambda.p0}{number between 0 and 1 that is used to estimate \eqn{p_0}{p0}. 
        If set to \code{1} (default), the automatic \eqn{p_0}{p0} selection using 
        the natural cubic spline fit is used.}
    \item{vec.lambda.p0}{vector of values for \eqn{\lambda} used in the automatic
         computation of \eqn{p_0}{p0}.}
    \item{delta.fdr}{a vector of values for the threshold \eqn{\Delta}{Delta} for which the SAM
         analysis is performed.}
    \item{med.fdr}{if \code{TRUE} (default), the median number, otherwise the expected
         number, of falsely called genes will be computed.}
    \item{graphic.fdr}{if \code{TRUE} (default), both the SAM plot and the plots of Delta vs.
        FDR and Delta vs. number of significant genes will be generated.}
    \item{thres.fdr}{for each value contained in \code{thres.fdr}, two lines parallel
         to the 45-degree line are generated in the SAM plot.}
    \item{pty.fdr}{if \code{TRUE} (default), a square SAM Plot will be generated.}
    \item{help.fdr}{if \code{TRUE} (default), help-lines will be drawn in both Delta
        plots.}
    \item{ngenes}{a number or proportion of genes for which the FDR is estimated.}
    \item{iteration}{the number of iterations used in the estimation of the FDR
        for a given number or proportion of genes.}
    \item{initial.delta}{a set of initial guesses for \eqn{\Delta}{Delta} in the computation
         of the FDR for a given number or proportion of genes.}
    \item{rand}{if specified, the random number generator will be put in a 
        reproducible state.}
}
\value{a table of statistics (estimate of \eqn{p_0}{p0}, number of significant genes,
    number of falsely called genes and FDR) for the specified set of Deltas, a
    SAM Plot, a Delta vs.\ FDR plot, and a plot of Delta vs.\ the number of significant genes.}

\note{For further analyses with \code{sam.plot}, the results of \code{sam} must be assigned
    to an object.
    
    SAM was developed by Tusher et al. (2001).
    
    !!! There is a patent pending for the SAM technology at Stanford University. !!! 
}

\section{Warning}{In the one class case, the null distribution will only be computed correctly,
    if the expression values are log ratios. So in the one class case only log ratios should be
    used. (There will be no checking, if the expression values are
    really log ratios.)}

\seealso{
    \code{\link{sam.plot}} \code{\link{sam.wilc}} \code{\link{sam.lambda}}
}

\references{
    Tusher, V.G., Tibshirani, R., and Chu, G. (2001). Significance analysis of microarrays
    applied to the ionizing radiation response, \emph{PNAS}, 98, 5116-5121.

    Storey, J.D. (2002). A direct approach to the false discovery rate, \emph{Journal of
    the Royal Statistical Society, Series B}, 64, 479-498.

    Storey, J.D., and Tibshirani, R. (2003). Statistical significance for genome-wide
    experiments, \emph{Technical Report}, Department of Statistics, Stanford University.

    Schwender, H. (2003). Assessing the false discovery rate in a statistical analysis of
    gene expression data, Chapter 5, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{sam.lambda}
\alias{sam.lambda}
\title{Further SAM analysis}
\description{If the output of a previous analysis with \code{sam} was assigned to an object, this function
    can be used to compute the number of significant genes and the FDR for values of the threshold Delta
    that were not used in the previous analysis.}

\usage{sam.lambda(sam.out,delta)}

\arguments{
    \item{sam.out}{the object to which the output of a previous analysis with \code{sam} was assigned.}
    \item{delta}{vector of values for the threshold Delta.}
}

\value{a table of statistics (estimate for \eqn{p_0}{p0}, number of significant genes,
    number of falsely called genes and FDR) for the specified set of Deltas.
}

\note{
    This function can only be used for a SAM analysis -- not for a SAM.Wilc analysis.

    SAM was introduced by Tusher et al. (2001).

    !!! There is a patent pending for the SAM technology at Stanford University. !!!
}

\seealso{
    \code{\link{sam.plot}} \code{\link{sam}} \code{\link{sam.wilc}}
}


\references{
    Tusher, V.G., Tibshirani, R., and Chu, G. (2001). Significance analysis of microarrays
    applied to the ionizing radiation response, \emph{PNAS}, 98, 5116-5121.

    Schwender, H. (2003). Assessing the False Discovery Rate in a Statistical Analysis
    of Gene Expression Data, Chapter 5, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{sam.plot}
\alias{sam.plot}
\title{SAM Analysis for a specific threshold}
\description{
    This function performs a SAM or a SAM-Wilc analysis,
    respectively, for a specific threshold Delta, and (optionally) stores information
    on the differentially expressed genes in a file.
}
\usage{
    sam.plot(sam.out,delta,data,q.values=TRUE,R.fold=TRUE,na.rm=FALSE,pty.square=TRUE,file.out=NA,
        col.accession=NA,col.gene.name=NA,use.numbers=sam.out$use.numbers,rand=sam.out$rand)
}
\arguments{
    \item{sam.out}{the object to which the output of a previous analysis with
        \code{sam} or \code{sam.wilc} was assigned.}
    \item{delta}{the value of the threshold \eqn{\Delta} for which the analysis should be performed.}
    \item{data}{the used data set. If the fold change should be computed, this data set
        can be a transformed version of the data set used in the previous analysis
        with \code{sam} or \code{sam.wilc}.}
    \item{q.values}{if \code{TRUE} (default), the q-value for each gene will be computed.}
    \item{R.fold}{if \code{TRUE} (default), the fold change for each differentially 
        expressed gene will be computed. If the previous SAM analysis was a one class analysis,
        \code{R.fold} is automatically set to \code{FALSE}.}
    \item{na.rm}{if \code{FALSE} (default), the fold change of genes with at least one
         missing value will be set to \code{NA}. If \code{TRUE}, missing values will
         be replaced by the genewise mean.}
    \item{pty.square}{if \code{TRUE} (default), a square SAM Plot will be generated with
         x and y axes having the same range.}
    \item{file.out}{if specified (i.e. not \code{NA}), general information like the number
        of significant genes and the estimated FDR and gene-specific information is
        stored in the corresponding file.}
    \item{col.accession}{the column of \code{data} containing the accession numbers of
        the genes. If specified, the accession numbers of the significant genes
        will be added to the output.}
    \item{col.gene.name}{the column of \code{data} that contains the names of the genes.
        If specified, the names of the significant genes will be added to the output.}
    \item{use.numbers}{if \code{TRUE}, the number of observations that correspond to a point
        in the SAM Plot will be used as symbol for this point. Will only be used, if
        \code{sam.out} contains the results of \code{sam.wilc}.}
    \item{rand}{if specified, the random number generator is set in a reproducible state.
        By default, the same \code{set.seed} is used as in the previous analysis with
        either \code{sam} or \code{sam.wilc}.}
}
\value{
    a SAM Plot for the specified \eqn{\Delta}{Delta} and (optionally) an output file
    containing general information like the number of significant genes and the FDR and
    information about the differentially expressed genes like their names, q-values and
    fold changes.
    
    \item{vec.fdr}{a list containing the estimate of \eqn{p_0}{p0}, the number of significant genes, the FDR
        etc. for the specified \eqn{\Delta}{Delta}.}
    \item{sam.output}{a table containing gene-specific information about the differentially
        expressed genes like their IDs (i.e. the rows of the data set that contain the 
        expression data of these genes), their expression score, q-values, the fold changes
        etc.}
    \item{row.sig.genes}{vector that consists of the rows of the data set that contain the
        expression data of the differentially expressed genes.}
}

\note{
    SAM was developed by Tusher et al. (2001).
    
    !!! There is a patent pending for the SAM technology at Stanford University. !!!
}

\seealso{
    \code{\link{sam}} \code{\link{sam.wilc}} \code{\link{sam.lambda}}
}
\references{
    Tusher, V.G., Tibshirani, R., and Chu, G. (2001). Significance analysis of microarrays
    applied to the ionizing radiation response, \emph{PNAS}, 98, 5116-5121.

    Storey, J.D., and Tibshirani, R. (2003). Statistical significance for genome-wide
    experiments, \emph{Technical Report}, Department of Statistics, Stanford University.

    Schwender, H. (2003). Assessing the false discovery rate in a statistical analysis of
    gene expression data, Chapters 5 and 6, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{sam.wilc}
\alias{sam.wilc}
\title{SAM Analysis using Wilcoxon Rank Sums}
\description{Performs a Significance Analysis of Microarrays for a set of integer thresholds Delta. Instead
of the modified t statistics, it uses Wilcoxon Rank Sums.}
\usage{
    sam.wilc(data,x,y,paired=FALSE,na.rm=FALSE,zero.rand=TRUE,rand=NA,use.weights=TRUE,
    delta=1:max(abs(W.diff)),graphic=TRUE,pty.square=TRUE,
    thres=round(quantile(2:max(abs(W.diff)),(0:3)/3)),use.numbers=TRUE,
    helplines=TRUE)
}
\arguments{
    \item{data}{the data set that should be analyzed. Every row of this data set must
        correspond to a gene.}
    \item{x}{vector of the columns of the data set that correspond to the treatment group.
        In the paired case, \eqn{(x[i],y[i])} build a pair. If, e.g., the first \eqn{n_1}{n1} columns
        of \code{data} contain the gene expression values of the treatment group, \code{x=1:n1}.}
    \item{y}{vector of the columns of the data set that correspond to the control group. In the
        paired case, \eqn{(x[i], y[i])} are an observation pair.}
    \item{paired}{paired (\code{TRUE}) or unpaired (\code{FALSE}) data. Default is \code{FALSE}.}
    \item{na.rm}{if \code{FALSE} (default), the expression scores W of genes with one or
        more missing values will be set to \code{NA}. If \code{TRUE}, the missing values
        will be replaced by the genewise mean of the non-missing values.}
    \item{zero.rand}{if \code{TRUE} (default), the sign of each Zero in the calculation of
        the Wilcoxon signed rank score will be randomly assigned. If \code{FALSE},
        the sign of the Zeroes will be set to '--'.}
    \item{rand}{if specified (i.e. not \code{NA}), the random number generator will be
        put in a reproducible state.}
    \item{use.weights}{if \code{TRUE} (default), then the data points are weighted by \eqn{1-\lambda}
        in the fit of a natural cubic spline.}
    \item{delta}{a vector of integers for which the SAM-Wilc analysis should be performed.}
    \item{graphic}{if \code{TRUE} (default), both the SAM plot and the plots of Delta vs. 
        FDR and Delta vs. number of significant genes are generated. To avoid this
        plotting, set \code{graphic=FALSE}.}
    \item{pty.square}{if \code{TRUE} (default), a square SAM plot will be generated with
        x and y axes having the same range.}
    \item{thres}{a vector of integer values for \eqn{\Delta} for which two lines parallel
        to the 45-degree line are generated.}
    \item{use.numbers}{if \code{TRUE} (default), the symbol for each point in the SAM Plot
        of a SAM-Wilc analysis will be the number of observations that correspond to
        this point.}
    \item{helplines}{if \code{TRUE} (default), help lines will be generated in both the
        Delta vs.\ FDR and the Delta vs.\ number of significant genes
        plot.}
}

\value{a table of statistics (estimate for \eqn{p_0}{p0}, number of significant genes,
    number of falsely called genes and FDR) for the specified set of Deltas, a
    SAM Plot, a Delta vs.\ FDR plot, and a plot of Delta vs.\ the number of significant genes.
}

\note{For further analyses with \code{sam.plot}, the results of \code{sam.wilc} must be assigned
    to an object.
    
    SAM was developed by Tusher et al. (2001).
    
    !!! There is a patent pending for the SAM technology at Stanford University. !!!
}

\seealso{
    \code{\link{sam.plot}} \code{\link{sam}}
}

\references{
    Tusher, V.G., Tibshirani, R., and Chu, G. (2001). Significance analysis of microarrays
    applied to the ionizing radiation response, \emph{PNAS}, 98, 5116-5121.
    
    Schwender, H. (2003). Assessing the False Discovery Rate in a Statistical Analysis
    of Gene Expression Data, Chapter 6, \emph{Diploma thesis}, Department of Statistics,
    University of Dortmund, \url{http://de.geocities.com/holgerschw/thesis.pdf}.
}

\author{Holger Schwender, \email{holger.schw@gmx.de}}

\keyword{htest}

\eof
\name{siggenes-internal}
\alias{find.cuts}
\alias{fudge}
\alias{na.replace}
\alias{neglogLik.repeat}
\alias{p0.est}
\alias{q.value.cal}
\alias{q.value.wilc}
\alias{R.fold.cal}
\alias{ratio.est}
\alias{roller.coaster}
\alias{rs.cal}
\alias{sam.fdr}
\alias{sam.ngenes}
\alias{sam.plotter}
\alias{sam.sampler}
\alias{wilc.cal}
\title{Internal siggenes functions}
\description{Internal siggenes functions.}
\details{These functions are not meant to be called directly by the user.}
\author{Holger Schwender, \email{holger.schw@gmx.de}}
\keyword{internal}

\eof
