\name{GeneTS-internal}
\alias{dominant.freqs.single}
\alias{periodogram.spec.single}
\alias{periodogram.spec}
\alias{periodogram.freq}
\alias{fisher.g.test.single}
\alias{is.constant.single}
\alias{bag.fun}
\alias{myrmvnorm}
\alias{isR181}
\alias{getPackageVersion}
\alias{is.graph.from.BioC13}
\alias{is.Rgraphviz.from.BioC13}
\alias{is.graph.loaded}
\alias{is.Rgraphviz.loaded}
\alias{.First.lib}

\title{Internal GeneTS Functions}
\description{
  Internal GeneTS functions.
}
\note{
  These are not to be called by the user (or in some cases are just
  waiting for proper documentation to be written).
}
\keyword{internal}

\eof
\name{avgp}
\alias{avgp}
\title{Average Periodogram for Multiple (Genetic) Time Series}
\description{
  \code{avgp} calculates and plots the average periodogram as described in
  Wichert, Fokianos and Strimmer (2004).
}
\usage{
avgp(x, title = "untitled", plot = TRUE, angular = FALSE, \dots)
}
\arguments{
  \item{x}{multiple (genetic) time series data (=matrix where each column
           corresponds to one time series)}
  \item{title}{name of the data set}
  \item{plot}{plot the average periodogram?}
  \item{angular}{convert frequencies to angular frequencies?}
  \item{\dots}{arguments passed to \code{\link{plot}}
                and to \code{\link{periodogram}}}
}
\details{
  The average periodogram is simply the frequency-wise average of the spectral density (as estimated
  by the Fourier transform) over all time series.
  To calculate the average periodogram the function \code{\link{periodogram}}
  is used. See Wichert, Fokianos and Strimmer (2004)
  for more details.
}
\value{
  A list object with the following components:
  \item{freq}{A vector with the discrete Fourier frequencies (see \code{\link{periodogram}}).
              If the option angular=TRUE  then the output are angular frequencies (2*pi*f).}
  \item{avg.spec}{A vector with the average power spectral density at each frequency.}
  \item{title}{Name of the data set underlying the average periodogram.}

  The result is returned invisibly if \code{plot} is true.
}
\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Wichert, S., Fokianos, K., and Strimmer, K. (2004).  Identifying periodically expressed transcripts
   in microarray time series data. \emph{Bioinformatics} \bold{20}:5-20.
}
\seealso{\code{\link{periodogram}}, \code{\link{spectrum}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many samples and how many genes?
dim(caulobacter)

# average periodogram
avgp.caulobacter <- avgp(caulobacter, "Caulobacter")
avgp.caulobacter

# just compute and don't plot 
avgp(caulobacter, "Caulobacter", plot=FALSE)
}
\keyword{ts}

\eof
\name{bagging}
\alias{bagged.cov}
\alias{bagged.cor}
\alias{bagged.pcor}

\title{Bagged Versions of Covariance and (Partial) Correlation Matrix}

\usage{
bagged.cov(x, R=1000, \dots)
bagged.cor(x, R=1000, \dots)
bagged.pcor(x, R=1000, \dots)
}

\arguments{
  \item{x}{data matrix or data frame}
  \item{R}{number of bootstrap replicates (default: 1000)}
  \item{\dots}{options passed to \code{\link{cov}}, \code{\link{cor}}, and \code{\link{partial.cor}} 
             (e.g., to control handling of missing values) }
}
\description{
  \code{bagged.cov}, \code{bagged.cor}, and \code{bagged.pcor} calculate
  the bootstrap aggregated (=bagged) versions of the covariance and
  (partial) correlation estimators.
  
  These estimators are advantageous especially for small sample size
  problems. For example, the bagged correlation matrix typically remains positive
  definite even when the sample size is much smaller than the number of variables.
  
  In Schaefer and Strimmer (2003) the inverse of the bagged correlation matrix
  is used to estimate graphical Gaussian models from sparse microarray data - 
  see also \code{\link{ggm.estimate.pcor}} for various strategies to
  estimate partial correlation coefficients.
}

\details{
  Bagging was first suggested by Breiman (1996) as a means to improve
  an estimator using the bootstrap. The bagged estimate is simply the
  mean of the bootstrap sampling distribution. Thus, bagging is essentially
  a variance reduction method. The bagged estimate may also be interpreted
  as an (approximate) posterior mean estimate assuming some implicit prior.
  
}
\value{
  A symmetric matrix.
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Breiman, L. (1996). Bagging predictors. \emph{Machine Learning}, \bold{24}, 123--140.

  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}
\seealso{\code{\link{cov}}, \code{\link{cor}}, \code{\link{partial.cor}},
          \code{\link{ggm.estimate.pcor}}, \code{\link{robust.boot}}.}

\examples{
# load GeneTS library
library(GeneTS)

# small example data set 
data(caulobacter)
dat <- caulobacter[,1:15]
dim(dat)

# bagged estimates
b.cov <- bagged.cov(dat)
b.cor <- bagged.cor(dat)
b.pcor <- bagged.pcor(dat)

# total squared difference
sum( (b.cov - cov(dat))^2  )
sum( (b.cor - cor(dat))^2  )
sum( (b.pcor - partial.cor(dat))^2  )

# positive definiteness of bagged correlation
is.positive.definite(cor(dat))
is.positive.definite(b.cor)
}
\keyword{multivariate}

\eof
\name{caulobacter}
\alias{caulobacter}

\title{Microarray Time Series Data for 1444 Caulobacter  Crescentus 
       Genes}

\description{
  This data set describes the temporal expression of 1444 genes
  (open reading frames) in the cell cycle of the bacterium
  \emph{Caulobacter crescentus}.
}

\usage{
data(caulobacter)
}

\format{
  \code{caulobacter} is a numerical matrix with 1444 columns (=genes) and
  11 rows (=time points).
}

\source{
  This data is described in Laub et al. (2000) and can be freely
  downloaded from 
  (\url{http://caulobacter.stanford.edu/CellCycle/DownloadData.htm}).
}


\references{
  Laub, M.T., McAdams, H.H., Feldblyum, T., Fraser, C.M., and Shapiro, L.
  (2000) Global analysis of the genetic network controlling
  a bacterial cell cycle. \emph{Science}, \bold{290}, 2144--2148.
}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many samples and how many genes?
dim(caulobacter)

# compute and plot average periodogram
avgp.caulobacter <- avgp(caulobacter, "Caulobacter")
avgp.caulobacter
}
\keyword{datasets}


\eof
\name{cor.fit.mixture}
\alias{cor.fit.mixture}
\alias{cor.prob.nonzero}

\title{Graphical Gaussian Models: Fit Mixture Distribution to Sample Correlation Coefficients}
\usage{
cor.fit.mixture(r, MAXKAPPA=5000)
cor.prob.nonzero(r, kappa, eta0)
}
\arguments{
  \item{r}{vector of sample correlations}
  \item{kappa}{the degree of freedom of the null distribution}
  \item{eta0}{the proportion of true null r-values (=the prior)}
  \item{MAXKAPPA}{upper bound for the estimated kappa (default: MAXKAPPA=5000)}
}
\description{

  \code{cor.fit.mixture} fits a mixture model 
  
    \code{f(r) = eta0 dcor0(r, kappa) + etaA dunif(r, -1, 1)}
  
  to a vector of empirical partial correlation coefficients using likelihood maximization.
  This allows one to estimate both the degree of freedom \code{kappa} in the
  null-distribution and the proportion eta0 of null r-values (note that etaA = 1-eta0).
  
  \code{cor.prob.nonzero} computes

    \code{etaA dunif(r, -1, 1)/f(r)}, 

  i.e. the posterior probability that the true correlation is non-zero given the empirical
  correlation r, the degree of freedom of the null-distribution kappa, and the prior eta0
  for the null-distribution. 
}

\details{
  The above functions are useful to determine the null-distribution of edges in a sparse graphical
  Gaussian model, see Schaefer and Strimmer (2003) for more details and an application to infer 
  genetic networks from microarray data.
}

\value{
  A list object with the following components:
  \item{kappa}{the degree of freedom of the null distribution} 
  \item{eta0}{the prior for the null distribution, i.e. the proportion of null r-values}
  \item{logL}{the maximized log-likelihood}
}


\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}

\seealso{\code{\link{dcor0}}, \code{\link{cor0.estimate.kappa}}, 
\code{\link{kappa2N}}, \code{\link{fdr.estimate.eta0}}.}

\examples{
# load GeneTS library
library(GeneTS)

# simulate mixture distribution
r <- rcor0(700, kappa=10)
u <- runif(200, min=-1, max=1)
rc <- c(r,u)

# estimate kappa and eta0 (=7/9)
cor.fit.mixture(r)
cor.fit.mixture(rc) 

# for comparison
cor0.estimate.kappa(r)
cor0.estimate.kappa(rc)

# posterior probabilities
r <- seq(-1,1, .01)
post.prob <- cor.prob.nonzero(r,kappa=10,eta0=7/9)
plot(r, post.prob, type="l")
}
\keyword{htest}

\eof
\name{cor0.estimate.kappa}
\alias{cor0.estimate.kappa}

\title{Estimating the Degree of Freedom of the Null Distribution
       of the Correlation Coefficient }
\usage{
cor0.estimate.kappa(r, method=c("fisher", "likelihood", "robust"), MAXKAPPA=5000, w=1.0)
}
\arguments{
  \item{r}{vector of sample correlations (assumed true value of rho=0)}
  \item{method}{use Fisher's rule (\code{fisher}),
                optimize likelihood function of null distribution (\code{likelihood}), or
		use Fisher's rule with robust estimate of variance (\code{robust}),
		}
  \item{MAXKAPPA}{upper bound for the estimated kappa (default: MAXKAPPA=5000);
                  only for likelihood estimate}
  \item{w}{winsorize at `w' standard deviations; only for robust estimate}		
}
\description{

  \code{cor0.estimate.kappa} estimates the degree of freedom \code{kappa} in the
  null-distribution of the correlation coefficient (i.e. assuming that rho=0).
  
  According to Fisher's rule \code{kappa = round(1/var(z) + 2)} the degree of freedom
  can be estimated from the variance of the z-transformed sample correlations. 
  
  A maximum-likelihood estimate of the degree of freedom is obtained 
  on the basis of the null distribution of the sample correlation coefficient
  (i.e. assuming rho = 0) using \code{method="likelihood"}. This results
  almost always in the same estimate of kappa as with the simple Fisher's rule.
  
  If \code{method="robust"} then the variance employed in Fisher's rule
  is estimated using the Huber M-estimate of the scale.  This is useful
  if the null-distribution is slightly "contaminated". 
  
  The degree of freedom \code{kappa} depends both on the sample size N and the number 
  G of investigated variables, 
  i.e. whether simple or partial correlation coefficients are being considered.
  For G=2 (simple correlation coefficient) the degree of freedom equals kappa = N-1,
  whereas for arbitrary G (with G-2 variables eliminated in the partial correlation coefficient)
  kappa = N-G+1 (see also \code{\link{dcor0}} and \code{\link{kappa2N}}).

  If the empirical sampling distribution is a mixture
  distribution then use of \code{cor0.estimate.kappa} may not be appropriate;
  instead \code{\link{cor.fit.mixture}} may be used.
}

\value{
  The estimated degree of freedom kappa.
}


\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\seealso{\code{\link{dcor0}}, \code{\link{z.transform}}, 
  \code{\link{hubers}}, \code{\link{kappa2N}}, \code{\link{cor.fit.mixture}}.}

\examples{
# load GeneTS library
library(GeneTS)

# distribution of r for kappa=7
x <- seq(-1,1,0.01)
y <- dcor0(x, kappa=7)

# simulated data
r <- rcor0(1000, kappa=7)
hist(r, freq=FALSE, 
  xlim=c(-1,1), ylim=c(0,5))
lines(x,y,type="l")

# estimate kappa
cor0.estimate.kappa(r)
}
\keyword{htest}

\eof
\name{cor0.test}
\alias{cor0.test}

\title{Test of Vanishing (Partial) Correlation}
\usage{
cor0.test(r, kappa, method=c("student", "dcor0", "ztransform"))
}
\arguments{
  \item{r}{observed correlation}
  \item{kappa}{degree of freedom of the null-distribution}
  \item{method}{method used to compute the p-value}
}
\description{
  \code{cor0.test} computes a p-value for the two-sided test with the null
  hypothesis H0: rho == 0 versus the alternative hypothesis HA: rho != 0.
  
  If \code{method="student"} is selected then the statistic 
  \code{t=r*sqrt((kappa-1)/(1-r*r))} is considered which under H0 is
  student-t distributed with \code{df=kappa-1}.  This method is exact.
  
  If \code{method="dcor0"} is selected then the p-value is computed
  directly from the distribution function \code{\link{pcor0}}.
  This method is also exact.
 
  If \code{method="ztransform"} is selected then the p-value is computed
  using the z-transform (see \code{\link{z.transform}}), i.e. using 
  a suitably chosen normal distribution.
  This method returns approximate p-values.
}

\value{
  A p-value.
}


\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\seealso{\code{\link{dcor0}}, \code{\link{cor0.estimate.kappa}},
         \code{\link{kappa2N}}, \code{\link{z.transform}}.}

\examples{
# load GeneTS library
library(GeneTS)

# covariance matrix
m.cov <- rbind(
 c(3,1,1,0),
 c(1,3,0,1),
 c(1,0,2,0),
 c(0,1,0,2)
)

# compute partial correlations
m.pcor <- cor2pcor(m.cov)
m.pcor

# corresponding p-values 
# assuming a sample size of 25, i.e. kappa=22
kappa2N(22, 4)
cor0.test(m.pcor, kappa=22)
cor0.test(m.pcor, kappa=22) < 0.05

# p-values become smaller with larger r 
cor0.test(0.7, 12)
cor0.test(0.8, 12)
cor0.test(0.9, 12)

# comparison of various methods
cor0.test(0.2, 45, method="student")
cor0.test(0.2, 45, method="dcor0")
cor0.test(0.2, 45, method="ztransform")
}
\keyword{htest}

\eof
\name{dcor0}
\alias{dcor0}
\alias{pcor0}
\alias{rcor0}
\alias{ibeta}

\title{Distribution of the Vanishing Correlation Coefficient (rho=0) and Related Functions}

\usage{
dcor0(x, kappa, log=FALSE)
pcor0(q, kappa, lower.tail=TRUE, log.p=FALSE)
rcor0(n, kappa)
ibeta(z, a, b)
}

\arguments{
	\item{x,q}{vector of sample correlations}
	\item{kappa}{the degree of freedom of the distribution}
	\item{n}{number of values to generate. If n is a vector, length(n)
                values will be generated}
	\item{log, log.p}{logical vector; if TRUE, probabilities p are given as log(p)}
	\item{lower.tail}{logical vector; if TRUE (default), probabilities are \eqn{P[R <= r]},
	                  otherwise, \eqn{P[R > r]}}
        \item{a,b,z}{numeric vectors}
}

\description{
  Density and distribution function and a random number generator
  of Pearson's correlation coefficient assuming that there is no
  correlation present (rho = 0).
 
  The degree of freedom kappa depends both on the sample size N and the number G
   of investigated variables, 
  i.e. whether simple or partial correlation coefficients are being considered.
  For G=2 (simple correlation coefficient) the degree of freedom equals kappa = N-1,
  whereas for arbitrary G (with G-2 variables eliminated in the partial correlation coefficient)
  kappa = N-G+1 (see also \code{\link{cor0.estimate.kappa}}).
         
  The incomplete beta function is needed to compute the distribution function. For
  z=1 the incomplete beta function reduces to the beta function
  (\code{ibeta}(1, a, b) = \code{beta}(a, b)).
  
}
\details{
  For density and distribution functions (as well as a corresponding random number generator)
  of the correlation coefficient for arbitrary values of rho please refer to the
  \code{SuppDists} package by  Bob Wheeler \email{bwheeler@echip.com} (available on CRAN).
  Note that the parameter N in the \code{dPearson} function (and others in the  \code{SuppDists} package)
  corresponds to  N=kappa+1 (i.e. G=2).
  
}

\value{
  The output values conform to the output from other such functions 
  in \R. \code{dcor0} gives the density, \code{pcor0} 
  the distribution function, and \code{rcor0} generates random numbers.
  
  The function \code{ibeta} returns a numeric value.
}

\seealso{\code{\link{cor}}, \code{\link{cor0.estimate.kappa}}, \code{\link{beta}}.}

\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\examples{
# load GeneTS library
library(GeneTS)

# distribution of r for various degrees of freedom
x <- seq(-1,1,0.01)
y1 <- dcor0(x, kappa=7)
y2 <- dcor0(x, kappa=15)
plot(x,y2,type="l", xlab="r", ylab="pdf",
  xlim=c(-1,1), ylim=c(0,2))
lines(x,y1)

# simulated data
r <- rcor0(1000, kappa=7)
hist(r, freq=FALSE, 
  xlim=c(-1,1), ylim=c(0,5))
lines(x,y1,type="l")

# distribution function
pcor0(-0.2, kappa=15)

# incomplete beta function
ibeta(0.4, 1, 3)
ibeta(1, 2, 3)
beta(2, 3)
}
\keyword{distribution}

\eof
\name{dominant.freqs}
\alias{dominant.freqs}
\title{Dominant Frequencies in Multiple (Genetic) Time Series}
\usage{
dominant.freqs(x, m=1, \dots)
}
\arguments{
  \item{x}{multivariate (genetic) time series (=matrix where each column
           corresponds to one time series), or a vector with a single
	   time series}
  \item{m}{number of dominant frequencies}
  \item{\dots}{arguments passed to \code{\link{periodogram}}}
}
\description{
  \code{dominant.freqs} returns the m dominant frequencies (highest peaks)
  in each of the periodograms computed for the individual time series.
}

\value{
  A matrix (or vector, if only 1 time series is considered) with the
  dominant frequencies. In a matrix, each column
  corresponds to one time series.
}

\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{\code{\link{periodogram}}, \code{\link{spectrum}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many samples and how many genes?
dim(caulobacter)

# first three dominant frequencies for each gene
dominant.freqs(caulobacter, 3)

# first four dominant frequencies for gene no. 1000
dominant.freqs(caulobacter[,1000], 4)
}
\keyword{ts}

\eof
\name{fdr.control}
\alias{fdr.control}
\title{Controlling the False Discovery Rate in Multiple Testing}
\usage{
fdr.control(p, Q=0.05, eta0=1.0, robust=FALSE)
}
\description{
  \code{fdr.control} controls the False Discovery Rate (FDR) at a
  given level Q using the algorithms described in Benjamini and Hochberg (1995)
  and Storey (2002).  The FDR is the expected proportion
  of false positives (erroneous rejections) among the significant tests (rejections).
  For a given vector of p-values and the desired FDR level Q the corresponding p-value
  cut-off and the q-values for each hypothesis (see Storey, 2002) are computed. 
}

\arguments{
  \item{p}{vector of p-values}
  \item{Q}{desired FDR level}
  \item{eta0}{proportion of null p-values (default: eta0=1).}
  \item{robust}{use small sample approximation for estimating q-values (default: robust=FALSE)}
}
\details{
Notes:
\enumerate{
 \item the default settings correspond to the step-up procedure to control the FDR 
    by Benjamini and Hochberg (1995).
    
 \item q-values for each hypothesis are computed as defined in Storey (2002).
 
 \item small sample approximation for q-value (robust=TRUE) is from Storey (2002).
 \item the default eta0=1 is a safe but also the most conservative choice (for other possibilities
    see \code{\link{fdr.estimate.eta0}}).
}
}
\value{

  A list object with the following components:
  \item{qvalues}{a vector with the q-values for each hypothesis.} 
  \item{significant}{a vector with a TRUE/FALSE value for each hypothesis}
  \item{num.significant}{number of significant hypotheses.}
  \item{pvalue.cutoff}{cutoff level for the individual p-values to obtain the
        desired control of FDR.
        Hypotheses whose corresponding p-values are below or equal to this
        cutoff level are rejected (i.e. significant).}   
}

\references{
 
 Benjamini, Y., and Y. Hochberg (1995)  Controlling the false
 discovery rate: a practical and powerful approach to multiple testing.
 \emph{J. Roy. Statist. Soc. B}, \bold{57}, 289--300.

 Storey, J. D. (2002) A direct approach to false
 discovery rates.
 \emph{J. Roy. Statist. Soc. B.}, \bold{64}, 479--498.
}
\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
  
  Adapted in part from S-PLUS code by  Y. Benjamini (\url{http://www.math.tau.ac.il/~roee/FDR_Splus.txt})
  and R code from J.D. Storey (\url{http://faculty.washington.edu/~jstorey/}). 
}
\seealso{\code{\link{fdr.estimate.eta0}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many genes and how many samples?
dim(caulobacter)

# p-values from Fisher's g test
pval.caulobacter <- fisher.g.test(caulobacter)

# FDR test on the level 0.05
fdr.control(pval.caulobacter, Q = 0.05)
}
\keyword{htest}

\eof
\name{fdr.estimate.eta0}
\alias{fdr.estimate.eta0}
\title{Estimate the Proportion of Null p-Values}
\usage{
fdr.estimate.eta0(p, method=c("conservative", "adaptive", "bootstrap", "smoother"),
    lambda=seq(0,0.95,0.05) )
}
\description{
  \code{fdr.estimate.eta0} estimates the proportion eta0 of null p-values in a given
  vector of p-values. This quantity is an important parameter 
  when controlling  the false discovery rate (FDR).  A conservative choice is
  eta0 = 1 but a choice closer to the true value will increase efficiency
  and power 
  - see   Benjamini and Hochberg (1995, 2000) and Storey (2002) for details.
}

\arguments{
  \item{p}{vector of p-values}
  \item{method}{algorithm used to estimate the proportion of null p-values.
     Available options are
    "conservative" (default), "adaptive", "bootstrap", and "smoother".}
   \item{lambda}{optional tuning parameter vector needed for "bootstrap"
     and "smoother" methods (defaults to \code{seq(0,0.95,0.05)})
     - see Storey (2002) and Storey and Tibshirani (2003).}
  }

\details{
The function \code{fdr.estimate.eta0} provides four algorithms: the "conservative"
method always returns eta0 = 1  (Benjamini and Hochberg, 1995), "adaptive"
uses the approach suggested in Benjamini and Hochberg (2000), "bootstrap"
employs the method from Storey (2002), and "smoother" uses the smoothing spline
approach in Storey and Tibshirani (2003).
}
\value{
  The estimated proportion eta0 of null p-values. 
}

\references{
 
 \emph{"conservative" procedure:} Benjamini, Y., and Y. Hochberg (1995)  Controlling the false
 discovery rate: a practical and powerful approach to multiple testing.
 \emph{J. Roy. Statist. Soc. B}, \bold{57}, 289--300.

 \emph{"adaptive" procedure:} Benjamini, Y., and Y. Hochberg (2000) The adaptive control
 of the false discovery rate in multiple hypotheses testing with independent statistics.
 \emph{J. Behav. Educ. Statist.}, \bold{25}, 60--83.
 
 \emph{"bootstrap" procedure:} Storey, J. D. (2002) A direct approach to false
 discovery rates.
 \emph{J. Roy. Statist. Soc. B.}, \bold{64}, 479--498.
 
 \emph{"smoother" procedure:} Storey, J. D., and R. Tibshirani (2003)
  Statistical significance for genome-wide experiments. 
 \emph{Proc. Nat. Acad. Sci. USA}, \bold{100}, 9440--9445.

}
\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
  
  Adapted in part from S-PLUS code by  Y. Benjamini (\url{http://www.math.tau.ac.il/~roee/FDR_Splus.txt})
  and R code from J.D. Storey (\url{http://faculty.washington.edu/~jstorey/}). 
}
\seealso{\code{\link{fdr.control}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many genes and how many samples?
dim(caulobacter)

# p-values from Fisher's g test
pval.caulobacter <- fisher.g.test(caulobacter)

# Proportion of null p-values for different methods
fdr.estimate.eta0(pval.caulobacter)
fdr.estimate.eta0(pval.caulobacter, method="adaptive")
fdr.estimate.eta0(pval.caulobacter, method="bootstrap")
fdr.estimate.eta0(pval.caulobacter, method="smoother")
}
\keyword{htest}

\eof
\name{fisher.g.test}
\alias{fisher.g.test}
\title{Fisher's Exact g Test for Multiple (Genetic) Time Series}
\description{
  \code{fisher.g.test} calculates the p-value(s) according to Fisher's
  exact g test for one or more time series. This test is useful to detect hidden
  periodicities of unknown frequency in a data set.  For an application to
  microarray data see Wichert, Fokianos, and Strimmer (2004). 
}
\usage{
fisher.g.test(x, \dots)
}
\arguments{
  \item{x}{vector or matrix with  time series data (one time series per column).}
  \item{\dots}{arguments passed to \code{\link{periodogram}}}
}
\details{
  Fisher (1929) devised an exact procedure to test the null hypothesis of Gaussian
  white noise against the alternative of an added deterministic periodic component
  of unspecified frequency.  The basic idea behind the test is to reject the 
  null hypothesis if the periodogram contains a value significantly larger
  than the average value (cf. Brockwell and Davis, 1991).  
  This test is useful in the context of microarray genetic time series
  analysis as a gene selection method - see Wichert, Fokianos and Strimmer (2004)
  for more details.   Note that in the special case of a constant time series
  the  p-value returned by \code{fisher.g.test} is 
  exactly 1 (i.e. the null hypothesis is not rejected).
}
\value{
  A vector of p-values (one for each time series).  Multiple testing
  may then be done using the false discovery rate approach
  (function \code{\link{fdr.control}}).
}
\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Fisher, R.A. (1929). Tests of significance in harmonic analysis. \emph{Proc. Roy. Soc. A}, \bold{125}, 54--59.

  Brockwell, P.J., and Davis, R.A.  (1991). Time Series: Theory and Methods (2nd ed).  Springer Verlag.
  (the g-test is discussed in section 10.2).

  Wichert, S., Fokianos, K., and Strimmer, K. (2004).  Identifying periodically expressed transcripts
   in microarray time series data. \emph{Bioinformatics} \bold{20}:5-20.
}
\seealso{\code{\link{fdr.control}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many samples and how many genes?
dim(caulobacter)

# p-values from Fisher's g test
pval.caulobacter <- fisher.g.test(caulobacter)
pval.caulobacter

# test with FDR controlled at the level 0.05
fdr.control(pval.caulobacter, Q = 0.05)
}
\keyword{htest}

\eof
\name{ggm.estimate.pcor}
\alias{ggm.estimate.pcor}

\title{Graphical Gaussian Models: Small Sample Estimation of Partial Correlation}

\usage{
ggm.estimate.pcor(x, method = c("observed.pcor", "partial.bagged.cor", "bagged.pcor"), R = 1000, \dots)
}

\arguments{
  \item{x}{data matrix (each row corresponds to one multivariate observation)}
  \item{method}{method used to estimate the partial correlation matrix.
     Available options are "observed.pcor" (default),
    "partial.bagged.cor", and "bagged.pcor". }
  \item{R}{number of bootstrap replicates (bagged estimators only)}
  \item{\dots}{options passed to \code{\link{partial.cor}}, \code{\link{bagged.cor}},
        and \code{\link{bagged.pcor}}. }
   
}
\description{
  \code{ggm.estimate.pcor} implements various point estimators of partial
  correlation that can also be employed for small sample data sets. Their statistical
  properties are investigated in detail in Schaefer and Strimmer (2003).


}

\details{
  The result can be summarized as follows (with N being the sample size, and G being the
  number of variables): 

\emph{observed.pcor:} Observed partial correlation (Pi-1). Should be used preferentially for N >> G. 
 In this region  the other two estimators perform equally well but are slower due to bagging.

\emph{partial.bagged.cor:}  Partial bagged correlation (Pi-2). Best used for small sample applications with N < G.
 Here the advantages of Pi-2 are its small variance, its high accuracy as a point estimate,
 and its overall best power and positive predictive value (PPV). In addition it is computationally less expensive than Pi-3.

\emph{bagged.pcor:} Bagged partial correlation (Pi-3). May be used in the critical zone (N = G) and for sample sizes N slightly
 larger than the number of variables G.

As a result, this particularly promotes the partial bagged correlation Pi-2 as the estimator of choice for the inference of GGM networks from small-sample
 (gene expression) data.
}

\value{
  An estimated partial correlation matrix.
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}
\seealso{\code{\link{ggm.simulate.data}}, \code{\link{partial.cor}}, \code{\link{bagged.pcor}}.}

\examples{
# load GeneTS library
library(GeneTS)

# generate random network with 40 nodes 
# it contains 780=40*39/2 edges of which 5 percent (=39) are non-zero
true.pcor <- ggm.simulate.pcor(40)
  
# simulate data set with 40 observations
m.sim <- ggm.simulate.data(40, true.pcor)

# simple estimate of partial correlations
estimated.pcor <- partial.cor(m.sim)

# comparison of estimated and true model
sum((true.pcor-estimated.pcor)^2)

# a slightly better estimate ...
estimated.pcor.2 <- ggm.estimate.pcor(m.sim, method = c("bagged.pcor"))
sum((true.pcor-estimated.pcor.2)^2)
}
\keyword{htest}

\eof
\name{ggm.plot.graph}
\alias{ggm.plot.graph}
\alias{ggm.make.graph}
\alias{show.edge.weights}

\title{Graphical Gaussian Models: Plotting the Network}

\usage{
ggm.make.graph(edge.list, num.nodes)
show.edge.weights(gr)
ggm.plot.graph(gr, node.labels=NULL, show.edge.labels=TRUE, col.pos="black", col.neg="grey", \dots)
}

\arguments{
  \item{edge.list}{a data frame, as obtained by \code{\link{ggm.test.edges}}, listing all
          edges to be included in the graph}
  \item{num.nodes}{the total number of nodes in the network}
  \item{gr}{a graph object}
  \item{node.labels}{a vector with labels for each node (optional)} 
  \item{show.edge.labels}{plot correlation values as edge labels (default: TRUE)}
  \item{col.pos}{edge color for positive correlation (default: "black")}
  \item{col.neg}{edge color for negative correlation (default: "grey")}
  \item{\dots}{options passed to \code{plot.graph}}
}
\description{
  \code{ggm.make.graph} converts an edge list as obtained by \code{\link{ggm.test.edges}}
  into a graph object.

  \code{show.edge.weights} summarizes a graph object by printing a vector of weights for all
  edges contained in a graph.
  This function is convenient to gain a first impression of the graph (in particular if
  the "Rgraphviz" library is not installed). 

  \code{ggm.plot.graph} visualizes the network structure of the graphical Gaussian model 
  using the Rgraphviz network plot package. The correlation coefficients are
  printed as edge labels.
}

\details{
  The network plotting functions require the installation of the "graph" and "Rgraphviz" R
  packages. These are available from the Bioconductor website
  (\url{http://www.bioconductor.org}).  Note that it is not necessary to install the
  complete set of Bioconductor packages, only "graph" and "Rgraphviz" are needed by the
  GeneTS package (however, these may in turn require additional packages from Bioconductor).

  \code{ggm.plot.graph} is a simple utility function to plot the graph in "neato" format
  with ellipsoid node shapes.  See the documentation of \code{plot.graph} in the "Rgraphviz"
  package for many other options.
 
}
\value{
  \code{ggm.make.graph} returns a graph object, suitable for plotting with functions from
  the "Rgraphviz" library.
  
  \code{show.edge.weights} returns a vector of weights for all edges contained in a graph.
  
  \code{ggm.plot.graph} plots  the network on the current graphic device.
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{
\code{\link{ggm.test.edges}},\code{plot.graph}.
}

\examples{
# load GeneTS library
library(GeneTS)
 
# generate random network with 20 nodes and 10 percent edges (=19 edges)
true.pcor <- ggm.simulate.pcor(20, 0.1)

# convert to edge list 
test.results <- ggm.test.edges(true.pcor, eta0=0.9, kappa=1000)[1:19,]

# generate graph object 
# NOTE: this requires the installation of the "graph" package
# (in the following "try" is used to avoid an error if the library is not installed)
try( gr <- ggm.make.graph( test.results, 20) )
try( gr )
try( show.edge.weights(gr) )

# plot network
# NOTE: this requires the installation of the "Rgraphviz" library
try ( ggm.plot.graph(gr))
nlab <- c("A","B","C","D","E","F","G","H","I","J","K",
            "L","M","N","O","P","Q","R","S", "T")
try( ggm.plot.graph(gr, nlab) )
}
\keyword{hplot}

\eof
\name{ggm.simulate.data}
\alias{ggm.simulate.data}

\title{Graphical Gaussian Models: Simulation of Data}

\usage{
ggm.simulate.data(sample.size, pcor)
}

\arguments{
  \item{sample.size}{sample size of simulated data set}
  \item{pcor}{partial correlation matrix}
}
\description{
  \code{ggm.simulate.data} takes a positive definite partial correlation matrix and
  generates an iid sample from the corresponding standard multinormal distribution 
  (with mean 0 and variance 1).
}


\value{
  A multinormal data matrix.
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}
\seealso{\code{\link{ggm.simulate.pcor}}, \code{\link{ggm.estimate.pcor}}.}

\examples{
# load GeneTS library
library(GeneTS)

# generate random network with 40 nodes 
# it contains 780=40*39/2 edges of which 5 percent (=39) are non-zero
true.pcor <- ggm.simulate.pcor(40)
  
# simulate data set with 40 observations
m.sim <- ggm.simulate.data(40, true.pcor)

# simple estimate of partial correlations
estimated.pcor <- partial.cor(m.sim)

# comparison of estimated and true model
sum((true.pcor-estimated.pcor)^2)

# a slightly better estimate ...
estimated.pcor.2 <- ggm.estimate.pcor(m.sim, method = c("bagged.pcor"))
sum((true.pcor-estimated.pcor.2)^2)
}
\keyword{multivariate}

\eof
\name{ggm.simulate.pcor}
\alias{ggm.simulate.pcor}

\title{Graphical Gaussian Models: Simulation of Networks}

\usage{
ggm.simulate.pcor(num.nodes, etaA=0.05)
}

\arguments{
  \item{num.nodes}{number of nodes in the network}
  \item{etaA}{fraction of edges with non-zero partial correlation (default: 0.05)}
}
\description{
  \code{ggm.simulate.pcor} generates a random matrix of partial correlations that 
  corresponds to a GGM network of a given size (\code{num.nodes})
  with a specified fraction of non-zero edges.
}

\details{
  The output of \code{ggm.simulate.pcor} is always positive definite. This is ensured
  by using diagonally dominant matrices when generating the random GGM model.  For
  the full algorithm see Schaefer and Strimmer (2003).
}
\value{
  A positive definite partial correlation matrix.
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}
\seealso{\code{\link{ggm.simulate.data}},\code{\link{ggm.estimate.pcor}}.}

\examples{
# load GeneTS library
library(GeneTS)

# generate random network with 40 nodes 
# it contains 780=40*39/2 edges of which 5 percent (=39) are non-zero
true.pcor <- ggm.simulate.pcor(40)
  
# simulate data set with 40 observations
m.sim <- ggm.simulate.data(40, true.pcor)

# simple estimate of partial correlations
estimated.pcor <- partial.cor(m.sim)

# comparison of estimated and true model
sum((true.pcor-estimated.pcor)^2)

# a slightly better estimate ...
estimated.pcor.2 <- ggm.estimate.pcor(m.sim, method = c("bagged.pcor"))
sum((true.pcor-estimated.pcor.2)^2)
}
\keyword{multivariate}

\eof
\name{ggm.test.edges}
\alias{ggm.test.edges}

\title{Graphical Gaussian Models: Testing Edges}

\usage{
ggm.test.edges(r.mat, MAXKAPPA=5000, kappa=NULL, eta0=NULL)
}

\arguments{
  \item{r.mat}{matrix of partial correlations}
  \item{kappa}{the degree of freedom of the null distribution (will be estimated if left unspecified)}
  \item{eta0}{the proportion of true null values (will be estimated if left unspecified)}
  \item{MAXKAPPA}{upper bound for the estimated kappa - see \code{\link{cor.fit.mixture}} (default: MAXKAPPA=5000)}
}
\description{
  \code{ggm.test.edges} assigns statistical significance to the edges in a GGM network by computing
  p-values, q-values and posterior probabilities for each potential edge.
}

\details{
  A mixture model is fitted to the partial correlations using \code{\link{cor.fit.mixture}}
  (this estimate can be overridden if values for both \code{kappa} and \code{eta0} are specified).
  Subsequently, two-sided p-values to test non-zero correlation are computed for each edge using 
  \code{\link{cor0.test}}. In addition, corresponding posterior probabilities are
  computed using \code{\link{cor.prob.nonzero}}.  Finally, to simplify multiple testing q-values
  are computed via  \code{\link{fdr.control}} with the specified value of \code{eta0} taken
  into account.

  Theoretical details are explained in Schaefer and Strimmer (2003), along with a simulation
  study and an application to gene expression data.
}
\value{
  A sorted data frame with the following columns:
  
  \item{pcor}{partial correlation (from r.mat)}
  \item{node1}{first node connected to edge}
  \item{node2}{second node connected to edge}
  \item{pval}{p-value}
  \item{qval}{q-value}
  \item{prob}{probability that edge is nonzero}

  Each row in the data frame corresponds to one edge, and the rows are sorted
  according to the absolute strength of the correlation (from strongest to weakest).
}
\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Schaefer, J., and Strimmer, K. (2003).  A practical approach to inferring
  large graphical models from sparse microarray data. Submitted to
  \emph{Bioinformatics} [preprint available online].
}
\seealso{
\code{\link{cor.fit.mixture}},
\code{\link{cor0.test}},
\code{\link{cor.prob.nonzero}},
\code{\link{fdr.control}},
\code{\link{ggm.estimate.pcor}}.
}

\examples{
# load GeneTS library
library(GeneTS)
 
# generate random network with 20 nodes and 5 percent edges
true.pcor <- ggm.simulate.pcor(20, 0.05)

# simulate data set of length 100
sim.dat <- ggm.simulate.data(100, true.pcor)

# estimate partial correlation matrix (simple estimator)
inferred.pcor <- ggm.estimate.pcor(sim.dat)

# p-values, q-values and posterior probabilities for each edge 
test.results <- ggm.test.edges(inferred.pcor)

# show best 20 edges
test.results[1:20,]

# how many are significant for Q=0.05 ?
num.significant <- sum(test.results$qval <= 0.05)
test.results[1:num.significant,]

# parameters of the mixture distribution used to compute p-values etc.
cor.fit.mixture(sm2vec(inferred.pcor))
}
\keyword{htest}

\eof
\name{is.constant}
\alias{is.constant}
\title{Simple Check for Constant Time Series}
\usage{
is.constant(x)
}
\arguments{
  \item{x}{vector or matrix with time series data (one time series per column)}
}
\description{
  \code{is.constant} is a utility function that
  checks whether a time series is constant.
}

\value{
  A vector with a boolean statement (\code{TRUE} or \code{FALSE}) for each time series.
}

\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# any constant genes?
sum(is.constant(caulobacter))

# but here:
series.1 <- rep(1, 10)
series.2 <- seq(1, 10)
is.constant( cbind(series.1, series.2) )

}
\keyword{ts}

\eof
\name{kappa2N}
\alias{kappa2N}
\alias{N2kappa}

\title{Determine the Sample Size from the Degree of Freedom of Correlation Distribution}
\usage{
kappa2N(kappa, G=2)
N2kappa(N, G=2)
}
\arguments{
  \item{kappa}{degree of freedom}
  \item{G}{number of variables (G=2 corresponds to simple correlation)}
  \item{N}{sample size}
}
\description{

      
  The degree of freedom kappa of the sample distribution of the correlation
  coefficient depends both on the sample size N and the number G of investigated variables, 
  i.e. whether simple or partial correlation coefficients are being considered.
  For G=2 (simple correlation coefficient) the degree of freedom equals kappa = N-1,
  whereas for arbitrary G (with G-2 variables eliminated in the partial correlation coefficient)
  kappa = N-G+1 (see also \code{\link{dcor0}}).
 
  The function \code{kappa2N} returns the sample size that
  corresponds to a given degree of freedom kappa, whereas \code{N2kappa}
  converts sample size to the corresponding degree of freedom. 
}

\value{
  The sample size N corresponding to a given kappa, or the degree of freedom
  kappa corresponding to a given N.
}


\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\seealso{\code{\link{dcor0}}, \code{\link{cor0.estimate.kappa}}.}

\examples{
# load GeneTS library
library(GeneTS)


# sample sizes corresponding to kappa=7
kappa2N(7)     # simple correlation
kappa2N(7, 40) # partial correlation with 40 variables

# degree of freedom corresponding to N=100
N2kappa(100)
N2kappa(100,40)
}
\keyword{univar}

\eof
\name{mat.convert}
\alias{sm2vec}
\alias{sm.indexes}
\alias{vec2sm}

\title{Convert Symmetric Matrix to Vector and Back}

\usage{
sm2vec(m, diag = FALSE)
sm.indexes(m, diag = FALSE)
vec2sm(vec, diag = FALSE, order = NULL)
}

\arguments{
  \item{m}{symmetric matrix}
  \item{diag}{logical. Should the diagonal be included?}
  \item{vec}{vector of unique elements from a symmetric matrix}
  \item{order}{order of the entries in \code{vec}}
}
\description{
  \code{sm2vec} takes a symmetric matrix and puts
  the lower triangular entries into a vector (cf. \code{\link{lower.tri}}).

  \code{sm.indexes} gives the corresponding x-y-indexes for each entry
  in the vector produced by \code{sm2vec}.
  
  \code{vec2sm} reverses the operation by  \code{sm2vec} and turns the
  vector back into a symmetric matrix. Note that if \code{diag=FALSE} the
  diagonal of the resulting matrix will consist of NAs.  If \code{order}
  is given then the input vector \code{vec} is first sorted accordingly.
 }

\value{
  A vector (\code{sm2vec}), a two-column matrix with indexes (\code{sm.indexes}),
  or a symmetric matrix (\code{vec2sm}).
}


\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\seealso{\code{\link{lower.tri}}.}

\examples{
# load GeneTS library
library(GeneTS)

# covariance matrix
m.cov <- rbind(
 c(3,1,1,0),
 c(1,3,0,1),
 c(1,0,2,0),
 c(0,1,0,2)
)
m.cov

# convert into vector (including diagonals)
v <- sm2vec(m.cov, diag=TRUE)
v.idx <- sm.indexes(m.cov, diag=TRUE)
v
v.idx

# put back to symmetric matrix
vec2sm(v, diag=TRUE)

# vector not in the original order
sv <- sort(v)
sv
ov <- order(v)
ov
vec2sm(sv, diag=TRUE, order=ov)
}
\keyword{utilities}

\eof
\name{mat.util}
\alias{is.positive.definite}
\alias{rank.condition}
\alias{is.square}
\alias{is.symmetric}

\title{Various Matrix Utilities}
\usage{
is.positive.definite(m, eps = .Machine$double.eps)
is.square(m)
is.symmetric(m, eps = .Machine$double.eps)
rank.condition(m, tol = sqrt(.Machine$double.eps))
}
\arguments{
  \item{m}{matrix}
  \item{eps}{values smaller than eps are considered zero (e.g., eigenvalues in is.positive.definite()
  and matrix differences in is.symmetric())}
  \item{tol}{relative tolerance - singular values larger than \code{tol} times
           the maximum singular value are considered non-zero}
}
\description{
  \code{is.positive.definite} tests whether all eigenvalues of a matrix
  are positive.
     
  \code{is.square} tests whether a matrix
  is square.
  
  \code{is.symmetric} tests whether a matrix is symmetric.
  
   \code{rank.condition} estimates the rank and the condition
  of a matrix by 
  computing its singular values D[i] (using  \code{\link{svd}}).
  The rank of the matrix is the number of singular values D[i] > tol*max(D)
  and the condition is the ratio  of the largest and the smallest
  singular value.

}

\value{
  For \code{is.positive.definite}, \code{is.square}, and \code{is.symmetric}
  a logical value (\code{TRUE} or \code{FALSE}).
  
  For \code{rank.condition} a list object with the following components:
  \item{rank}{Rank of the matrix.}
  \item{condition}{Condition number.}
}


\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{\code{\link{svd}}, \code{\link{pseudoinverse}}.}

\examples{
# load GeneTS library
library(GeneTS)

# Hilbert matrix
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }

# positive definite ?
m <- hilbert(8)
is.positive.definite(m)

# numerically ill-conditioned
m <- hilbert(15)
rank.condition(m)

# square and symmetric ?
is.square(m)
is.symmetric(m)
}
\keyword{array}

\eof
\name{partial.cor}
\alias{partial.cor}
\alias{cor2pcor}
\alias{pcor2cor}

\title{Partial Correlation from Correlation Matrix (and Vice Versa)}
\usage{
cor2pcor(m, exact.inversion=FALSE, \dots)
pcor2cor(m, exact.inversion=FALSE, \dots)
partial.cor(x, use=c("all.obs", "complete.obs", "pairwise.complete.obs"),
   method=c("pearson", "kendall", "spearman"), exact.inversion=FALSE, \dots)
}
\arguments{
  \item{m}{covariance matrix or (partial) correlation matrix}
  \item{x}{data matrix or data frame}
  \item{exact.inversion}{determines whether the inverse is computed
        exactly (using \code{\link{solve}}) or via \code{\link{pseudoinverse}}  }
  \item{use}{an optional character string giving a method for computing covariances in the
             presence of missing values. This must be one of the strings "all.obs" (default),
	     "complete.obs" or "pairwise.complete.obs".}
  \item{method}{a character string indicating which correlation coefficient (or covariance) is to be computed.
                One of "pearson" (default), "kendall", or "spearman".}
  \item{\dots}{options passed to \code{\link{pseudoinverse}}}
}
\description{
  \code{cor2pcor} computes the pairwise 
  \emph{partial} correlation coefficients from either a correlation 
  or a covariance matrix. The partial correlations represent the direct
  interactions between two variables, with the indirect effects of all
  remaining variables removed.

  \code{pcor2cor} takes a partial correlation matrix and computes
  the corresponding correlation matrix.
  
  \code{partial.cor} computes a partial correlation matrix directly from the
  data (\code{partial.cor(x)} is the same as \code{cor2pcor(cor(x))}).
  
  The underlying algorithms are based on computing the inverse of the
  covariance or correlation matrix - see Whittaker (1990) for details.
  For stability reasons and to allow near-singular matrices  the default
  matrix inversion is obtained via the function  \code{\link{pseudoinverse}} 
  rather than using \code{\link{solve}}.
}

\value{
  A matrix with the pairwise partial correlation coefficients
  (\code{cor2pcor} and \code{partial.cor}) or with pairwise
  correlations (\code{pcor2cor}).
}


\author{
  Juliane Schaefer (\url{http://www.stat.uni-muenchen.de/~schaefer/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Whittaker J. (1990).  Graphical Models in Applied Multivariate Statistics.
   John Wiley, Chichester.
}

\seealso{\code{\link{cor}}, \code{\link{pseudoinverse}}.}

\examples{
# load GeneTS library
library(GeneTS)

# covariance matrix
m.cov <- rbind(
 c(3,1,1,0),
 c(1,3,0,1),
 c(1,0,2,0),
 c(0,1,0,2)
)
m.cov

# corresponding correlation matrix
m.cor.1 <- standardize.cov(m.cov)
m.cor.1

# compute partial correlations (from covariance matrix)
m.pcor.1 <- cor2pcor(m.cov)
m.pcor.1

# compute partial correlations (from correlation matrix)
m.pcor.2 <- cor2pcor(m.cor.1)
m.pcor.2

zapsmall( m.pcor.1 ) == zapsmall( m.pcor.2 )

# backtransformation
m.cor.2 <- pcor2cor(m.pcor.1)
m.cor.2
zapsmall( m.cor.1 ) == zapsmall( m.cor.2 )
}
\keyword{multivariate}

\eof
\name{periodogram}
\alias{periodogram}

\title{Periodogram Power Spectral Density}
\usage{
periodogram(x, method = "builtin")
}
\arguments{
  \item{x}{vector or matrix containing the time series data
         (one time series per column)}
  \item{method}{a string that specifies which method should be used to
     compute the spectral density: "builtin" employs the function
     \code{\link{spectrum}} with the options 
     taper=0, plot=FALSE, fast=FALSE, detrend=FALSE, and demean=TRUE; 
     "clone" employs directly the Fourier transform function \code{\link{fft}}
     (with the same results as "builtin"); and "smooth" uses the
     function \code{\link{spectrum}} with options as above plus span=3.
     
     }
}
\description{
  \code{periodogram} is a wrapper function for \code{\link{spectrum}}
  with some special options set. It
   returns the power spectral density, i.e. the
  squared modulus of the Fourier coefficient divided by the length
  of the series, for multiple time series as well as the corresponding 
  Fourier frequencies. The frequencies range between 
   0 and the Nyquist critical frequency fc = \code{\link{frequency}}(x)/2. 
  
  \code{periodogram}  is used by the functions
  \code{\link{avgp}} and \code{\link{fisher.g.test}}.
  For general periodogram functions
  please refer to \code{\link{spectrum}}.
}

\value{

  A list object with the following components:
  \item{spec}{A vector or matrix with the estimated power spectral densities
             (one column per time series).}
  \item{freq}{A vector with frequencies f ranging from 0 to fc 
              (if the sampling rate \code{\link{frequency}}(x) equals 1 then fc = 0.5).
              Angular frequencies may be obtained by multiplication with 2*pi
	      (i.e. omega = 2*pi*f).}
}


\author{
  Konstantinos Fokianos (\url{http://www.ucy.ac.cy/~fokianos/}) and
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{\code{\link{spectrum}}, \code{\link{avgp}}, \code{\link{fisher.g.test}}.}

\examples{
# load GeneTS library
library(GeneTS)

# load data set
data(caulobacter)

# how many genes and how many samples?
dim(caulobacter)

# periodograms of the first 10 genes
periodogram(caulobacter[,1:10])
}
\keyword{ts}

\eof
\name{pseudoinverse}
\alias{pseudoinverse}

\title{Pseudoinverse of a Matrix}
\usage{
pseudoinverse(m, tol = sqrt(.Machine$double.eps))
}
\arguments{
  \item{m}{matrix}
  \item{tol}{tolerance - singular values larger than
             tol times the maximum singular value are considered non-zero}
}
\description{
  The standard definition for the inverse of a matrix fails 
  if the matrix is not square or is singular. However, one can
  generalize the inverse using singular value decomposition.
  Any rectangular real matrix M can be decomposed as

                             M = U diag(D[i]) V',

  where U and V are orthogonal, V' means V transposed, and 
  D is a diagonal matrix with the singular values (see \code{\link{svd}}).
  The pseudoinverse (also known as Moore-Penrose inverse) is then
  obtained as
  
                            IM = V diag(1/D[i]) U' .
			    
  If the matrix M is singular or ill-conditioned the inverse is approximated
  by setting 1/D[i] <- 0 for small singular values (D[i] <= tol*max(D)).  
  The pseudoinverse has the property that the sum of the squares of all
  the entries in (IM \%*\% M - I), where I is an appropriate
  identity matrix, is minimized. For non-singular matrices the
  pseudoinverse is equivalent to the standard inverse.
}

\value{
  A matrix (the pseudoinverse of m).
}


\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{\code{\link{svd}}, \code{\link{solve}}, \code{\link{ginv}}.}

\examples{
# load GeneTS library
library(GeneTS)

# a singular matrix
m <- rbind(
c(1,2),
c(1,2)
)

# not possible to invert exactly
try(solve(m))

# pseudoinverse
p <- pseudoinverse(m)
p

# characteristics of the pseudoinverse
zapsmall( m \%*\% p \%*\% m )  ==  zapsmall( m )
zapsmall( p \%*\% m \%*\% p )  ==  zapsmall( p )
zapsmall( p \%*\% m )  ==  zapsmall( t(p \%*\% m ) )
zapsmall( m \%*\% p )  ==  zapsmall( t(m \%*\% p ) )


# example with an invertible matrix
m2 <- rbind(
c(1,1),
c(1,0)
)
zapsmall( solve(m2) ) == zapsmall( pseudoinverse(m2) )
}
\keyword{algebra}

\eof
\name{robust.boot}
\alias{robust.boot}

\title{Robust Error Resistant Bootstrap Algorithm}

\usage{
robust.boot(data, statistic, R)
}

\arguments{
  \item{data}{data matrix or data frame (each row is considered as one multivariate observation)}
  \item{statistic}{A function which when applied to data returns a vector
          containing the statistic(s) of interest}
  \item{R}{number of bootstrap replicates}
}
\description{
  \code{robust.boot} generates ordinary nonparametric bootstrap replicates. If an error occurs during the
  function evaluation (e.g., due to numerical problems) the bootstrap draw is repeated. 

  \code{robust.boot} offers only very limited bootstrap support, for much more advanced bootstrapping methods
   use \code{\link{boot}}.
 
}

\details{
  \code{robust.boot} is used in the functions \code{\link{bagged.cov}}, \code{\link{bagged.cor}},
  and \code{\link{bagged.pcor}}.
  
}
\value{
  A list with one component:
  \item{t}{a matrix with 'R' rows each of which is a bootstrap replicate of 'statistic'.}


  
}
\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}

\seealso{\code{\link{boot}}, \code{\link{bagged.pcor}}.}

\examples{
# load GeneTS library
library(GeneTS)

# small example data set 
data(caulobacter)
dat <- caulobacter[,1:15]
dim(dat)

# test statistic: vector of means 
test.fun <- function(data, i)
{
  res <- apply(data[i,], 2, mean) 
  if (runif(1) < .01) stop("Error!") # in 1 percent of cases an error occurs ... 
  return(res)
}

# perform bootstrap
b.out <- robust.boot(dat, test.fun, 1000)

# despite the errors bootstrapping has finished
dim(b.out$t)

# bootstrap means
bag <- apply(b.out$t, 2, mean)
bag
}
\keyword{nonparametric}

\eof
\name{standardize.cov}
\alias{standardize.cov}
\alias{rebuild.cov}

\title{Standardize Covariance Matrix}
\usage{
standardize.cov(m)
rebuild.cov(r, v)
}
\arguments{
  \item{m}{covariance matrix}
  \item{r}{correlation matrix}
  \item{v}{variance vector}
}
\description{
  \code{standardize.cov} takes a covariance matrix and turns it into
  a correlation matrix by standardizing each entry with the product of the 
  corresponding standard deviations (so that all diagonal entries equal 1).
  
  \code{rebuild.cov} takes a correlation matrix and a vector with variances
  and reconstructs the corresponding covariance matrix.
}

\value{

  A matrix (correlation or covariance matrix).
}


\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\seealso{\code{\link{cor}}, \code{\link{cov}}}

\examples{
# load GeneTS library
library(GeneTS)

# first 10 genes from the caulobacter data set
data(caulobacter)
m <- caulobacter[,1:10]

# covariance matrix
m.cov <- cov(m)
m.cov

# variances
m.var <- diag(m.cov)
m.var

# correlation matrix
m.cor.1 <- cor(m)
m.cor.1

# correlation matrix via covariance matrix
m.cor.2 <- standardize.cov(m.cov)
m.cor.2

zapsmall(m.cor.1) == zapsmall(m.cor.2)

# reconstruct covariance matrix
rebuild.cov(m.cor.1, m.var)

}
\keyword{multivariate}

\eof
\name{z.transform}
\alias{z.transform}
\alias{hotelling.transform}

\title{Variance-Stabilizing Transformations of the Correlation Coefficient}

\usage{
z.transform(r)
hotelling.transform(r, kappa)
}

\arguments{
  \item{r}{vector of sample correlations}
  \item{kappa}{degrees of freedom of the distribution of the correlation coefficient}
}
\description{
  \code{z.transform} and \code{hotelling.transform} implement Fisher's (1921) first-order and
  Hotelling's (1953) second-order transformations, respectively, to stabilize the distribution
  of the correlation coefficient.
  After the transformation the data follows approximately a
  normal distribution  with constant variance (i.e. independent of the mean). 
  
  Hotelling's transformation requires the specification of the degree of freedom kappa of
  the underlying distribution. This depends on the sample size N used to compute the
  sample correlation and whether simple or partial correlation coefficients are considered.
  If there are G variables, with G-2 variables eliminated, the degree of freedom is kappa=N-G+1.
  (cf. also \code{\link{cor0.estimate.kappa}} and \code{\link{dcor0}}). 
}


\value{
  The vector of transformed sample correlation coefficients.
}
\author{
  Korbinian Strimmer (\url{http://www.stat.uni-muenchen.de/~strimmer/}).
}
\references{
  Fisher, R.A. (1921). On the 'probable error' of a coefficient of correlation deduced from
  a small sample. \emph{Metron}, \bold{1}, 1--32.
 
  Hotelling, H. (1953). New light on the correlation coefficient and its transformation.
   \emph{J. Roy. Statist. Soc. B}, \bold{15}, 193--232.

}
\seealso{\code{\link{cor0.estimate.kappa}}, \code{\link{dcor0}},  \code{\link{kappa2N}}.}

\examples{
# load GeneTS library
library(GeneTS)

# small example data set 
r <- c(-0.26074194, 0.47251437, 0.23957283,-0.02187209,-0.07699437,
       -0.03809433,-0.06010493, 0.01334491,-0.42383367,-0.25513041)

# transformed data
z1 <- z.transform(r)
z2 <- hotelling.transform(r,7)
z1
z2
}
\keyword{univar}

\eof
