\name{khan}
\docType{data}
\alias{khan}
\alias{khanEset}
\alias{khan.geneNames}
\title{Khan microarray data}
\description{A text file containing the Khan microarray data}
\usage{
   data(khan)
   data(khanEset)
}
\format{
 The data set \code{khan} consists of 2310 rows and 65 columns. Row 1 has the 
  sample labels, Row 2 has the class labels.
  The remaining rows are gene expression. Column 1 is a dummy gene number. 
  Column 2 is the gene name. Remaining columns are gene expression.

 The data set \code{khanEset} contains an \code{exprSet} representation of the 
 same data. There is additionally a vector named \code{khan.geneNames}
 containing the 2308 gene names. Note that there are 2308 genes and 64 
 samples (the numbers given in the preceding paragraph include labels).
}

\details{}

\source{
Khan, J.  and  Wei, J.S.  and
 Ringner, M. and  Saal, L. and  Ladanyi, M. and
Westermann, F.  and  Berthold, F. and Schwab, M. and  Antonescu, C. and
Peterson, C. and Meltzer, P. (2001) Classification and diagnostic prediction of cancers using gene expression
profiling and artificial neural networks. Nature Medicine 7, 673-679.
}
\references{
Robert Tibshirani, Trevor Hastie, Balasubramanian Narasimhan, and Gilbert Chu  (2002).
  Diagnosis of multiple cancer types by shrunken centroids of gene expression.
  PNAS 99: 6567-6572.   Available at www.pnas.org
}
\examples{
  data(khan)
}

\keyword{datasets}


\eof
\name{pamr.adaptthresh}
\alias{pamr.adaptthresh}
\title{A function to adaptively choose threshold scales, for
use in pamr.train}
\description{A function to adaptively choose threshold scales, for
use in pamr.train}
\usage{
pamr.adaptthresh(object, ntries = 10, reduction.factor = 0.9, full.out = F)
}

\arguments{
    
\item{object}{The result of a call to pamr.train }
\item{ntries}{Number of iterations to use in algorithm}
\item{reduction.factor}{Amount by which a scaling is reduced in one
step of the algorithm}
\item{full.out}{Provide more detailed  output}
}    

    
\details{
    \code{pamr.adaptthresh} adaptively searches for a set of
good threshold scales.  The baseline (default) scale is 1 for
each class. The idea is that for easy to classify classes,
the threshold scale can be increased without increasing the error
rate for that class, and resulting in fewer genes needed for the
classification rule. The scalings from pamr.adaptthresh are then used
in pamr.train, and pamr.cv. The results may be better than those obtained
with the default values of threshold.scale.
}


   \references{

Robert Tibshirani, Trevor Hastie, Balasubramanian Narasimhan, and Gilbert Chu. "Diagnosis of multiple cancer types by shrunken centroids of gene expression"  PNAS 2002 99:6567-6572 (May 14). 

Robert Tibshirani,  Trevor Hastie, Balasubramanian Narasimhan, and Gilbert Chu (2002).
Class prediction by
nearest shrunken centroids, with applications
to DNA microarrays. Stanford tech report.
 }

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
new.scales <- pamr.adaptthresh(mytrain)

 
mytrain2 <- pamr.train(mydata, threshold.scale=new.scales)

myresults2 <- pamr.cv(mytrain2, mydata)

}
\keyword{ }


\eof
\name{pamr.batchadjust}
\alias{pamr.batchadjust}
\title{ A function to mean-adjust microarray data by batches}
\description{
 A function to mean-adjust microarray data by batches
}
\usage{
pamr.batchadjust(data)
}

\arguments{
    \item{data}{The input data. A list with components: x- an expression
	matrix (genes in the rows, samples in the columns), y-  a vector of
	the class labels for each sample, and batchlabels- a vector of batch
labels for each sample.
 This object is of the same form as that produced by pamr.from.excel.}
}


\details{
    \code{pamr.batchadjust} 
does a genewise one-way ANOVA adjustment for expression values.
Let x(i,j) be the expression for gene i in sample j.
Suppose sample j is in batch b, and let B be the set of all samples
in batch b. Then \code{pamr.batchadjust}  adjusts  x(i,j) to
x(i,j) - mean[x(i,j)]  where the mean is taken over all samples j in B.
}

\value{ A data object  of the same form as the input data, with x
replaced by the adjusted x}

   \references{}
   


\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
#generate some data
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
batchlabels <- sample(c(1:5),size=20,replace=TRUE)
mydata <- list(x=x,y=factor(y),batchlabels=factor(batchlabels))

mydata2 <- pamr.batchadjust(mydata)
}

\keyword{ }


\eof
\name{pamr.confusion}
\alias{pamr.confusion}
\title{A function giving a table of true versus predicted values,
 from a nearest shrunken centroid fit}
\description{A function giving a table of true versus predicted values,
 from a nearest shrunken centroid fit.}
\usage{
pamr.confusion(fit, threshold, extra=TRUE)
}

\arguments{
    
\item{fit}{The result of a call to pamr.train or pamr.cv}
\item{threshold}{The desired threshold value}
\item{extra}{If TRUE, class confusion rates are also computed}

    
}    
    
\details{
    \code{pamr.confusion} Gives a cross-tabulation of true versus
    predicted classes for the fit returned by pamr.train or pamr.cv,
    at the specified threshold.
}


   \references{}

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
mycv <- pamr.cv(mytrain,mydata)
pamr.confusion(mytrain,  threshold=2)
pamr.confusion(mycv,  threshold=2)
 
}
\keyword{ }


\eof
\name{pamr.cv}
\alias{pamr.cv}
\title{ A function to cross-validate the nearest shrunken centroid
                         classifier}
\description{A function to cross-validate the nearest shrunken centroid
                         classifier produced by pamr.train}
   
\usage{
pamr.cv(fit, data,  nfold = min(table(data$y)), folds = balanced.folds(data$y), ...)}

\arguments{
    \item{fit}{The result of a call to pamr.train}
    \item{data}{A list with at least two components: x- an expression
        matrix (genes in the rows, samples in the columns), and y-  a vector of
        the class labels for each sample. Same form as data object
         used by pamr.train.}
    \item{nfold}{Number of cross-validation folds. Default is the
	smallest
	class size}
    \item{folds}{A list with nfold components, each component a vector of
	indices of the samples in that fold. By default a (random) balanced
	cross-validation is used.}
    \item{\dots}{Any additional arguments that are to be passed to pamr.train}
}  
    
\details{
    \code{pamr.cv} carries out cross-validation for a nearest shrunken
    centroid classifier.
}
\value{
    A list with components
    \item{threshold}{A vector of the thresholds tried in the shrinkage}
    \item{errors}{The number of cross-validation  errors for each threshold value}
    \item{loglik}{The cross-validated multinomial log-likelihood value
 	for each threshold value}
  \item{size}{A vector of the number of genes that survived the
      thresholding, for each threshold value tried.}
  \item{yhat}{A matrix of size n by nthreshold, containing the
      cross-validated
      class predictions for each threshold value,  in each column}
   \item{prob}{A matrix of size n by nthreshold, containing the
      cross-validated
      class probabilities for each threshold value,  in each column}
  \item{folds}{ The cross-validation folds used}
   \item{call}{The calling sequence used}
   
   }

   \references{}


\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
mycv <- pamr.cv(mytrain,mydata)
}

\keyword{ }


\eof
\name{pamr.from.excel}
\alias{pamr.from.excel}
\title{ A function to read in a text file saved from Excel}
\description{ A function to read in a text file saved from Excel.
The spreadsheet is assumed to be of the format used by the SAM program.}
   
\usage{
pamr.from.excel(file, ncols, sample.labels = FALSE, batch.labels = FALSE) 
}

\arguments{
    \item{file}{Character name of a text file. This is assumed to be a tab-delimited text file saved from an Excel spreadsheet from  "SAM". 
The spreadsheet has one row of expression values per gene. In addition
there is one information row and two information columns.  The first row
has class labels for each of the samples. The first column has gene identifiers,
and the second column has gene names. In the SAM program, for the multiclass
option, the samples must be labelled 1,2,3 etc. Here we allow general labels,
like "lymphoma", "colon cancer" etc.
}
\item{ncols}{Number of columns in file}
\item{sample.labels}{Optional argument. If true, "file" is assumed to
have an additional row at the top, consisting of two blank cells
followed by sample labels for each of the columns. If available, these sample labels
are used by various plotting routines.} 
\item{batch.labels}{Optional argument. If true, "file" is assumed to
have an additional row at the top, consisting of two blank cells
followed by batch labels for each of the columns.
If sample.labels=T as well, the row of batch labels is assumed to 
come after the row of sample labels.
The batch labels are used by the function pamr.batchadjust.
} 
}  
\details{
    \code{pamr.from.excel} reads in the  text file "file", and creates an
object with components x (the matrix of expression values), y- a vector of
class labels for each sample, geneid- a vector of gene identifiers and
genenames- a vector of gene names.
}
\value{
    A list with components
  \item{x}{the matrix of expression values}
\item{y}{a vector of
class labels for each sample} 
\item{geneid}{a vector of gene identifiers}
\item{genenames}{a vector of gene names}
\item{samplelabels}{a vector of sample labels, if provided in "file"}
\item{batchlabels}{a vector of batch labels, if provided in "file"}
   }

   \references{ }

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\keyword{ }


\eof
\name{pamr.geneplot}
\alias{pamr.geneplot}
\title{ A function to plot  genes found by
                          the nearest shrunken centroid
                         classifier} 
   
\description{ A function to plot  genes found by
                          the nearest shrunken centroid
                         classifier}

\usage{
pamr.geneplot(fit, data, threshold)
}

\arguments{
    \item{fit}{The result of a call to pamr.train}
    \item{data}{A data object, like that passed to pamr.train}
    \item{threshold}{Threshold for selecting which genes to plot}
}    
    
\details{
    \code{pamr.geneplot} 
plots the gene expression from a pamr fit, for genes which survive the "threshold".
The genes are plotted from strongest to weakest.
}
\value{}

   \references{}


\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }


\keyword{ }


\eof
\name{pamr.internal}
\alias{pamr.internal}

\title{Internal functions used by pamr}
\alias{balanced.folds}
\alias{softmax}
\alias{descendants}
\alias{diag.disc}
\alias{enlist}
\alias{error.bars}
\alias{error.nsc}
\alias{misreg.simple}
\alias{nnmiss}
\alias{nsc}
\alias{nsccv}
\alias{pamr.cube.root}
\alias{pamr.pairscore}
\alias{pamr.predictmany}
\alias{pamr.xl.compute.confusion}
\alias{pamr.xl.compute.cv.confusion}
\alias{pamr.xl.compute.offset}
\alias{pamr.xl.derive.adjusted.prior}
\alias{pamr.xl.get.default.training.parameters}
\alias{pamr.xl.get.offset}
\alias{pamr.xl.get.sample.prior}
\alias{pamr.xl.get.uniform.prior}
\alias{pamr.xl.is.a.subset}
\alias{pamr.xl.listgenes.compute}
\alias{pamr.xl.plot.test.probs.compute}
\alias{pamr.xl.plot.training.error.compute}
\alias{pamr.xl.plotcen.compute}
\alias{pamr.xl.plotcv.compute}
\alias{pamr.xl.plotcvprob.compute}
\alias{pamr.xl.predict.test.class}
\alias{pamr.xl.predict.test.class.only}
\alias{pamr.xl.predict.test.probs}
\alias{pamr.xl.process.data}
\alias{pamr.xl.test.data.impute}
\alias{pamr.xl.test.errors.compute}
\alias{pamr.xl.transform.class.labels}
\alias{pamr.xl.transform.data}
\alias{pamr.xl.transform.test.data}
\alias{permute.rows}
\alias{print.nsc}
\alias{print.nsccv}
\alias{roc.nsc}
\alias{soft.shrink}
\description{Internal functions used by pamr}
   
\usage{}

\arguments{}
    
\details{
These
functions are internal to the package and not meant to be called by
users. 
}

\value{}

\references{}


\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }


\keyword{ }


\eof
\name{pamr.knnimpute}
\alias{pamr.knnimpute}
\title{ A function to impute missing expression data}
\description{ A function to impute missing expression data}

   
\usage{
pamr.knnimpute(data ,k)
}

\arguments{
\item{data}{The input data. A list with components: x- an expression
        matrix (genes in the rows, samples in the columns), and y-  a vector of
        the class labels for each sample. Same form as used by pamr.train,
 and same as that produced by pamr.from.excel}

\item{k}{Number of neighbors to be used in the imputation. Default=10 }
 } 
    
\details{
    \code{pamr.knnimpute} 
uses k-nearest neighbors in the space of genes to impute missing
expression values.
For large data matrices with lots of missing values, this function can take a while
to run.}

\value{
  \item{data}{The input data list, with x replaced by the 
   imputed version of x}
   }

   \references{
Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing value estimation methods for DNA microarrays BIOINFORMATICS Vol. 17 no. 6, 2001 Pages 520-525 
 }

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\keyword{ }


\eof
\name{pamr.listgenes}
\alias{pamr.listgenes}
\title{A function to list the genes that survive the thresholding,
 	from the nearest shrunken centroid classifier}
\description{ A function to list the genes that survive the thresholding,
 	from the nearest shrunken centroid classifier produced by pamr.train}
\usage{
pamr.listgenes(fit, data, threshold, genenames=F)
}

\arguments{
\item{fit}{The result of a call to pamr.train}
    \item{data}{The input data.  In the same format as the input data for
                  pamr.train}

\item{threshold}{The desired threshold value}
\item{genenames}{Include genenames in the list? If yes, they
    are taken from "data". Default is false (geneid is always included
    in the list).}
}    
    
\details{
    \code{pamr.listgenes} lists the geneids, and standardized centroids for
    each class, for genes surviving at the given threshold.
}


   \references{}

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y,geneid=as.character(1:1000))
mytrain <-   pamr.train(mydata)
mycv <- pamr.cv(mytrain,mydata)
pamr.listgenes(mytrain, mydata, threshold=1.6)
 
}
\keyword{ }


\eof
\name{pamr.makeclasses}
\alias{pamr.makeclasses}
\title{A function to interactively define classes from a clustering tree}
\description{function to interactively define classes from a clustering tree}
\usage{
pamr.makeclasses(data,  sort.by.class=F,...)
}

\arguments{
    \item{data}{The input data. A list with components: x- an expression
	matrix (genes in the rows, samples in the columns), y-  a vector of
	the class labels for each sample, and batchlabels- a vector of batch
labels for each sample.
 This object is of the same form as that produced by pamr.from.excel.}
\item{sort.by.class}{Optional argument. If true, the clustering tree
is forced to put all samples in the same class  (as defined by the
class labels y in `data') together in the tree. This is useful
if a regrouping of classes is desired. Eg: given classes 1,2,3,4
you want to define new classes [1,3] vs [2,4]  or 2 vs [1,3]}
\item{\dots}{Any additional arguments to be passed to hclust}
}

\details{
    \code{pamr.makeclasses} 
Using this function the user interactively defines a new set of classes,
to be used in pamr.train, pamr.cv etc. 
After invoking pamr.makeclasses, a clustering tree is drawn.
This calls the R function \code{hclust}, and any arguments for
\code{hclust} can be passed to it.
Using the left button, the user clicks at the junction point defining
the subgroup 1. More groups can be added to class 1 by clicking
on further junction points. The user ends the definition of class 1 by
clicking on the rightmost button [in Windows,  an additional menu appears
and he chooses Stop] . This process is continued for classes 2,3 etc.
Note that some samples may be left out of the new classes.
Two consecutive clicks of the right button ends the definition for all classes.

At the end, the clustering is redrawn, with the new class labels shown.

Note: this function is "fragile". The user must click close to the junction
point, to avoid confusion with other junction points. Classes 1,2,3..
cannot have samples in common (if they do, an Error message will appear).
If the function is confused about the desired choices, it will
complain and ask the user to rerun pamr.makeclasses. The user should
also check that the labels on the  final redrawn cluster tree agree with the desired 
classes.
}

\value{A vector of class labels 1,2,3...
 If a component is NA (missing), then the
sample is not assigned to any class.
This vector should be assigned to  the newy component of data, for use in pamr.train etc.
Note that pamr.train uses the class labels in the component ``newy'' if
it is present. Otherwise it uses the data labels ``y''.
 }

   \references{}
   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
#generate some data
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
batchlabels <- sample(c(1:5),size=20,replace=TRUE)
mydata <- list(x=x,y=factor(y),batchlabels=factor(batchlabels))

# mydata$newy <- pamr.makeclasses(mydata)    Run this and define some new classes

train <- pamr.train(mydata)
results <- pamr.cv(train, mydata)
}

\keyword{ }


\eof
\name{pamr.menu}
\alias{pamr.menu}
\title{A function that interactively leads the user through a PAM analysis}
\description{A function that interactively leads the user through a PAM analysis}

   
\usage{
pamr.menu(data)
}

\arguments{
    \item{data}{A list with at least two components: x- an expression
        matrix (genes in the rows, samples in the columns), and y-  a vector of
        the class labels for each sample. Same form as data object
         used by pamr.train.}
}    
    
\details{
    \code{pamr.menu} provides a menu for training, cross-validating
and plotting a nearest shrunken centroid analysis.
}
\value{
   }

   \references{}


\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
#  pamr.menu(mydata)
}

\keyword{ }


\eof
\name{pamr.plotcen}
\alias{pamr.plotcen}
\title{ A function to plot the shrunken class centroids, from the nearest shrunken centroid classifier}
\description{ A function to plot the shrunken class centroids, from the
    nearest shrunken centroid classifier produced by pamr.train}
\usage{
pamr.plotcen(fit, data, threshold)
}

\arguments{
    \item{data}{The input data, in the same form as that used
 by pamr.train}
\item{fit}{The result of a call to pamr.train}
\item{threshold}{The desired threshold value}
}    
    
\details{
    \code{pamr.plotcen} plots the shrunken class centroids for each
    class,
    for genes surviving the threshold for at least one class. If
    genenames
    are included in "data", they are added to the plot. Note: for many
    classes
    and long gene names, this plot may need some manual prettying.
}


   \references{
 }

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y,genenames=as.character(1:1000))
mytrain <-   pamr.train(mydata)
mycv <- pamr.cv(mytrain,mydata)
pamr.plotcen(mytrain, mydata,threshold=1.6)
 
}
\keyword{ }


\eof
\name{pamr.plotcv}
\alias{pamr.plotcv}
\title{ A function to plot the cross-validated  error curves from
                          the nearest shrunken centroid
                         classifier} 
\description{A function to plot the cross-validated error curves from
                          the nearest shrunken centroid
                         classifier}
   
\usage{
pamr.plotcv(fit)
}

\arguments{
    \item{fit}{The result of a call to pamr.cv}
}    
    
\details{
    \code{pamr.plotcv} plots the cross-validated misclassification error
curves,   from nearest shrunken
    centroid classifier. An overall plot, and a plot by class, are produced.
}
\value{
   }

   \references{}


\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
mycv <-  pamr.cv(mytrain, mydata)
pamr.plotcv(mycv)
}

\keyword{ }


\eof
\name{pamr.plotcvprob}
\alias{pamr.plotcvprob}
\title{ A function to plot the cross-validated sample probabilities from
                          the nearest shrunken centroid
                         classifier} 
\description{A function to plot the cross-validated sample probabilities from 
                          the nearest shrunken centroid
                         classifier}
   
\usage{
pamr.plotcvprob(fit, data, threshold)
}

\arguments{
    \item{fit}{The result of a call to pamr.cv}
    \item{data}{A list with at least two components: x- an expression
        matrix (genes in the rows, samples in the columns), and y-  a vector of
        the class labels for each sample. Same form as data object
         used by pamr.train.}
    \item{threshold}{Threshold value to be used}
}    
    
\details{
    \code{pamr.plotcvprob} plots the cross-validated sample probabilities from the nearest shrunken
    centroid classifier, stratified by the true classes.
}
\value{
   }

   \references{}


\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
mycv <-  pamr.cv(mytrain,mydata)
pamr.plotcvprob(mycv,mydata,threshold=1.6)



}

\keyword{ }


\eof
\name{pamr.predict}
\alias{pamr.predict}
\title{A function producing predicted information,
 from a nearest shrunken centroid fit}
\description{A function producing predicted information,
 from a nearest shrunken centroid fit}
\usage{
pamr.predict(fit, newx, threshold, type = c("class", "posterior", "centroid", "nonzero"),
        prior = fit$prior, threshold.scale = fit$threshold.scale)
}

\arguments{
    
\item{fit}{The result of a call to pamr.train }
\item{newx}{Input (feature) matrix for which predictions are desired}
\item{threshold}{The desired threshold value}
\item{type}{Type of prediction desired: class predictions, posterior
    probabilities, (unshrunken) class centroids, vector of  genes
    surviving the threshold}
\item{prior}{Prior probabilities for each class. Default is that
specified in "fit"}
 \item{threshold.scale}{Additional scaling factors to be applied
        to the thresholds. Vector of length equal to the number of
        classes.
Default is that
specified in "fit".}
}

    
\details{
    \code{pamr.predict} produces the kind of prediction requested by
    \code{type} for the new data \code{newx}, using the fit returned by
    pamr.train at the specified threshold.
}


   \references{ }

   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=y)
mytrain <-   pamr.train(mydata)
mycv <- pamr.cv(mytrain,mydata)
pamr.predict(mytrain, mydata$x , threshold=1)
 
}
\keyword{ }


\eof
\name{pamr.to.excel}
\alias{pamr.to.excel}
\title{ A function to write out a data object into a tab-delimited text file}
\description{ A function to write out a data object into a tab-delimited text file}

   
\usage{
pamr.to.excel(data,file,trace=TRUE)
}

\arguments{
\item{data}{A data object, of the same form as is read in by pamr.from.excel.
Must have components x (the matrix of expression values), y- a vector of
class labels for each sample, geneid- a vector of gene identifiers and
genenames- a vector of gene names. Optional components: samplelabels and
batchlabels, both character vectors.}
    \item{file}{Character name of a text file. }
\item{trace}{Optional argument. If true, progress in writing out file is reported.}
}
    
\details{
    \code{pamr.to.excel} 
writes out the data object into a tab-delimited text file, of the same
form as is read in by pamr.from.excel.
Useful for writing out data that has been imputed by pamr.knnimpute or
adjusted by pamr.batchadjust. Note- this function writes the file out
one line at a time, and hence can take a while for big datasets.
}


   
 

\author{ Trevor Hastie, Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\keyword{ }


\eof
\name{pamr.train}
\alias{pamr.train}
\title{ A function to train a nearest shrunken centroid
                         classifier}
\description{
    A function that computes a nearest shrunken centroid for gene
    expression
    (microarray) data
}
\usage{
pamr.train(data, gene.subset=1:nrow(data$x), sample.subset=1:ncol(data$x),
         threshold = NULL, n.threshold = 30, 
        scale.sd = T, threshold.scale = NULL, se.scale = NULL, offset.percent = 50,
         prior = n.class/n, remove.zeros = T, sign.contrast="both")
}

\arguments{
    \item{data}{The input data. A list with components: x- an expression
	matrix (genes in the rows, samples in the columns), and y-  a vector of
	the class labels for each sample.
      Optional components-
	genenames,
    a vector of gene names, and geneid- a vector of gene identifiers.}
    \item{gene.subset}{Subset of genes to be used.  Can be either
      a logical vector of length total number of genes, or a list
     of integers of the row numbers of the genes to be used}
   \item{sample.subset}{Subset of samples to be used.  Can be either
      a logical vector of length total number of samples, or a list
  of integers of the column numbers of the samples to be used.}
    \item{threshold}{A vector of threshold values for the centroid
	shrinkage.
	Default is a set of 30 values chosen by the software}
    \item{n.threshold}{Number of threshold values desired (default 30)}
    \item{scale.sd}{Scale each threshold by the within class standard
	deviations? Default: true}
    \item{threshold.scale}{Additional scaling factors to be applied
	to the thresholds. Vector of length equal to the number of
	classes.
    Default- a vector of ones.}
    \item{se.scale}{Vector of scaling factors for the within class
	standard errors. Default is sqrt(1/n.class-1/n), where n is
	the overall sample size and n.class is the sample sizes in each
	class. This default adjusts for different class sizes.}
    \item{offset.percent}{Fudge factor added to  the denominator of each t-statistic,
expressed as a percentile of the gene standard deviation values.
	This is a small positive quantity to penalize genes with
	expression
	values near zero, which can result in very large ratios. This
	factor
	is especially important for Affy data. Default
	is the median of the standard deviations of each gene.}
    \item{prior}{Vector of length the number of classes, representing
	prior probabilities for each of the classes. The prior is used
	in Bayes rule for making class prediction. Default is n.class/n,
	where  n is the overall sample size and n.class is the sample
	sizes in   each class.}
    \item{remove.zeros}{Whether to remove multiple solutions
         having zero genes}
      \item{sign.contrast}{Directions of allowed deviations of class-wise average gene  expression from the 
overall average  gene expression. Default is ``both'' (positive or negative).
Can also  be set to ``positive'' or ``negative''.}
}
\details{
    \code{pamr.train} fits a nearest shrunken centroid classifier to gene
    expression data. Details may be found in the PNAS paper referenced
below. One feature not described there is "heterogeneity analysis".
Suppose there are two classes labelled "A" and "B".
Class "A" is considered a normal class, and "B" an abnormal class.
Setting hetero="A" transforms   expression values x[i,j] to
|x[i,j]- mean(x[i,j])| where the mean is taken only over samples in
class "A". The transformed feature values are then used in Pam.
This is useful when the abnormal class "B" is heterogeneous, i.e.
a given gene might have higher expression than normal for some
class "B" samples, and lower for others.
With more than 2 classes, each class is centered on the class specified
by hetero.
}

\value{
A list with components
  \item{y}{The outcome classes.} 
  \item{yhat}{A matrix of predicted classes, each column representing
      the results from one threshold.}
  \item{prob}{An array of predicted class probabilities, of dimension
      n by nclass by n.threshold. n is the number of samples, nclass is the number
      of classes, n.threshold is the number of thresholds tried}
  \item{centroids}{A matrix of (unshrunken) class centroids, n by
      nclass}
\item{hetero}{Value of hetero used in call to pamr.train}
\item{norm.cent}{Centroid of "normal" group, if hetero was specified}
  \item{centroid.overall}{A vector containing the (unshrunken) overall
      centroid (all classes together)}
  \item{sd}{A vector of the standard deviations for each gene}
  \item{threshold}{A vector of the thresholds tried in the shrinkage}
  \item{nonzero}{A vector of the number of genes that survived the
      thresholding, for each threshold value tried}
  \item{threshold.scale}{A vector of threshold scale factors that were
      used}
   \item{se.scale}{A vector of standard error scale factors that were
       used}
   \item{call}{The calling sequence used}
   \item{prior}{The prior probabilities used}
   \item{errors}{The number of training errors for each threshold value}
   }

   \references{
Robert Tibshirani, Trevor Hastie, Balasubramanian Narasimhan, and Gilbert Chu (2002).
  Diagnosis of multiple cancer types by shrunken centroids of gene expression.
  PNAS 99: 6567-6572.   Available at www.pnas.org}

   
 

\author{ Trevor Hastie,Robert Tibshirani, Balasubramanian Narasimhan, and Gilbert Chu  }

\examples{
set.seed(120)
#generate some data
x <- matrix(rnorm(1000*20),ncol=20)
y <- sample(c(1:4),size=20,replace=TRUE)
mydata <- list(x=x,y=factor(y))

#run classifier
results<-   pamr.train(mydata)

# run classifier on all  data except class 4
results2 <- pamr.train(mydata,sample.subset=(mydata$y!=4))
 
# run classifier on  only the first 500 genes
results3 <- pamr.train(mydata,gene.subset=1:500)

}
\keyword{ }


\eof
