\name{bslnoff}
\alias{bslnoff}
\title{Baseline Subtraction}
\description{
This function estimates the baseline and then removes the baseline from the raw spectrum.
}
\usage{
bslnoff(f, breaks = 200, qntl = 0, method = c("loess", "approx"), bw = 0.005, plot = FALSE, ...)
}
\arguments{
  \item{f}{a matrix with M/Z values in the first column and intensities in the second column }
  \item{breaks}{number of breaks set to M/Z values for finding the local minima or points below a certain quantile of intensities; this gives breaks - 1 equally spaced intervals on the log M/Z scale. }
  \item{qntl}{if 0, find local minima; if >0 find intensities < qntl*100th quantile locally.}
  \item{method}{"loess" or "approx" (linear interpolation).}
  \item{bw}{the bandwidth to be passed to loess.}
  \item{plot}{TRUE or FALSE; if TRUE, it will plot the raw spectrum, the estimated baseline and the baseline-subtracted spectrum.}
  \item{\dots}{parameters for plot.}
}
\value{
a matrix of two columns: the first column being the M/Z values, the same as
the input, and the second column being the baseline-subtracted spectrum.
}
\author{Xiaochun Li}
\examples{
fdat <- system.file("Test", package = "PROcess")
fs <- list.files(fdat, pattern="*csv*", full.names=TRUE)
f1 <- read.files(fs[1])
fcut <- f1[f1[,1]>0,]
bseoff <-bslnoff(fcut,method="loess",plot=TRUE, bw=0.1)
title(basename(fs[1]))
}
\keyword{nonparametric}

\eof
\name{getMzs}
\alias{getMzs}
\title{Extract M/Z values from the biomarker dataframe}
\description{
Turn column names of the biomarker dataframe to numeric
M/Z values.
}
\usage{
getMzs(df)
}
\arguments{
  \item{df}{The biomarker dataframe with rows as spectra and columns as biomarkers.}
}
\value{
A numeric vector.
}
\keyword{utilities}

\eof
\name{getPeaks}
\alias{getPeaks}
\title{Peak Detection}
\description{
For given threshold criteria, find peaks.
}
\usage{
getPeaks(bseoffM, peakinfofile,SoN = 2,span = 81,sm.span=11, 
zerothrsh=2, area.w = 0.003, ratio = 0.2)
}
\arguments{
  \item{bseoffM}{a matrix holding the baseline-subtracted
          spectra, with  row-names as the m/z values and
          column-names  as the spectrum names.}
  \item{peakinfofile}{a `.csv' file in the same format as
		Ciphergen's peak info file, with 5 columns of
		data; see Details.}
  \item{SoN}{see isPeak().}
  \item{span}{see isPeak().}
  \item{sm.span}{see isPeak().}
  \item{zerothrsh}{ignore peaks whose intensity values are below zerothrsh.}
   \item{area.w}{see isPeak().}
  \item{ratio}{see isPeak().}
}
\details{
For given threshold criteria, detect peaks and write the 
following columns of information into 'peakinfofile', 
spectrum name (Spectrum.Tag), spectrum sequential number
(Spectrum.), peak sequential number within a spectrum (Peak.),
relative intensity (Intensity) and the m/z value where the 
relative intensity occurs (Substance.Mass).
}
\author{Xiaochun Li}
\seealso{\code{\link{rmBaseline}}}
\examples{
example(renorm)
peakfile <- paste(tempdir(),"testpeakinfo.csv", sep="/")
getPeaks(rtM, peakfile)
}
\keyword{nonparametric}

\eof
\name{intg}
\alias{intg}
\title{Integration}
\description{
This function calculates the integral of y with respect to x.
}
\usage{
intg(y, x)
}
\arguments{
\item{x}{a vector of real values, not necessarily sorted.} 
\item{y}{a vector of function values at x.}
}
\details{
 The (x, y) pairs are sorted by ascending x values and then
 the integral is calculated by the trapezoidal rule: the sum of the products of the average of adjacent y values and the difference of adjacent x values.
}
\author{Xiaochun Li}
\examples{
x <- seq(0, 1,length=100)
y <- x^2
intg(y, x)
}
\keyword{math}

\eof
\name{isPeak}
\alias{isPeak}
\title{Locate Peaks in a Spectrum}
\description{
Find local maxima of a spectrum.
}
\usage{
isPeak(f,SoN = 2,span = 81,sm.span=11,plot=FALSE,add = FALSE,
zerothrsh=2,area.w = 0.003, ratio = 0.2) }
\arguments{
  \item{f}{a matrix of two columns representing a spectrum, with the 
		first column the m/z value and second the intensity}
  \item{SoN}{signal to noise ratio criterion for peak detection}
  \item{span}{parameter for estimating local variance before 
	peak detection; default is 81 points, that is, 40 
	points to the left and right of a point of which the 
	variance is being estimated.}
  \item{sm.span}{parameter for smoothing the spectrum before 
	peak detection; default is 11 points, that is, 5 
	points to the left and right of a point being smoothed}
  \item{plot}{logical, plot the smoothed spectrum and peaks? }
  \item{add}{add to the existing raw and baseline-subtracted plot?}
  \item{zerothrsh}{ignore peaks whose intensity values are below zerothrsh.}
  \item{area.w}{the neighbourhood of a peak m/z, mz*(1-area.w, 1+area.w), used to calculate the area of the peak.}
  \item{ratio}{if area/max(area) > ratio, the peak is retained.} 
}
\details{
A spectrum is smoothed first using the nearest `sm.span'
neighbours. A larger span `span' is needed to estimate
the local variation so that it is not overestimated due to
the peaks nearby.  Then potential peaks are identified using
Ripley's `peaks' function with `span' points.

Peaks that satisfy the conditions that the (smoothed) signal
to noise ratio is greater than `SoN' and that the smoothed 
signal is greater than 1.64 * mad(sm) are returned.
}
\value{
A data frame with five components, `peak', `smooth', `mz',
     `sigmas' and `area', each of length the number of rows in `f'.
     `peak' is logical indicating whether there is a peak or not
     (Y/N), `smooth' the smooth of the spectrum, `mz' the same as
     `f[,1]', `sigmas' the estimates of local variation and `area' the area associated with each peak after the first pass.
}
\author{Xiaochun Li}
\examples{
example(bslnoff)
pkobj <- isPeak(bseoff,span=81,sm.span=11,plot=TRUE)

}
\keyword{nonparametric}

\eof
\name{lnn}
\alias{lnn}
\title{Estimate Signal and Variation of a Spectrum }
\description{
Estimate the signal and the variation of a spectrum.
}
\usage{
lnn(x, span = 21, sm.span = 11)
}
\arguments{
  \item{x}{a vector of real values.}
  \item{span}{the window width for estimation of local variation.}
  \item{sm.span}{the window width for estimation of the signal of x.}
}
\details{
The signal of a spectrum is estimated by moving average and its local variation is estimated by moving `mad', possibly in a large window.
}
\value{A list with two components:
  \item{fitted}{estimated signal,}
  \item{sigma}{estimated local variation.}
}
\keyword{nonparametric}

\eof
\name{peaks}
\alias{peaks}
\alias{noise}
\alias{sigma}
\title{Peak Detection}
\description{
Finds the local maxima, local noise and its associated standard 
deviations in a vector.
}
\usage{
peaks(x, span = 3)
noise(x, span = 5)
sigma(x, span = 5)
}
\arguments{
\item{x}{a vector.}
\item{span}{a local maximum is defined as an element in a sequence which is greater than all other elements within a window of width `span' centered at that element. The default value is 3, meaning that a peak is bigger than both of its neighbors. Local noise is defined as an element minus the mean of all elements within a window of width `span' centered at that element. Local standard deviation of an element is defined as the standard deviation of all elements within a window of width `span' centered at that element.}
}
\value{
a logical vector of the same length as `x' indicating where the peaks are.
}
\author{Xiaochun Li}
\examples{
x <- seq(0, 10*pi, by=0.1)
y <- sin(x)*x
plot(x,y, type="l")
is.max <- peaks(y)
points(x[is.max],y[is.max], pch=21, bg="red")
legend(2, 25, legend = "Peaks",pch = 19, col="red", bty = "n")

# can be used for local minima too:
# is.min <- peaks(-y)
# points(x[is.min],y[is.min], pch=21, bg="blue")
}
\keyword{nonparametric}

\eof
\name{pk2bmkr}
\alias{pk2bmkr}
\title{Find Biomarkers}
\description{
Align peaks of spectra in `peakinfofile' and find biomarkers by a procedure described in Gentleman and Geyer (1994).
}
\usage{
pk2bmkr(peakinfofile, bseoffM, bmkfile, eps = 0.003, binary = F,p.fltr = 0.1)
}
\arguments{
  \item{peakinfofile}{a `.csv' file in the same format as   
	Ciphergen's peakinfo file with 5 columns  data, 
	Spectrum.Tag, Spectrum., Peak., Intensity and  
     	Substance.Mass.}
  \item{bseoffM}{a matrix holding the baseline-subtracted 
	spectra, with row-names as the m/z values and 
	column-names  as the spectrum names.
}
  \item{bmkfile}{a `.csv' file in the same format as   
        Ciphergen's biomarker file, with spectra (samples) 
	as columns, and biomarkers as rows.}
  \item{eps}{expected experimental variation in the m/z values.}
  \item{binary}{output intensity or binary peak 
	presence/absence signals. }
  \item{p.fltr}{a number between 0 and 1. If a proto-biomarker 
	is identified as peak in > p.fltr x 100 percent of spectra, 
	it's kept in 'bmkfile'. }
}
\value{A dataframe with spectra as rows and biomarkers as 
columns. Spectrum labels and biomarker positions may be in 
the names of the dataframe.
}
\references{Gentleman, R. and Geyer, C.J. (1994). Maximum 
likelihood for interval censored data: Consistency and 
computation. Biometrika, 81:618--623.}
\author{Xiaochun Li}
\seealso{ \code{\link{rmBaseline}},\code{\link{getPeaks}}
}
\examples{
example(getPeaks)
bmkfile <- paste(tempdir(),"testbiomarker.csv",sep="/")
testBio <- pk2bmkr(peakfile, rtM, bmkfile)

## plot biomarker intensities of the 2 spectra

mzs <- as.numeric(rownames(rtM))
matplot(mzs, rtM, type="l", xlim=c(1000, 10000))

bks <- getMzs(testBio)
abline(v=bks, col="green")
}
\keyword{nonparametric}

\eof
\name{quality}
\alias{quality}
\title{Quality Check on a Set of Spectra}
\description{
Compute three quality parameters for a set of spectra.
}
\usage{
quality(csvfldr, peakinfofile, cutoff)
}
\arguments{
  \item{csvfldr}{a path to where the baseline-subtracted spectra are stored.}
  \item{peakinfofile}{a `.csv' file in the same format as 
	Ciphergen's peak info file, with 5 columns data. See Details of \code{getPeaks}.}
  \item{cutoff}{The point in m/z below which spectra are cutoff.}
}
\details{
The quality parameters are computed following the approach of Mani.
1. Estimate noise by moving average with a 5 point window.
2. Estimate the noise envelope by 3 times the standard deviation of noise in a 251 point moving window.
3. Compute the area under the baseline-subtracted curve, area0.
4. Compute the area after subtracting the noise envelope from the baseline-subtracted curve, area1.
5. Parameter 'Quality' is defined as area1/area0.
6. Parameter 'Retain' is defined as the number of points with height above 5 times the noise envelope over total number of points in the spectrum.
7. Detect peaks in each spectrum by \code{getPeaks} or Ciphergen software.
8. Parameter 'peak' is defined as the number of peaks in a spectrum divided by the mean number of peaks across spectra.

A spectrum is considered to be of poor quality if Quality<0.4, Retain<0.1 and peak<0.5 simultaneously.
}
\value{
A matrix with three named columns, 'Quality','Retain' and 'peak', with spectrum file names as row names.
}
\references{Mani Refn? }
\author{Xiaochun Li}
\examples{
example(getPeaks)
qualRes <- quality(testdir, peakfile, cutoff=1500)
}
\keyword{math}

\eof
\name{read.files}
\alias{read.files}
\title{Read a Spectrum from a Comma Delimited File}
\description{
Read a spectrum from a comma delimited file, possibly compressed.
}
\usage{
read.files(fn)
}
\arguments{
  \item{fn}{path to a `.csv' file, possibly compressed.}
}
\keyword{utilities}

\eof
\name{renorm}
\alias{renorm}
\title{Renormalize Spectra}
\description{
Renormalize spectra for m/z values greater than `cutoff'.
}
\usage{
renorm(Ma, cutoff)
}
\arguments{
  \item{Ma}{a matrix, with rows the m/z values and the columns the samples.}
  \item{cutoff}{a real value, before which the portion of a spectrum will be ignored.}
}
\details{
A sample of spectra will be normalized to have the same AUC,
the median of the AUCs of spectra. Each AUC is calculated as
the sum of the intensities whose m/z values are greater than
`cutoff'.
}
\value{
A matrix, with rows the m/z values and the columns the samples.
Only rows with m/z values greater than `cutoff' are kept.
}
\examples{
example(rmBaseline)
rtM <- renorm(testM, cutoff=1500)
}
\keyword{utilities}

\eof
\name{rmBaseline}
\alias{rmBaseline}
\title{Batch Baseline Subtraction}
\description{
Baseline subtraction from each raw spectrum in 'fldr'.
}
\usage{
rmBaseline(fldr, outputname = "baseoffM", bseoffrda = NULL, breaks = 200, qntl = 0, method = "loess", bw = 0.1, SpecNames = list.files(fldr, pattern = "*csv*"))
}
\arguments{
  \item{fldr}{a path to where the raw spectra are stored}
  \item{outputname}{name of the matrix holding the
          baseline-subtracted spectra, a matrix with 
	  row-names as the m/z values and column-names 
	  as the spectrum names.}
  \item{bseoffrda}{ optional; name of the file (with 
	extension .rda) where the baseline-subtracted 
	spectra, a matrix with row-names as the m/z  values 
	and column-names as the spectrum tags, will be saved 
	to. }
  \item{breaks}{see bslnoff().}
  \item{qntl}{see bslnoff().}
  \item{method}{see bslnoff().}
  \item{bw}{see bslnoff().}
  \item{SpecNames}{a vector of character strings as spectrum 
	names.}
}
\value{ A matrix whose columns correspond to 
	baseline-subtracted spectra with row-names as 
 	the m/z values and column-names as the spectrum 
        names.
}
\author{Xiaochun Li}
\seealso{\code{\link{bslnoff}}}
\examples{
testdir <- system.file("Test", package = "PROcess")
rmBaseline(testdir, "testM")
}
\keyword{nonparametric}

\eof
\name{specZoom}
\alias{specZoom}
\title{Plotting a Spectrum with Peaks}
\description{
Function for plotting an object returned by \code{isPeak}.
}
\usage{
specZoom(pks, xlim = NULL, cols = c("cyan", "red", "black"))
}
\arguments{
  \item{pks}{an object (a list) returned by \code{isPeak}.}
  \item{xlim}{a range of m/z values over which a zoomed-in view of the spectrum is desired.}
  \item{cols}{a vector of color specification for the smooth (signal), peaks and local noise.}
}
\examples{
example(isPeak)
specZoom(pkobj, xlim=c(5000, 10000))
}
\keyword{hplot}

\eof
