normalizeFragmentLength     package:aroma.light     R Documentation

_N_o_r_m_a_l_i_z_e_s _s_i_g_n_a_l_s _f_o_r _P_C_R _f_r_a_g_m_e_n_t-_l_e_n_g_t_h _e_f_f_e_c_t_s

_D_e_s_c_r_i_p_t_i_o_n:

     Normalizes signals for PCR fragment-length effects. Some or all
     signals are used to estimate the normalization function.  All
     signals are normalized.

_U_s_a_g_e:

     ## Default S3 method:
     normalizeFragmentLength(y, fragmentLengths, targetFcns=NULL, subsetToFit=NULL, .isLogged=TRUE, ..., .returnFit=FALSE)

_A_r_g_u_m_e_n_t_s:

       y: A 'numeric' 'vector' of length K of signals to be normalized
          across E enzymes.

fragmentLengths: An 'integer' KxE 'matrix' of fragment lengths.

targetFcns: A 'list' of E 'function's - one per enzyme.

subsetToFit: The subset of data points used to fit the normalization
          function. If 'NULL', all data points are considered.

.isLogged: A 'logical'.

     ...: Additional arguments passed to 'lowess'.

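.returnFit: A 'logical'. If 'TRUE', the model fit is attached to the
          returned vector as attribute 'modelFit' (see Example 2).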

_V_a_l_u_e:

     Returns a 'numeric' 'vector' of the normalized signals.

_M_u_l_t_i-_e_n_z_y_m_e _n_o_r_m_a_l_i_z_a_t_i_o_n:

     It is assumed that the fragment-length effects from multiple
     enzymes are added (with equal weights) on the intensity scale. The
     fragment-length effects are fitted for each enzyme separately
     based on units that are exclusively for that enzyme. _If there are
     no or very few such units for an enzyme, the assumptions of the
     model are not met and the fit will fail with an error._ Then, from
     the above single-enzyme fits the average effect across enzymes is
     calculated for each unit that is on multiple enzymes.

_A_u_t_h_o_r(_s):

     Henrik Bengtsson (<URL: http://www.braju.com/R/>)

_R_e_f_e_r_e_n_c_e_s:

     [1] H. Bengtsson, R. Irizarry, B. Carvalho, and T.P. Speed.
     _Estimation and assessment of raw copy numbers at the single locus
     level_, Bioinformatics, 2008. 

_E_x_a_m_p_l_e_s:

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Example 1: Single-enzyme fragment-length normalization of 9 arrays
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Size of the simulated data set: I samples (arrays) and J loci
     I <- 9
     J <- 1000

     # One fragment length per locus, evenly spaced from 100 to 1000
     fl <- seq(from=100, to=1000, length.out=J)

     # Simulate log2 signals, one column per sample.  Each sample follows
     # its own curve maxY - fl^(1/k), with k drawn uniformly from [3,5],
     # plus standard normal noise.
     maxY <- 12
     y <- matrix(0, nrow=J, ncol=I)
     for (ss in seq_len(I)) {
       k <- runif(n=1, min=3, max=5)
       noise <- rnorm(J, mean=0, sd=1)
       y[,ss] <- (maxY - fl^(1/k)) + noise
     }

     # Normalize each sample (column) separately (to a zero baseline)
     yN <- apply(y, MARGIN=2, FUN=function(signals) {
       normalizeFragmentLength(signals, fragmentLengths=fl)
     })

     # Plot raw data: one panel per sample in a 3-by-3 grid, with a
     # lowess curve (red) tracing the fragment-length effect
     layout(matrix(1:9, ncol=3))
     xlim <- c(0, max(fl))
     ylim <- c(0, max(y))
     xlab <- "Fragment length"
     ylab <- expression(log2(theta))
     for (ss in seq_len(I)) {
       signals <- y[,ss]
       plot(fl, signals, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
       lines(lowess(fl, signals), col="red", lwd=2)
     }

     # Plot normalized data on a fresh 3-by-3 grid, using a y-range that
     # is symmetric around zero; the lowess curve is now drawn in blue
     layout(matrix(1:9, ncol=3))
     ylim <- c(-1,1)*max(y)/2
     for (ss in seq_len(I)) {
       signals <- yN[,ss]
       plot(fl, signals, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
       lines(lowess(fl, signals), col="blue", lwd=2)
     }

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Example 2: Two-enzyme fragment-length normalization of 5 arrays
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     set.seed(0xbeef)

     # Size of the simulated data set: I samples and J loci
     I <- 5
     J <- 2000

     # Fragment lengths, one column per enzyme: increasing along the loci
     # for enzyme 1 and decreasing for enzyme 2
     fl <- cbind(
       seq(from=100, to=1000, length.out=J),
       seq(from=1000, to=100, length.out=J)
     )

     # Remove every 4th unit (starting at unit 1) from enzyme 1 and every
     # 4th unit (starting at unit 2) from enzyme 2.  This leaves 1/2 of
     # the units on both enzymes.
     fl[seq(from=1, to=J, by=4), 1] <- NA
     fl[seq(from=2, to=J, by=4), 2] <- NA

     # Sty/Nsp mixing proportions (applied to enzyme 2 below)
     rho <- rep(1, I)
     rho[c(1, 3)] <- c(1/3, 3/2)  # Less Sty in 1st sample, more in 3rd

     # Fragment-length effect on the log2 scale.
     # NOTE(review): k is redrawn on every call, so the two enzymes of a
     # sample get different curves - confirm that this is intended.
     maxLog2Theta <- 12
     drawEffect <- function(lengths) {
       k <- runif(n=1, min=3, max=5)
       maxLog2Theta - lengths^(1/k)
     }

     # Simulate data: z[unit, enzyme, sample] holds log2 signals
     z <- array(0, dim=c(J,2,I))
     for (ss in seq_len(I)) {
       # The effect for each data point, enzyme by enzyme
       for (ee in 1:2) {
         z[,ee,ss] <- drawEffect(fl[,ee])
       }

       # Apply the sample's Sty/Nsp mixing proportion to enzyme 2
       z[,2,ss] <- rho[ss]*z[,2,ss]

       # Add random errors, enzyme by enzyme
       for (ee in 1:2) {
         z[,ee,ss] <- z[,ee,ss] + rnorm(J, mean=0, sd=1/sqrt(2))
       }
     }

     # Which units have a fragment length on which enzyme?
     hasFl <- is.finite(fl)
     onFirst <- hasFl[,1]
     onSecond <- hasFl[,2]

     unitSets <- list(
       nsp  = which(onFirst & !onSecond),
       sty  = which(onSecond & !onFirst),
       both = which(onFirst & onSecond)
     )

     # The observed data is a mix of two enzymes
     theta <- matrix(NA, nrow=J, ncol=I)
     intensities <- 2^z

     # Single-enzyme units take the signal of their only enzyme
     theta[unitSets$nsp,] <- intensities[unitSets$nsp,1,]
     theta[unitSets$sty,] <- intensities[unitSets$sty,2,]

     # Both-enzyme units: equal-weight average on the intensity scale
     onBoth <- unitSets$both
     theta[onBoth,] <- (intensities[onBoth,1,] + intensities[onBoth,2,])/2

     # Calculate target array: the across-sample mean signal per unit
     thetaT <- rowMeans(theta, na.rm=TRUE);

     # Build one target function per enzyme from a lowess fit of the
     # log2 target signals against fragment length, using only the units
     # that are exclusive to that enzyme.
     targetFcns <- vector("list", 2);
     for (ee in 1:2) {
       uu <- unitSets[[ee]];
       fit <- lowess(fl[uu,ee], log2(thetaT[uu]));
       # Tag the fit so that predict() dispatches on class "lowess"
       class(fit) <- "lowess";
       # Bind this enzyme's fit in a private environment.  A closure that
       # refers to the loop variable 'fit' directly would look it up when
       # called, so every target function would use whatever 'fit' holds
       # at that time (the last enzyme's fit, or a later reassignment).
       targetFcns[[ee]] <- local({
         enzymeFit <- fit;
         function(fl, ...) {
           predict(enzymeFit, newdata=fl);
         }
       });
     }

     # Normalize data (to a target baseline), one sample at a time.
     # The model fit of each sample is kept for the plots further down.
     fits <- vector("list", I)
     thetaN <- matrix(NA, nrow=J, ncol=I)
     for (ss in seq_len(I)) {
       logSignals <- log2(theta[,ss])
       normalized <- normalizeFragmentLength(logSignals, fragmentLengths=fl, targetFcns=targetFcns, .returnFit=TRUE)
       # With .returnFit=TRUE the fit is read from attribute "modelFit"
       fits[[ss]] <- attr(normalized, "modelFit")
       # Store the normalized signals back on the intensity scale
       thetaN[,ss] <- 2^normalized
     }

     # Plot raw data
     # Axis ranges and labels; Mlim and Mlab are for log-ratio plots
     lTheta <- log2(theta)
     xlim <- c(0, max(fl, na.rm=TRUE))
     ylim <- c(0, max(lTheta, na.rm=TRUE))
     Mlim <- c(-4, 4)
     xlab <- "Fragment length"
     ylab <- expression(log2(theta))
     Mlab <- expression(M==log[2](theta/theta[R]))

     # Three rows of I panels each, filled row by row; this loop fills
     # the first row with one "raw" panel per sample
     layout(matrix(seq_len(3*I), nrow=3, byrow=TRUE))
     for (ii in seq_len(I)) {
       plot(NA, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, main="raw")

       # Raw data of the single-enzyme units (color 2 and 3)
       for (ee in 1:2) {
         uu <- unitSets[[ee]]
         points(fl[uu,ee], lTheta[uu,ii], col=ee+1)
       }

       # Raw data of the both-enzyme units (color 4), placed at their
       # fragment length for enzyme #1
       uu <- unitSets$both
       points(fl[uu,1], lTheta[uu,ii], col=4)

       # Overlay three curves per enzyme
       for (ee in 1:2) {
         uu <- unitSets[[ee]]
         flE <- fl[uu,ee]

         # Lowess through the raw single-enzyme data (dotted black)
         lines(lowess(flE, lTheta[uu,ii]), col="black", lwd=4, lty=3)

         # The effect estimated by the normalization (orange)
         fit <- fits[[ii]][[ee]]$fit
         lines(fit, col="orange", lwd=2)

         # The target function for this enzyme (purple)
         muT <- targetFcns[[ee]](flE)
         lines(flE, muT, col="purple", lwd=1)
       }
     }

     # Calculate log-ratios of each normalized sample relative to the
     # across-sample mean of the normalized signals
     thetaR <- rowMeans(thetaN, na.rm=TRUE)
     M <- log2(thetaN/thetaR)

     # Plot normalized data: one panel of log-ratios per sample
     for (ss in seq_len(I)) {
       plot(NA, xlim=xlim, ylim=Mlim, xlab=xlab, ylab=Mlab, main="normalized")

       # Single-enzyme units (color 2 and 3)
       for (ee in 1:2) {
         units <- unitSets[[ee]]
         points(fl[units,ee], M[units,ss], col=ee+1)
       }

       # Both-enzyme units (color 4), placed at their fragment length
       # for enzyme #1
       units <- unitSets$both
       points(fl[units,1], M[units,ss], col=4)
     }

     # Densities of the log-ratios per unit set, one panel per sample.
     # NOTE(review): ylim is set here but not passed to plotDensity() -
     # confirm whether ylim=ylim was intended.
     ylim <- c(0,1.5)
     for (ss in seq_len(I)) {
       # Log-ratios of the two single-enzyme unit sets
       data <- lapply(1:2, function(ee) M[unitSets[[ee]], ss])

       # Add the both-enzyme units only if there are any
       units <- unitSets$both
       if (length(units) > 0) {
         data[[3]] <- M[units,ss]
       }

       plotDensity(data, col=(1:3)+1, xlim=Mlim, xlab=Mlab, main="normalized")
       abline(v=0, lty=2)
     }

      

