## Options: input files, output directory and time-stamped output names
presFile       <- "../reports/fig.040424/pres.Sat-Apr-24-00:06:44-2004.rda"
proteinsFile   <- "JezDoritWolfgang.csv" ## "Passay_ExPro_Data.txt"

outdir   <- "../reports/betwell.040424"

## Take the time stamp once: calling date() separately for each name could
## give the result files and the log file different stamps when the clock
## ticks between the two calls.  Blanks are replaced by "-".
runStamp <- gsub(" ", "-", date())
outFile  <- paste("betwell_", runStamp, sep="")
logFile  <- file.path(outdir, paste("betLog_", runStamp, ".txt", sep=""))

## Load package and data.
## Start from R's default error and warning handling, and make sure a
## fresh copy of prada is attached.
options(error=NULL, warn=0)
if (is.element("package:prada", search()))
  detach("package:prada")
library(prada)
source("mapId.R")

## Restore the saved plate results (presumably this provides 'pres', which
## is used further down -- confirm) and read the tab-separated protein table.
load(presFile)
proteins <- read.table(proteinsFile, header=TRUE, sep="\t", as.is=TRUE, fill=TRUE)

## The ORF identifier column must exist and must not contain duplicates.
orfidCol <- "ORFID"
stopifnot(orfidCol %in% colnames(proteins), !any(duplicated(proteins[[orfidCol]])))
orfids   <- proteins[[orfidCol]]

## Drop the two proliferation columns; both have to be present.
throwOutCol <- match(c("ProlifYFP", "ProlifCFP"), colnames(proteins))
stopifnot(!any(is.na(throwOutCol)))
proteins <- proteins[, -throwOutCol]

cat("proteins:", nrow(proteins), "rows,", ncol(proteins), "columns\n")

## Prepare results table.
## One extra row per control is put in front of the ORF rows.  The rows are
## created by duplicating the first rows of the table and then blanking
## every column of the copies.
controls <- c("all", "GFP", "CA", "PP2", "CDI")
proteins <- rbind(proteins[seq_along(controls), ], proteins)
proteins[seq_along(controls), ] <- NA

## Row names: control names first, then the mapped ORF identifiers.  The
## control names also go into the identifier column of their rows.
rownames(proteins) <- c(controls, mapId(orfids, withTag=FALSE))
proteins[[orfidCol]][seq_along(controls)] <- I(controls)

## Result columns, filled in by the analysis loop.
proteins$Sphase.effect <- character(nrow(proteins))
proteins$Sphase.pvalue <- numeric(nrow(proteins))

## Plot colors, one per dye (cfp and yfp)
cols <- c(cfp="#1D267B", yfp="#ceffc0")

## Only use reasonably good plates: discard repeat 4 of experiment dorit9,
## repeats 3 and 4 of experiment dorit10, and every clone whose name
## contains "Exon" or "pdYFPhamy2_1p4_1E1".
## NOTE(review): as.integer() on a factor returns the level codes, not the
## labels -- confirm that expRepeat is not stored as a factor.
throw <- (pres$expId == "dorit9"  & as.integer(pres$expRepeat) == 4L) |
         (pres$expId == "dorit10" & as.integer(pres$expRepeat) %in% c(3L, 4L)) |
         regexpr("Exon", as.character(pres$cloneId)) > 0 |
         regexpr("pdYFPhamy2_1p4_1E1", as.character(pres$cloneId)) > 0

spres <- pres[!throw, ]

## Start the log file with the experiment/repeat combinations that remain.
cat("Plates that were used:", unique(paste(spres$expId, spres$expRepeat, sep="_")), "\n\n", file=logFile)

## Prepare graphics output.
## From here on errors drop into the interactive frame browser.
## (options(warn=2) is left disabled.)
options(error=recover)
graphics.off()
x11()

## Clone name and dye of every remaining well; histogram bin boundaries.
allclones <- as.character(spres$cloneId)
dye       <- getDye(allclones)
breaks    <- seq(from=-9, to=9, by=2)

## The reference distribution: z-scores of all wells that have a clone name.
ref <- spres[!is.na(allclones), "zscore"]

## Wrapper for the two-sample Wilcoxon test.
##
## x, y: numeric vectors; missing values are allowed.
##
## Returns a list with
##   p.value  the p-value of wilcox.test(x, y), or 1 if either sample has
##            at most two non-missing values (no test is run in that case)
##   what     "" unless p.value < 0.05; then "act" if the estimated
##            location shift of x relative to y is positive, else "rep"
wt <- function(x, y) {
  out <- list(p.value=1, what="")

  ## Too few observations in one of the samples: keep the defaults.
  nobs <- c(sum(!is.na(x)), sum(!is.na(y)))
  if (min(nobs) <= 2)
    return(out)

  test <- wilcox.test(x, y, conf.int=TRUE)
  out$p.value <- test$p.value
  if (out$p.value < 0.05) {
    direction <- c("rep", "act")
    ## Index 2 for a positive estimate, index 1 for zero or negative.
    out$what <- direction[1L + (sign(test$estimate) > 0)]
  }
  out
}

## Code the TrueLoc entry of every row:
##   1 = "CFP-ORF = ORF-YFP", 2 = "CFP-ORF", 3 = "ORF-YFP"
whichloc <- match(proteins$TrueLoc,
                  c("CFP-ORF = ORF-YFP", "CFP-ORF", "ORF-YFP"))

## The column has to exist, and every non-missing entry must be one of the
## three values above.  Rows with a missing TrueLoc are not checked here.
stopifnot(!is.null(proteins$TrueLoc),
          !any(is.na(whichloc[!is.na(proteins$TrueLoc)])))

## The control rows always get code 1.
whichloc[seq_along(controls)] <- 1

## Main analysis loop: one pass per row of 'proteins' (the control rows
## first, then one row per ORF).  Each pass selects the matching wells of
## 'spres', tests their z-scores against the reference distribution 'ref',
## saves a stacked histogram and stores the test result in 'proteins'.
for (i in seq_len(nrow(proteins))) {
  ic <- proteins[[orfidCol]][i]

  ## Wells belonging to this entry: every well for "all", the clones named
  ## "YFP" or "CFP" for "GFP", otherwise every clone whose name matches
  ## 'ic' (which is used as a regular expression).
  switch(ic,
         all = { sel <- rep(TRUE, nrow(spres)) },
         GFP = { sel <- allclones %in% c("YFP", "CFP") },
               { sel <- regexpr(ic, allclones) > 0 })  ## default
  sel <- sel & !is.na(allclones)

  if (ic != "all")
    cat(ic, ":", unique(allclones[sel]), "\n", file=logFile, append=TRUE)

  ## z-scores of the selected wells, one vector per dye.  Values outside
  ## the range of 'breaks' are clamped onto the outermost break; the
  ## clamped values are used for the test and for the histogram.
  sc <- lapply(names(cols), function(d) {
    rv <- spres[sel & dye == d, "zscore"]
    pmin(pmax(rv, min(breaks)), max(breaks))
  })
  names(sc) <- names(cols)

  ## switch() on a number has no default branch: for NA or an out-of-range
  ## value it silently returns NULL, which would leave 'usecols' and 'tt'
  ## of the previous iteration in place and record them for this row.
  ## Reject such rows explicitly instead.
  loc <- whichloc[i]
  if (!(loc %in% 1:3))
    stop("no usable TrueLoc code for '", ic, "' (row ", i, " of proteins)")

  switch(loc,
    { usecols <- cols              ## both
      tt <- wt(unlist(sc), ref)
    },
    { usecols <- cols["cfp"]       ## CFP only
      tt <- wt(sc[["cfp"]], ref)
    },
    { usecols <- cols["yfp"]       ## YFP only
      tt <- wt(sc[["yfp"]], ref)
    }
  )

  ## Axis label with the p-value rounded to two significant digits.  If the
  ## number prints in scientific notation it is split at the "e" and a
  ## plotmath expression is built from mantissa and exponent (leading zeros
  ## of a negative exponent are removed).
  pval <- strsplit(paste(signif(tt$p.value, 2)), "e")[[1]]
  if (length(pval) == 1) {
    xlab <- paste("z-score (p=", pval, ")", sep="")
  } else {
    xlab <- substitute(z-score~~(p==mantissa*x*10^exponent),
                       list(mantissa=pval[1],
                            exponent=gsub("^-0*", "-", pval[2])))
  }

  ## midsFun blanks every second element of its argument (presumably the
  ## bin mid-points used as axis labels -- confirm against histStack).
  histStack(sc[names(usecols)], col=usecols, breaks=breaks, main="",
            xlab=xlab, ylab="frequency",
            midsFun=function(z) {z[seq(2, length(z), by=2)] <- NA ; return(z) } )
  savetiff(rownames(proteins)[i], width=3.3, density=540, dir=outdir)

  proteins[i, "Sphase.effect"] <- tt$what
  proteins[i, "Sphase.pvalue"] <- tt$p.value
} ## for i

## Rows whose ExProData entry contains "up" / "down"
iup   <- grep("up",   proteins$ExProData)
idown <- grep("down", proteins$ExProData)

## Rows that the test labelled "act" / "rep"
iact <- which(proteins$Sphase.effect == "act")
irep <- which(proteins$Sphase.effect == "rep")

## Console summary: counts, and the names of the rows where both agree
cat("Activators: ", length(iact), "Inhibitors: ", length(irep), "\n") 
cat("Activators with Expro: ", rownames(proteins)[intersect(iact, iup)], "\n")
cat("Repressors with Expro: ", rownames(proteins)[intersect(irep, idown)], "\n")

## Write the annotated table as tab-separated text and as an R data file;
## both names share the same directory and stem.
outPath <- file.path(outdir, outFile)
write.table(proteins, file=paste(outPath, ".txt", sep=""),
            quote=FALSE, sep="\t", row.names=TRUE, col.names=NA)
save(proteins, file=paste(outPath, ".rda", sep=""))
