.packageName <- "PCpheno"
## ===========================================================================
## testResult
## ---------------------------------------------------------------------------
## virtual class 
## ---------------------------------------------------------------------------
setClass("testResult",
         representation("VIRTUAL", Observed="numeric", Expected="ANY"),
         prototype=list(Observed=numeric(0), Expected=matrix(0)))
         
## ===========================================================================
## deResult
## ---------------------------------------------------------------------------
## A container for the results after applying a density Estimate test 
## ---------------------------------------------------------------------------
setClass("deResult",
         contains="testResult",
         representation(Size="numeric"),       
         prototype=list(Size=numeric(0)))

## ===========================================================================
## gtResult
## ---------------------------------------------------------------------------
## A container for the results after applying a density Estimate test 
## ---------------------------------------------------------------------------
setClass("gtResult",
         contains="testResult",
         representation(Pvalue="numeric"),
         prototype=list(Pvalue=numeric(0)))


## ===========================================================================
## HyperG parameters 
## ---------------------------------------------------------------------------
## A container for the parameters to use for a hyperGtest 
## ---------------------------------------------------------------------------

setClass("CoHyperGParams",
         contains="HyperGParams",
         prototype=prototype(categoryName="character"))

## ===========================================================================
## HyperG parameters 
## ---------------------------------------------------------------------------
## A container for the results after applying a HyperGTest 
## ---------------------------------------------------------------------------

setClass("CoHyperGResult",
         contains="HyperGResultBase",
         representation=representation(
           pvalues="numeric",
           oddsRatios="numeric",
           expectedCounts="numeric",
           geneCounts="numeric",
           universeCounts="numeric",
           catToGeneId="list"))
## ===========================================================================
## Searching for overlap between interactomes ex KEGG and ScISI
## ---------------------------------------------------------------------------
## 
## ---------------------------------------------------------------------------
KEGG2SCISI <-function(pw, pc, pcMat, pwMat){
    
    ##pw list of pathway names; pwMat pathway incidence matrix
    ##pc list of complex names; pcMat complex incidence matrix
    stopifnot(all(is.character(pw)))
    stopifnot(all(is.character(pc)))

    ## resize matrix to the interesting columns 
    keggMatint <-  pwMat[,pw]
    scISIMatint <- pcMat[,pc]

    ## resize matrix to the commun rows
    cr <- intersect(rownames(keggMatint),rownames(scISIMatint))
    keggMatint <- keggMatint[cr,]
    scISIMatint <- scISIMatint[cr,]
    
    ## count nb genes overlaping
    mapp <-t(keggMatint) %*% scISIMatint
    return(mapp)
}


summaryMap <- function(mapp, pcMat, pwMat, phenotype){

    ## mapp incidence matrix of pw x pc
    ## pwMat pathway incidence matrix
    ## pcMat complex incidence matrix
    ## phenotype e.g. essential

    ## resize matrix
    nr <- rowSums(mapp)
    nc <-  colSums(mapp)
    pcacross <- mapp[nr>0, , drop=FALSE]
    pcacross <- pcacross[, nc>0, drop=FALSE]
   
    ## count genes overlaping; return a vector or list
    pcacross2 <- apply(pcacross, 1, function(x){ g <- which(x>0)
                                             return(x[g])})
    
    if(is.list(pcacross2)){    
        ##call annotatePC and format results
        comp2Kegg <- lapply(pcacross2, function(x) annotatePC(x, pcMat, phenotype))
        comp2Kegg <-data.frame(format(comp2Kegg))
    }
    
    if(!is.list(pcacross2)){
        y <- pcacross[,colSums(pcacross)>0]
        comp2Kegg <- annotatePC(y, pcMat, phenotype)
        comp2Kegg <- matrix(format(comp2Kegg))
        rownames(comp2Kegg) <- names(pcacross2)
    }
    return(comp2Kegg)
}

annotatePC <- function(x, pcMat, phenotype){
    
    ## complex names 
    cn = names(x)
    if(is.null(cn))
      cn = colnames(x)

    ## size of the pathways and number of gene in the category/phenotype
    if(length(cn) == 1){
        sizePC <- sum(pcMat[, cn])
        nomen <- names(which(pcMat[, cn]>0))
        inC <- length(intersect(nomen, phenotype))
        
    } else {
        sizePC <- colSums(pcMat[, cn])
        inC <- apply(pcMat[, cn], 2, function(y) length(intersect(names(y[y>0]), phenotype)))
    }
    ans <- paste(cn, " (", sizePC, "," , inC, ")", " [", x[x>0], "]", sep="")
    return(ans)
}
## ===========================================================================
##  search overlapping definition for the component of biological organizational units
## interactome must be matrix
## ---------------------------------------------------------------------------
## biology (cellular, tissular) Organizational Units  methods
## ---------------------------------------------------------------------------

overlap <-  function(interactome){

  if(!is.matrix(interactome))
    stop("the interactome must be a matrix")
  
    overlapM <- crossprod(interactome, interactome)
    overlapM[upper.tri(overlapM, diag=TRUE)] <- NA
    overlap <- which(overlapM>0, TRUE)
    nbSharedProt <- cbind(rownames(overlapM)[overlap[, 1]],
                          colnames(overlapM)[overlap[, 2]], overlapM[overlap])
    row.names(nbSharedProt) <- c(paste(rownames(overlapM)[overlap[, 1]], "-",
                                       colnames(overlapM)[overlap[, 2]], sep=""))
    colnames(nbSharedProt) <- c("C1", "C2", "nbSharedProt")
    return(nbSharedProt)
}
## ===========================================================================
## Annotation function
## ---------------------------------------------------------------------------
## Functions to complete cellular organizational units annotation  
## ---------------------------------------------------------------------------

getDescr <- function(x, database="GO"){
    
    Descr <- vector(length=length(x))
    names(Descr) <- x
    if(any(database == "KEGG")){
        if(length(x)>0){
            if(require("KEGG.db", character.only=TRUE,quietly=TRUE))
            kegg <- mget(x, KEGGPATHID2NAME, ifnotfound=NA)            
            Descr[x] <- unlist(kegg)
        }
    }
    
    if(any(database == "GO")){
        GOtermX <- grep("GO", x)
        if(length(GOtermX)>0){
            if(require("GO.db"))    
            gocomplex = x[GOtermX]
            xx = as.list(GOTERM)
            annot =  xx[gocomplex]
            termsGO = sapply(annot, function(x) if(!is.null(x)){Term(x)}else{NA})
            Descr[GOtermX] <- termsGO
        }
    }
    
    if(any(database =="MIPS")){
        MIPStermX <- grep("MIPS", x)
        if(length(MIPStermX)>0){
            MIPSterm <- x[MIPStermX]
            mips <- getMipsInfo()
            termsMIPS <- mips[MIPSterm]
            termsMIPS <- sapply(termsMIPS, function(x) attr(x, "desc"))
            Descr[MIPStermX] <- termsMIPS
        }
        
    }
   
    return(Descr)
}


## ===========================================================================
## universeBuilder method
## ---------------------------------------------------------------------------
## Methods for HyperG test for cellular organizational units
## ---------------------------------------------------------------------------
setMethod("universeBuilder", signature(p="CoHyperGParams"),
          function(p) {
              p@universeGeneIds
          })

## ===========================================================================
## categoryToEntrezBuilder method
## ---------------------------------------------------------------------------
## Methods for HyperG test for cellular organizational units
## ---------------------------------------------------------------------------
setMethod("categoryToEntrezBuilder",
          signature(p="CoHyperGParams"),
          function(p) {
              getCompToEntrezMap(p)
          })



getCompToEntrezMap <- function(p) {
    
    category <- get(p@categoryName)
    apply( category, 2, function(x)  names(which(x>0)))
}
## ===========================================================================
## Score (classify) complexes according pvalue and the distribution of the
## genes associated to the observed phenotype
## ---------------------------------------------------------------------------
complexStatus <- function(data, phenotype, interactome, threshold=0.05){

    interactomeP <- interactome[intersect(phenotype,rownames(interactome)),]
    
    significant <- names(data@pvalues[pvalues(data) <= threshold])
    non <-  names(data@pvalues[pvalues(data) > threshold])
    noPheno <- names(data@geneCounts[geneCounts(data)==0])

    d1 <- interactomeP[,significant]
    d2 <- interactomeP[,non[!non%in%noPheno]]
   
    res <- vector(mode="list")
    res$A <- significant
    res$B <- colnames(d2)
    res$C <- setdiff(colnames(interactome), c(res$A,res$B))

    return(res)
}
## ===========================================================================
## Get Fitness Defect gene list based on a cutoff over the fitness defect score
## can be applied by conditions (media) or generation time
## ---------------------------------------------------------------------------

getFDgene <- function(data, condition, cutoff=c(20,100,100), mode="generation", subset=c(5,15,20)){
    if((mode!="condition")&(mode!="generation"))
      stop("mode wrongly defined or unspecified, mode must be either 'condition' or 'generation'.")
    
    fitnessG <- vector(mode="list")
    if (mode=="generation"){
        if(length(data)!=nrow(condition)) stop ("length of data must equal the number of condition")
        
        for(j in 1:length(subset)){
            idx <- which(condition[,2]== subset[j])
            dat <- data[names(data)%in%condition[idx,3]]
            temp <- lapply(dat,function(val) res <- val[which(val> cutoff[j])])
            fitnessG <- c(fitnessG,temp)
        }
    }
    if (mode=="condition"){
        for(i in 1:length(subset)){
            dat <- data[unlist(subset[i])]
            temp <- lapply(dat,function(val) res <- val[which(val> cutoff[i])])
            fitnessG <- c(fitnessG,temp)
        }
    }
    return(fitnessG)
}

## ---------------------------------------------------------------------------
## Build Fitness Defect Matrix from the Giaever dataset
## ---------------------------------------------------------------------------

buildFDMat <- function(data, genenames, condition){
  GiaeverPhenoM <- matrix(0, nrow = length(genenames), ncol = length(condition),
                          dimnames = list(genenames, condition))
  
  genefitness <- lapply(data, names)
  for(i in 1:length(data)){
    id <- which(colnames(GiaeverPhenoM) == names(genefitness)[i])
    GiaeverPhenoM[genefitness[[i]], id] <- 1
  }
  return(GiaeverPhenoM)
}
## ===========================================================================
## Methods for the CoHyperGResult object
## 
## ---------------------------------------------------------------------------
setMethod("pvalues", signature(r="CoHyperGResult"),
          function(r) r@pvalues)

setMethod("oddsRatios", signature(r="CoHyperGResult"),
          function(r) r@oddsRatios)

setMethod("expectedCounts", signature(r="CoHyperGResult"),
          function(r) r@expectedCounts)

setMethod("geneCounts", signature(r="CoHyperGResult"),
          function(r) {
              sapply(r@catToGeneId, function(x) {
                  sum(geneIds(r) %in% x)
              })
          })

setMethod("universeCounts", signature(r="CoHyperGResult"),
          function(r) {
              ans <- listLen(r@catToGeneId)
              names(ans) <- names(r@catToGeneId)
              ans
          })


setMethod("hyperGTest",
          signature(p="CoHyperGParams"), 
          function(p) {
              origGeneIds <- geneIds(p)
              p@geneIds <- p@geneIds[p@geneIds %in% p@universeGeneIds]
              cat2Entrez <- categoryToEntrezBuilder(p)
              stats <- .doHyperGTest(p, cat2Entrez, list(),
                                     p@geneIds)
              ord <- order(stats$p)
              new("CoHyperGResult",
                  pvalues=stats$p[ord],
                  oddsRatios=stats$odds[ord],
                  expectedCounts=stats$expected[ord],
                  catToGeneId = cat2Entrez[ord],
                  annotation=annotation(p),
                  geneIds= p@geneIds,
                  testName=categoryName(p),
                  pvalueCutoff=pvalueCutoff(p),
                  testDirection=testDirection(p))
          })


setMethod("summary", signature(object="CoHyperGResult"),
          function(object, pvalue, htmlLinks=TRUE) {
              
              AMIGO_URL <- "http://www.godatabase.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&depth=0&query=%s"
              MIPS_URL <-"http://mips.gsf.de/genre/proj/yeast/searchCatalogAction.do?style=catalog.xslt&table=CELLULAR_COMPLEXES&num=%s&db=CYGD"
              
              
              if (missing(pvalue))
                pvalue <- pvalueCutoff(object)
              
              ## Filter Complex based on p-value and size
              pvals <- pvalues(object)
              ucounts <- universeCounts(object)
              wanted <- pvals < pvalue
              
              pvals <- pvals[wanted]
              ucounts <- ucounts[wanted]
              
              complexIds <- names(pvals)
              goID <- grep("GO",complexIds)
              mipsID <- grep("MIPS",complexIds)
              
              goIdUrls <- sapply(complexIds[goID], function(x) sprintf(AMIGO_URL, x))
              mipsIdUrls <- sapply(complexIds[mipsID], function(x) sprintf(MIPS_URL, x))
              
              odds <- oddsRatios(object)[wanted]
              ecounts <- expectedCounts(object)[wanted]
              counts <- geneCounts(object)[wanted]
              if (htmlLinks) {
                  categoryTerm <- complexIds
                  categoryTerm[goID] <- paste('<a href="', goIdUrls, '">', complexIds[goID],
                                   '</a>', sep="")
                  categoryTerm[mipsID] <- paste('<a href="', mipsIdUrls, '">', complexIds[mipsID],
                                   '</a>', sep="")
              }
          
              df <- data.frame(ID=complexIds, Pvalue=pvals, OddsRatio=odds,
                               ExpCount=ecounts, Count=counts,
                               Size=ucounts, Term=categoryTerm,
                               stringsAsFactors=FALSE,
                               row.names=NULL)
              df
          })


## ===========================================================================
## Work in progress - function not exported
## 
## ---------------------------------------------------------------------------
## 
## ---------------------------------------------------------------------------
logistPheno <- function(genename,interactome,pval,iter=FALSE){
    gI <- row.names(interactome)
    EGs <- intersect(genename, gI)
    v1 <-  rep(0, nrow(interactome))
    names(v1) <-  gI
    v1[EGs] <-  1
    
    reg1 <-  glm(v1 ~ interactome, family = binomial)

    sum1 <-  summary(reg1)
    ScISIs <- interactome[, !sum1$aliased[-1]]
    d2 <- sum1$coef[,4] < 0.001
    comp <-  ScISIs[,d2[-1]]

    if(iter==TRUE){
        if (ncol(comp)!=0){
            reg <-  glm(v1 ~ comp, family= binomial)
            sum1 <-  summary(reg)
        }
    }
    res <- row.names(sum1$coef)[sum1$coef[,4] < pval]

    if(length(res)!=0){
        result <- sum1$coef[res,4]
        names(result) <-  gsub("interactome","",res)
        
    }
    else{
         result <- "Null"
    }
    return(result)
}
## ===========================================================================
## Compute density estimate to test whether  genes (genename) inducing a phenotype
## are randomly distributed in the interactome
## ---------------------------------------------------------------------------
##  Observed ratio vs Expected ratios
## ---------------------------------------------------------------------------
densityEstimate <- function(genename, interactome, perm){

    if(length(genename) == 0 && !is.character(genename))
      stop("genename must be a character vector")
    if(length(genename) == 0 && !is.matrix(interactome))
      stop("interactome must be a binary matrix")  
    if(perm < 0 && perm == 0)
      stop("The number of permutation must be positive and different from zero")
    
    geneInteractome <- rownames(interactome)
    phenoInteractome <- interactome[geneInteractome%in%genename, ]
    nrpheno <- nrow(phenoInteractome)

    if(nrpheno == 0)
      stop("No overlap between the genenames and the interactome")

    nbPhenoGene <- colSums(phenoInteractome)
    sizeC <- colSums(interactome)
    ratio <- nbPhenoGene / sizeC

    ratioX <- matrix(0, nrow=ncol(interactome), ncol=perm)
    for(i in 1:perm) {
        u <-  sample(geneInteractome, nrpheno)
        nbPhenoGeneX <- colSums(interactome[u, ])
        ratioX[, i] <- nbPhenoGeneX / sizeC
    }
    return(new("deResult", "Observed"=ratio, "Expected"=ratioX, "Size"=sizeC))
}

## ===========================================================================
## Graph theory approach to test whether  genes (genename) inducing a phenotype
## are randomly distributed in the interactome
## ---------------------------------------------------------------------------
##  Observed edges vs Expected edges
## ---------------------------------------------------------------------------
graphTheory <-function(genename,interactome,perm){
    ginteractome <- row.names(interactome)
    
    ##restrict the genename list to those for which we have complex co-membership data
    interest <-  intersect(genename, ginteractome)
    
    PPIg <- interactome %*% t(interactome)
    
    v1 <- rep(0, nrow(interactome))
    names(v1) <-  row.names(interactome)
    v1[interest] <- 1
    
    ##compute the graph where all interesting genes have edges to each other
    interestG <-  outer(v1, v1)        
    ##drop the self-loops
    diag(interestG) <-  0
    g <-  PPIg * interestG
    edgeCount <- sum(g)

    ##now for the simulation
    ans <- rep(NA, perm)
    for(i in 1:perm) {
        vx <-  sample(v1, nrow(interactome))
        Gx <-  outer(vx, vx)
        diag(Gx) <- 0
        ans[i] <-  sum(PPIg*Gx)
    }
    up <- length(which(ans>edgeCount))
    pval <- up/perm

    return(new("gtResult", "Observed"=edgeCount, "Expected"=sort(ans),"Pvalue"=pval))
}
## ===========================================================================
## plot
## ---------------------------------------------------------------------------
## Plot function for the densityEstimate results 
## ---------------------------------------------------------------------------
setMethod("plot",
          signature=signature(x="deResult", y="missing"),
          function(x, exp.col="grey", obs.col="black", main="", ylim=NULL, ...){
            perm <- ncol(x@Expected)
            a <- x@Observed 
            b <- x@Expected
            
            ## ylim calculation
            if(is.null(ylim)){
                d <- density(b)
                maxb <- max(d$y)
                ylim = c(0, maxb)
            }
            
            
            plot(density(a[!is.na(a)]), col=obs.col, ylim=ylim, main=main, ...)
            for(i in 1:perm){
              lines(density(b[,i]), col= exp.col, type="l", pch=20)
            }      
          }
          )

## ===========================================================================
## plot
## ---------------------------------------------------------------------------
## Plot function for the graphTheory results  
## ---------------------------------------------------------------------------

setMethod("plot",
          signature=signature(x="gtResult", y="missing"),
          function(x, exp.col="grey", obs.col="red", main="",...){
            a <- x@Observed 
            b <- x@Expected
            maxb <- max(b)
            hist(b, xlim= c(0, a+a*.25),  axes=TRUE, col=exp.col, main=main, xlab="Edges",...)
            lines(c(a,  a), c(0, 10*maxb), lty=2, lwd=3, col=obs.col,...)
          }
          )      
## ===========================================================================
##  Test the association between AP-MS data and phenotype
## ---------------------------------------------------------------------------
##  via a graph  and permutation model
## ---------------------------------------------------------------------------
ppiInteract <- function(genename, expGraph, bait, prey, perm=10){
  ##we need to work with the underlying
  ##undirected graph
  graphData <- new("graphNEL", nodes=nodes(expGraph), edgeL=edges(expGraph), edgemode="directed")
  graphData <- ugraph(graphData)
  nodeNames <- nodes(graphData)
  canUse <- intersect(genename, nodeNames)
  
  bp <- intersect(bait, prey)
  bonly <- setdiff(bait, prey)
  ponly <- setdiff(prey, bait)
  
  communBP <-intersect(canUse, bp)
  sharedB <-intersect(canUse, bonly)
  sharedP <-intersect(canUse, ponly)
  
  nBP <- length(communBP)
  nB <- length(sharedB)
  nP <- length(sharedP)
  stopifnot(nBP + nB + nP == length(canUse))
  
  ##create a cluster graph - check to make sure we have everything here
  others = as.list(setdiff(nodeNames, canUse))
  others$cU = canUse
  cG = new("clusterGraph", clusters = others)
  obsInt = numEdges(intersection(cG, graphData))
  
  rval = rep(NA, perm)
  
  mysample = function(x, size, replace = FALSE, prob = NULL) {
    if(size == 0 && length(x)==0) 
      return(x)
    else return(sample(x, size, replace, prob))
  }
  genSample = function() 
    c( mysample(bp, nBP), mysample(bonly, nB), mysample(ponly, nP))
  
  for(i in 1:perm) {
    newS = genSample()
    others = as.list(setdiff(nodeNames, newS))
    others$cU = newS 
    cG = new("clusterGraph", clusters = others)
    rval[i] = numEdges(intersection(cG, graphData))
  }
  
  return(list(obVal = obsInt, permVals = rval))
}

## Truncate character strings
truncName <- function(x,n){
    if (nchar(x)> n) {
        ans <- paste(substr(x,0,n),"...",sep="")
    } 
    else{ 
        ans <- x
    } 
    return(ans)
}


## Reduce a binary matrix to the number of commun rows with a vector 
## Reduce Interactome
reduceM <- function(x, mat, threshold=0){

    if(!is.null(x) && !is.character(x) && !is.numeric(x))
      stop("x must be a vector")

    rN <- rownames(mat)
    common <- intersect(x, rN)

    if(length(common) == 0)
      stop("no intersection between x vector and the matrix rownames")

    res <- mat[common, ]
    res[ , colSums(res)> threshold]
}

##Same function that in Rintact: list2Matrix, therefore not exported
list2matrix <- function(x){
    allrows <- unlist(x)
    rows <- unique(allrows)
    cols <- names(x)
    mat <- matrix(0,nrow=length(rows),ncol=length(cols),dimnames=list(rows,cols))

    for(i in 1:length(cols)){
        idx <- rows%in%x[[i]]
        mat[idx, cols[i]] <- 1
    }
    return(mat)
}
