.packageName <- "CoCiteStats"
"actorAdjTable" <-
function (twT, eps = 1e-08) 
{
    twT = as.double(twT)
    m1 = twT[1] * twT[4]
    m2 = twT[2] * twT[3]
    D = m1 - m2
    if (abs(D) < eps) 
        return(c(u11 = 0.5, u12 = 0.5, u21 = 0.5, u22 = 0.5))
    v = (m1 - sqrt(m1 * m2))/D
    return(c(u11 = v, u12 = 1 - v, u21 = 1 - v, u22 = v))
}

"gene.gene.statistic" <-
function(g1, g2, numPapers, PaperLen)
{
   ## no adjustment
   twT <- twowayTable(g1, g2, weights=FALSE, numPapers)
   original <- twTStats(twT)
   
   ## Gene size adjusted
   twT <- actorAdjTable(twT)
   gs <- twTStats(twT)
   
   ## Paper size adjusted
   twT <- twowayTable(g1, g2, weights=TRUE, numPapers, PaperLen)
   ps <- twTStats(twT)
   
   ## both
   twT <- actorAdjTable(twT)
   both <- twTStats(twT)
 
   return(list(original=original, gs=gs, ps=ps, both=both))
 }

gene.geneslist.sig <-
function(gene, geneslist, numPapers, PaperLen, n.resamp=100)
 {
   require("humanLLMappings")
   if (missing(PaperLen)) 
       PaperLen <- unlist(eapply(humanLLMappingsPMID2LL, length))

   n <- length(geneslist)
    gene.geneslist.stat <- gene.geneslist.statistic(gene, geneslist, 
                                numPapers, PaperLen)
    gene.geneslist.stat <- sapply(gene.geneslist.stat,function(x) x)
    
    gene.geneslist.stat.null <- list()
    length(gene.geneslist.stat.null) <- n.resamp
    
    gN = ls(humanLLMappingsLL2PMID)
    for (i in 1:n.resamp)
    {
       geneslist.null <- sample(gN, n, replace=FALSE)
       gene.geneslist.stat.null[[i]] <- gene.geneslist.statistic(gene,
                geneslist.null, numPapers, PaperLen)
    }
    
    temp <- lapply(gene.geneslist.stat.null, function(x) 
                  gene.geneslist.stat < sapply(x, function(y) y))
    
    temp1 <- temp[[1]]
    
    for ( i in 2:length(temp))
    {
       temp1 <- temp1 + temp[[i]]
    }   
    
    return(list(statistic=t(gene.geneslist.stat), pval=apply(temp1,1, function(x) x/n.resamp)))
  }

"gene.geneslist.statistic" <-
function(gene, geneslist, numPapers, PaperLen)
{
   gene.geneslist.stat <- list()
   n <- length(geneslist)
   length(gene.geneslist.stat) <- n
   names(gene.geneslist.stat) <- geneslist
   
   for ( i in geneslist)
    {
      gene.geneslist.stat[[i]] <- gene.gene.statistic(gene, i,
             numPapers,PaperLen)
     }
    
    temp <- NULL
    for ( i in names(gene.geneslist.stat[[1]]))
     temp[[i]] <- apply(sapply(gene.geneslist.stat, function(x) x[[i]]),1,sum)
    return(temp) 
 }

paperLen <- function (X) 
{
    require("humanLLMappings") || stop("can't match without data")
    papersByLL = mget(X, humanLLMappingsLL2PMID, ifnotfound = NA)
    papers = unique(unlist(papersByLL))
    inap = is.na(papers) | papers == "NA"
    if (any(inap)) 
        papers = papers[!inap]
    if (length(papers) == 0) 
        return(list(Counts = numeric(0), papers = numeric(0)))
    paperForLL = mget(papers, humanLLMappingsPMID2LL)
    paperCts = sapply(paperForLL, length)
    return(list(Counts = paperCts, papers = papersByLL))
}
twTStats <- function (twT) 
{
    twT = as.double(twT)
    m1 = twT[1] + twT[2]
    m2 = twT[1] + twT[3]
    M = sum(twT)
    ##check to see if either citation count is zero
    if( (m1 == 0) || (m2 == 0 ) )
      return(c(Concordance = 0, Jaccard  = 0, Hubert = 0)) 
    else
     return(c(Concordance = twT[1], Jaccard = twT[1]/(sum(twT[1:3])), 
        Hubert = (M * twT[1] - m1 * m2)/sqrt(m1 * m2 * (M - m1) * 
            (M - m2))))
}

twowayTable <- function (g1, g2, weights = TRUE, numPapers, PaperLen) 
{
    if (missing(numPapers)) 
        numPapers = length(unique(unlist(eapply(
              humanLLMappingsLL2PMID,function(x) x))))
    if (missing(PaperLen) && (weights == TRUE))
      PaperLen <- unlist(eapply(humanLLMappingsPMID2LL, length))
    
    wh = paperLen(c(g1, g2))
    if (!length(wh$papers))  # no papers found
      return(c(n11=0, n12=0, n21=0, n22=numPapers))
    g1pp = wh$papers[[g1]]
    g2pp = wh$papers[[g2]]
    ##FIXME: defensive programming as string NA's seem to appear at times
    ina = is.na(g1pp) | g1pp == "NA"
    g1pp = g1pp[!ina]
    ina = is.na(g2pp) | g2pp == "NA"
    g2pp = g2pp[!ina]
    matches = intersect(g1pp, g2pp)
    unions = union(g1pp, g2pp)
    just1 = setdiff(g1pp, g2pp)
    just2 = setdiff(g2pp, g1pp)
    if (weights) {
        n11 = sum(1/wh$Counts[matches])
        n12 = sum(1/wh$Counts[just1])
        n21 = sum(1/wh$Counts[just2])
        n22 = sum(1/PaperLen[!(names(PaperLen) %in% unions)])
       }
    else {
        n11 = length(matches)
        n12 = length(just1)
        n21 = length(just2)
        n22 = numPapers - n11 - n12 - n21
    }
    c(n11 = n11, n12 = n12, n21 = n21, n22 = n22)
}

