.packageName <- "apComplex"

#A function to decide whether or not to combine complexes based on changes
# in the product of L and C

#currently this function only includes the Fisher's exact portion for complexes with less than 20 proteins. For larger complexes, the binomial criteria is adequate.

LCdelta <- function(comp1,comp2,cMat,dataMat,baitList,simMat,mu,alpha,Beta,wsVal=20000000){

N <- dim(dataMat)[1]
M <- dim(dataMat)[2]-N


#compute likelihood contribution for separate complexes

#first for comp1
tP1 <- names(which(cMat[,comp1]==1))   
tB1 <- tP1[which(tP1 %in% baitList)]

nB1 <- length(tB1)
nH1 <- length(tP1) - nB1

temp1 <- matrix(dataMat[tB1,tP1],ncol=(nB1+nH1))

sim1 <- matrix(simMat[tB1,tP1],ncol=(nB1+nH1))
adjBin1 <- sum(temp1*(mu+alpha+Beta*sim1)-log(1+exp(mu+alpha+Beta*sim1)))-
		nB1*(mu+alpha+Beta-log(1+exp(mu+alpha+Beta)))

X1 <- sum(temp1)-nB1

b11 <- colSums(matrix(temp1[,1:nB1],ncol=nB1))
b11[1:nB1] <- b11-1
b01 <- (nB1-1)-b11
h11 <- NULL
h01 <- NULL

if(nH1>0){
	h11 <- colSums(matrix(temp1[,(nB1+1):(nB1+nH1)],ncol=nH1))
	h01 <- nB1 - h11
}

bh01 <- c(b01,h01)
bh11 <- c(b11,h11)



#repeat for comp2
tP2 <- names(which(cMat[,comp2]==1))
tB2 <- tP2[which(tP2 %in% baitList)]

nB2 <- length(tB2)
nH2 <- length(tP2) - nB2

temp2 <- matrix(dataMat[tB2,tP2],ncol=(nB2+nH2))

sim2 <- matrix(simMat[tB2,tP2],ncol=(nB2+nH2))
adjBin2 <- sum(temp2*(mu+alpha+Beta*sim2)-log(1+exp(mu+alpha+Beta*sim2)))-
		nB2*(mu+alpha+Beta-log(1+exp(mu+alpha+Beta)))



X2 <- sum(temp2)-nB2

b12 <- colSums(matrix(temp2[,1:nB2],ncol=nB2))
b12[1:nB2] <- b12-1
b02 <- (nB2-1)-b12
h12 <- NULL
h02 <- NULL

if(nH2>0){
	h12 <- colSums(matrix(temp2[,(nB2+1):(nB2+nH2)],ncol=nH2))
	h02 <- nB2 - h12
}

bh02 <- c(b02,h02)
bh12 <- c(b12,h12)



cX1 <- lgamma(nB1*(nB1+nH1-1)+1) - lgamma(X1+1) - lgamma(nB1*(nB1+nH1-1)-X1+1)
cX2 <- lgamma(nB2*(nB2+nH2-1)+1) - lgamma(X2+1) - lgamma(nB2*(nB2+nH2-1)-X2+1)


#compute likelihood contribution if left as 2 complexes

lK <- cX1+cX2+adjBin1+adjBin2


#combine complexes and compute likelihood contribution

tP <- names(which(rowSums(cMat[,c(comp1,comp2)])>0))
tB <- tP[which(tP %in% baitList)]

nB <- length(tB)
nH <- length(tP) - nB

temp <- matrix(dataMat[tB,tP],ncol=(nB+nH))

sim <- matrix(simMat[tB,tP],ncol=(nB+nH))
adjBin <- sum(temp*(mu+alpha+Beta*sim)-log(1+exp(mu+alpha+Beta*sim)))-
		nB*(mu+alpha+Beta-log(1+exp(mu+alpha+Beta)))



X <- sum(temp)-nB

b1 <- colSums(matrix(temp[,1:nB],ncol=nB))
b1[1:nB] <- b1-1
b0 <- (nB-1)-b1
h1 <- NULL
h0 <- NULL

if(nH>0){
	h1 <- colSums(matrix(temp[,(nB+1):(nB+nH)],ncol=nH))
	h0 <- nB - h1
}


bh0 <- c(b0,h0)
bh1 <- c(b1,h1)


cX <- lgamma(nB*(nB+nH-1)+1) - lgamma(X+1) - lgamma(nB*(nB+nH-1)-X+1)

#compute likelihood contribution if combined into 1 complex
lKm1 <- cX+adjBin

#find adjustments for edges that used to be in "no complex" bin
#make sure dataMat and cMat have their rows and columns in the same order

theseComps <- which(colSums(cMat[tP,])>0)

exCadjMat <- (1*(cMat[tP,theseComps] %*% t(cMat[tP,theseComps])) >0)[tB,]
diag(exCadjMat) <- 1


part4 <-  sum((matrix(1,nB,(nB+nH))-exCadjMat)*(dataMat[tB,tP]*alpha-
	log(1+exp(mu+alpha+Beta*simMat[tB,tP])) + 
	log(1+exp(mu+Beta*simMat[tB,tP]))))


ans <- lKm1-lK+part4

#if total size of complex is less than 20 proteins, 
#include Fisher's exact component
if((nB+nH)<20){

	ans <- ans + log(fisher.test(rbind(bh0,bh1),workspace=wsVal)$p.value) - 
		log(fisher.test(rbind(bh01,bh11),workspace=wsVal)$p.value) -
		log(fisher.test(rbind(bh02,bh12),workspace=wsVal)$p.value)

}
return(ans)
}


 

#function to find bhmaxSubgraphs from a bait-hit adjacency matrix

#by default, unreciprocated bait-bait edges will be treated as observed

#adjMat has dimensions N by (N+M) corresponding to N baits and M hits
#adjMat is named with row and column names corresponding to proteins

#this function uses another function called reduceMat 
#any columns that are subsets of other columns are eliminated


bhmaxSubgraph <- function(adjMat,unrecip=1){

	!is.null(colnames(adjMat)) || stop("Columns of adjMat must be named")
	!is.null(rownames(adjMat))|| stop("Rows of adjMat must be named")

	Nb <- dim(adjMat)[1]
	Nh <- dim(adjMat)[2] - Nb
	
	identical(rownames(adjMat),colnames(adjMat)[1:Nb]) || stop("rownames
	and first Nb colnames of adjMat must be identical")
	
	#make adjMat[,1:Nb] symmetric
	if(unrecip==0){
	 adjMat[,1:Nb] <- pmin(adjMat[,1:Nb],t(adjMat[,1:Nb]))
	} else adjMat[,1:Nb] <- pmax(adjMat[,1:Nb],t(adjMat[,1:Nb]))

	#record the order of the columns of adjMat so the order of the
	#rows of the affiliation matrix will match the original column order
	rowOrder <- colnames(adjMat)
	hNames <- rowOrder[!rowOrder %in% rownames(adjMat)]

	#make diagonal entries equal to 1
	diag(adjMat) <- 1
	
	#first find baits that only have hit pairs  
	#will put these in at the end

	hitComps <- which(rowSums(adjMat[,1:Nb])==1)
	baitComps <- rownames(adjMat)
	if(length(hitComps)>0) 	baitComps <- baitComps[-hitComps]
	
	#now reorder by complex size - smaller first
	baitComps <- baitComps[order(rowSums(adjMat[baitComps,]))]


	Nbait <- length(baitComps)

	#reorder so that hit only complexes are last
	adjMat <- adjMat[c(baitComps,names(hitComps)),
				c(baitComps,names(hitComps),hNames)]


	
	M <- as.matrix(adjMat[1,])

	for (i in 2:Nbait){

	g <- as.matrix(c(rep(0,i-1),adjMat[i,i:(Nb+Nh)]))
	G <- NULL

	V <- which(M[i,]==1)
	n2c <- length(V)

	if(n2c>0){

	lose <- rep(FALSE,n2c)

	for (k in 1:n2c){
		
		v <- M[,V[k]]

		#na.rm=TRUE statement included 
		#for cases when all proteins are baits
		if(sum(v[(i+1):(Nb+Nh)]>g[(i+1):(Nb+Nh)],na.rm=TRUE)>0){

		lose[k] <- TRUE		

		v1 <- v
		v1[i] <- 0
		G <- cbind(G,v1)

		v2 <- c(v[1:i],pmin(v[(i+1):(Nb+Nh)],g[(i+1):(Nb+Nh)]))
		G <- cbind(G,v2)
		}

	}

	if(sum(lose)>0) M <- as.matrix(M[,-V[lose]])

	}

	G <- cbind(G,g)
	if(dim(G)[2]>1) G <- reduceMat(G,compare="less")
		
	for (g in 1:dim(G)[2]){
		
		if(dim(M)[2]>0){
		Msub <- as.matrix(M[,which(colSums(M)>=sum(G[,g]))])
					
		if(dim(Msub)[2]>0){
		if(!vecInMat(G[,g],Msub,compare="less")) M <- cbind(M,G[,g])
		} else M <- cbind(M,G[,g])
		} else M <- cbind(M,G[,g])
		
		
	}
	
	}


	if (Nbait<Nb){
	if (Nbait+1==Nb) { 
	   M <- cbind(M,as.matrix(adjMat[Nb,]))
	} else  M <- cbind(M,t(adjMat[(Nbait+1):Nb,]))
	}

	newcolnames <- paste("bhmax",1:dim(M)[2],sep="")
	colnames(M) <- newcolnames
	M <- M[rowOrder,]

	return(M)

}


# a function to run the entire algorithm at once


findComplexes <- function(adjMat,simMat=NULL,sensitivity=.75,specificity=.995, Beta=0, wsVal = NULL){

	##find number of baits and number of hits

	N <- dim(adjMat)[1]
	M <- dim(adjMat)[2]-N

	##set parameters for logistic regression model

	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	##create simMat of zeroes with diagonal of ones if one is not specified

	if(is.null(simMat)) {
		simMat <- matrix(0,N,N+M)
		diag(simMat) <- 1
		colnames(simMat) <- colnames(adjMat)
		rownames(simMat) <- rownames(adjMat)
	}

	##find maximal BH-complete subgraphs for initial 
	##protein complex membership graph estimate

	print("Finding Initial Maximal BH-complete Subgraphs")
	PCMG <- bhmaxSubgraph(adjMat,unrecip=1*(sensitivity<specificity))

	##combine complexes using LC measure

	#put PCMG in order by number of baits in complex
	
	baitOrder <- order(colSums(PCMG[1:N,]),decreasing=TRUE)
	PCMGo <- PCMG[,baitOrder]
	
	#merge complex estimates using LCdelta criteria
	print("Combining Complex Estimates")
	PCMG2 <-
	mergeComplexes(PCMGo,adjMat=adjMat,simMat=simMat,Beta=Beta,
			sensitivity=sensitivity,specificity=specificity, wsVal = wsVal)

	return(PCMG2)

}



 
##propose complex combinations and compare to LxC measure

##PCMG is an initial estimate for the PCMG, presumably obtained 
##by finding bhMaximal subgraphs using bhmaxSubgraph

mergeComplexes <- function(PCMG,adjMat,simMat=NULL,sensitivity=.75,specificity=.995,Beta=0, wsVal = NULL){

	   
	bNames <- rownames(adjMat)
	diag(adjMat) <- 1

	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	N <- dim(adjMat)[1]
	M <- dim(adjMat)[2]-N

	#make simMat with entries 0 and diagonal 1 if simMat not specified
	if(is.null(simMat)){
	simMat <- matrix(0,N,N+M)
	diag(simMat) <- 1
	rownames(simMat) <- rownames(adjMat)
	colnames(simMat) <- colnames(adjMat)}

	i <- 1 
	K <- dim(PCMG)[2]

	keepgoing <- i < K
   
 
	while(keepgoing){

	keepgoing2 <- TRUE

	while(keepgoing2){

	testset <- which(colSums(PCMG[,i]*PCMG)>0)
	testset <- testset[-which(testset==i)]
	Ktemp <- length(testset)

	
	if(Ktemp>0 & Ktemp!=0){
	LCIncs <- rep(0,Ktemp)

	for (m in 1:Ktemp){
	
	LCIncs[m] <- LCdelta(i,testset[m],PCMG,dataMat=adjMat,
				baitList=bNames,simMat=simMat,
				mu=mu,alpha=alpha,Beta=Beta)
	
	
	
	}
	
	same <- sum(LCIncs>0)>0
	
	if(same){
		thisone <- testset[which.max(LCIncs)]
		PCMG[,i] <- pmax(PCMG[,i],PCMG[,thisone])
		PCMG <- as.matrix(PCMG[,-thisone])
		K <- dim(PCMG)[2]
		if(thisone<i) i <- i-1
		

	}
	}else same <- FALSE
	keepgoing2 <- same
	}

	
	i <- i+1
	keepgoing <- i < K
}
nC <- dim(PCMG)[2]
colnames(PCMG) <- paste("Complex",1:nC,sep="")
return(PCMG)
}
#function to eliminate columns that are less than or identically equal 
#to other columns

#if working in identical mode, will keep the column that appears first 

reduceMat <- function(mat,compare="equal"){


	#take off colnames of mat
	colnames(mat) <- NULL
	
	if(compare=="equal"){
	
		cS <- colSums(mat)
		cSt <- table(cS)
		cStt <- as.numeric(names(cSt[cSt>1]))

		#find columns with singly affiliated members
		#these columns will not be tested or removed

		temp <- which(rowSums(mat)==1)

		if(length(temp)>0){			

			if(length(temp)==1){
			notest <- which(mat[temp,]>0)
			}
			if(length(temp)>1){
			notest <- which(colSums(mat[temp,])>0)
			}
		} else {notest <-  NULL}
				
	
		keep <- rep(TRUE,length(cS))
		if(length(notest)>0) keep[notest] <- TRUE

		for (j in cStt){

			testset <- which(cS==j)

			for (i in testset){
			k <- which(testset==i)
			if(keep[i]){
			
			l <- whichVecInMat(mat[,i],
				as.matrix(mat[,testset[-k]]))
 			if(length(l)>0){
				keep[testset[-k][l]] <- FALSE
			}
			}
			}
		}	

	matRed <- mat[,keep]

	}

	if(compare=="less"){

		#order columns from largest to smallest column sum

		ord <- order(colSums(mat),decreasing=TRUE)
		matOrd <- mat[,ord]

		nCols <- dim(mat)[2]

		#find columns with singly affiliated members
		#these columns will not be tested or removed
		temp <- which(rowSums(matOrd)==1)

		if(length(temp)>0){			

			if(length(temp)==1){
			testset <- which(matOrd[temp,]==0)
			}
			if(length(temp)>1){
			testset <- which(colSums(matOrd[temp,])==0)
			}
		} else {testset <- 1:nCols}
				
		#don't directly test first column
		if(1 %in% testset) testset <- testset[-which(testset==1)]
		lose <- NULL

		for (i in testset){

		Vec <- matOrd[,i]
		test <- vecInMat(Vec,as.matrix(matOrd[,1:(i-1)]),
							compare="less")
		if(test) lose <- c(lose,i) 

		}		
	
	if(length(lose)>0){
		matRed <- mat[,-ord[lose]]
	}else matRed <- mat
	}	


	return(matRed)
}
#function to sort complex estimates into MBME, SBMH, UnRBB

sortComplexes <- function(PCMG,adjMat){

   diag(adjMat) <- 0
   bNames <- rownames(adjMat)
   nComps <- dim(PCMG)[2]
	    

   SBMHs <- which(colSums(PCMG[bNames,])==1)

   UnRBBs <- which(colSums(PCMG)==2 & colSums(PCMG[bNames,])==2)
   keep <- rep(TRUE,length(UnRBBs))
   for (i in UnRBBs){
       tBs <- which(PCMG[,i]==1)
       keep[which(UnRBBs==i)] <- sum(adjMat[tBs,tBs])==1
   }   
   UnRBBs <- UnRBBs[keep]

   MBMEs <- c(1:nComps)[!(1:nComps) %in% c(SBMHs,UnRBBs)]

   if(length(MBMEs)>0){
   MBME <- as.matrix(PCMG[,MBMEs])
   colnames(MBME) <- paste("MBME",1:length(MBMEs),sep="")
   } else MBME <- NA

   if(length(SBMHs)>0){
   SBMH <- as.matrix(PCMG[,SBMHs])
   colnames(SBMH) <- paste("SBMH",1:length(SBMHs),sep="")
   } else SBMH <- NA

   if(length(UnRBBs)>0){
   UnRBB <- as.matrix(PCMG[,UnRBBs])
   colnames(UnRBB) <- paste("UnRBB",1:length(UnRBBs),sep="")
   } else UnRBB <- NA


   sComps <- list(MBME=MBME,SBMH=SBMH,UnRBB=UnRBB)

   return(sComps)

}
#function to see if a vector x is identical, less than or equal, or greater than or equal to at least one of the columns in a matrix mat


vecInMat <- function(x,mat,compare="equal"){

	(length(x)==dim(mat)[1]) || stop("vector length must equal row dimension of matrix")

	if(compare=="equal") f <- function(temp) sum(x != temp)==0
	if(compare=="less") f <- function(temp) sum(x > temp)==0
	if(compare=="greater") f <- function(temp) sum(x < temp)==0

	n <- dim(mat)[2]

	test <- FALSE
	i <- 1
	while (!test & i<=n){

	temp <- mat[,i]
	
	test <- f(temp)

	i <- i+1
	}

	return(test)
}

#function to report which of the columns in a matrix mat x is equal to (or strictly less than)


#will return the column name, or the index if the matrix is unnamed

whichVecInMat <- function(x,mat,compare="equal"){

	A <- colnames(mat)

	if(compare=="equal") compFun <- function(y) sum(x != y)==0
	if(compare=="less") compFun <- function(y) sum(x > y)==0
	if(compare=="greater") compFun <- function(y) sum(x < y)==0

	v <- apply(mat,2,FUN = compFun)

	w <- which(v==1)
	if(!is.null(A)) w <- A[w]

	return(w)
}


.First.lib <- function(libname,pkgname,where){
	require("graph") || stop("Need package graph")
}

