# Legacy package-name marker used by pre-namespace R packages.
.packageName <- "MiPP"

##########################################################################
#
#        
#    MiPP(Misclassification Penalized Posterior)-based Classification
#
#                            by
#
#        Mat Soukup, HyungJun Cho, and Jae K. Lee
#
#                   Version 1.0.0 (2005-04-14)   
#
##########################################################################


# Package load hook (legacy .First.lib API used by pre-namespace packages).
# Prints a startup banner and, in an interactive Windows Rgui session with
# Biobase available, adds the package vignettes to the Rgui menu.
#
# Bug fixed: invisible() was called *before* the menu code, so it neither
# suppressed the return value nor ended the function; it is now the last
# statement so the hook returns NULL invisibly.
.First.lib <- function(lib, pkg) { 
   cat("MiPP version 1.0.0 (2005-04-18)\n") 
   if(.Platform$OS.type=="windows" && require(Biobase) && interactive() 
      && .Platform$GUI=="Rgui") { addVigs2WinMenu("MiPP") }
   invisible()
}

######START#######################################################################################
# Main entry point: MiPP (Misclassification Penalized Posterior) model search.
#
# x, y           : training expression matrix (gene x sample) and class labels.
# x.test, y.test : optional independent test set (same gene x sample layout).
#                  When supplied, models are built on (x, y) and scored on it;
#                  otherwise models are built and evaluated by repeated random
#                  splits via cv.mipp.rule().
# probe.ID       : optional gene identifiers reported in the output
#                  (defaults to the row index of x).
# rule           : classification rule -- "lda", "qda", "logistic",
#                  "svmlin", or "svmrbf".
# method.cut     : NOTE(review): currently unused -- only the t-test filter
#                  implemented in pre.select() is applied.
# percent.cut    : fraction of genes kept by the pre-selection filter
#                  (>= 1 disables filtering).
# model.sMiPP.margin, min.sMiPP, n.drops : model-selection tuning passed to
#                  mipp.rule()/cv.mipp.rule().
# nfold, p.test, n.split, n.split.eval   : cross-validation/split settings.
#
# Returns a list describing the selected model(s); its components differ
# between the independent-test-set branch and the split-based branch.
mipp <- function(x, y, x.test=NULL, y.test=NULL, probe.ID=NULL, rule="lda", 
                 method.cut="t.test", percent.cut = 0.01, 
                 model.sMiPP.margin=0.01, min.sMiPP=0.85, n.drops=2,
                 nfold=5, p.test=1/3, n.split=20, n.split.eval=100){

     # Default gene IDs are just the row indices of x.
     if(length(probe.ID)==0) probe.ID <- 1:nrow(x)
     nfold <- max(2, min(nfold, nrow(x))) # 2 ~ N
     # NOTE(review): require() returns FALSE instead of erroring when the
     # package is missing; a failure only surfaces later as "could not find
     # function".
     if(rule=="lda" | rule=="qda") require(MASS)
     if(rule=="svmlin" | rule=="svmrbf") require(e1071)
     print("Please wait...")
     
     #when there is an independent test set
     if(length(x.test) > 0) {

        #Data manipulation: give both matrices 1..n dimnames, then transpose
        #from (gene x sample) to (sample x gene).
        colnames(x) <- 1:ncol(x)           
        rownames(x) <- 1:nrow(x)  
        colnames(x.test) <- 1:ncol(x.test)
        rownames(x.test) <- 1:nrow(x.test)  

        x <- t(x) #convert (gene x sample) into (sample x gene)
        x.test <- t(x.test)

        y <- factor(y) 
        y.test <- factor(y.test) 

        #pre-selection: keep the top `percent.cut` fraction of genes by
        #t-statistic; `ii` indexes the surviving columns.
        pre.model <- "ALL"
        ii <- 1:ncol(x)
        if(percent.cut < 1) {
           ii <- pre.select(x, y, percent.cut=percent.cut)
           pre.model <- ii
          
        }
        x.tr <- x[,ii]; y.tr <- y
        x.te <- x.test[,ii]; y.te <- y.test

        # Forward gene selection on the training set, scored on the
        # independent test set.
        out <- mipp.rule(x.train=x.tr, y.train=y.tr, x.test=x.te, y.test=y.te, 
                         nfold=nfold, min.sMiPP=min.sMiPP, n.drops=n.drops, rule=rule) 
        # Map selected column indices back to the caller's probe IDs.
        out[,2] <- probe.ID[ii[out[,2]]]
     
        # "*" marks the model size with the best sMiPP; "**" marks the most
        # parsimonious model within model.sMiPP.margin of that best.
        Select <- rep(" ", nrow(out))
        i <- min(which(out$sMiPP >= max(out$sMiPP))); Select[i] <- "*"
        j <- min(which(out$sMiPP >= out$sMiPP[i]-model.sMiPP.margin)); Select[j] <- "**"
        out <- cbind(out, Select)

        print("Done.")
        return(list(rule=rule, nfold=nfold, pre.model=pre.model, model=out)) 

     }


     #when there is no independent test set
     if(length(x.test)==0) {  

        #Data manipulation (same reshaping as above, training data only)
        colnames(x) <- 1:ncol(x)           
        rownames(x) <- 1:nrow(x)  
        x <- t(x) #convert (gene x sample) into (sample x gene)
        y <- factor(y) 

        #pre-selection (as above)
        pre.model <- "ALL"
        ii <- 1:ncol(x)
        if(percent.cut < 1) {
           ii <- pre.select(x, y, percent.cut=percent.cut)
           pre.model <- ii
        }
        x.tr <- x[,ii]; y.tr <- y

        # Repeated random train/test splits: each split runs the forward
        # search, then the chosen models are re-evaluated on n.split.eval
        # further splits inside cv.mipp.rule().
        out <- cv.mipp.rule(x=x.tr, y=y.tr, nfold=nfold, p.test=p.test, n.split=n.split, n.split.eval=n.split.eval,
                               model.sMiPP.margin=model.sMiPP.margin, min.sMiPP=min.sMiPP, n.drops=n.drops, rule=rule)

        # Map gene indices back to probe IDs, in the per-split model table...
        out$CV.out$Gene <- probe.ID[ii[out$CV.out$Gene]]

        # ...and in the evaluation table, whose last 6 columns are summary
        # statistics (hence the ncol-6 bound on the gene columns).
        for(i in 1:n.split) {
            k <- ncol(out$CVCV.out)-6
            k <- max(which(!is.na(out$CVCV.out[i,1:k])))
            kk <- as.numeric(out$CVCV.out[i,2:k])
            out$CVCV.out[i,2:k] <- probe.ID[ii[kk]]
        }

        rownames(out$CV.out) <- 1:nrow(out$CV.out)
        print("Done.")

        return(list(rule=rule, nfold=nfold, n.split=n.split, n.split.eval=n.split.eval, 
                    sMiPP.margin=model.sMiPP.margin, p.test=p.test,
                    pre.model=pre.model, model=out$CV.out, model.eval=out$CVCV.out)) 

     }

}



# Split-based model search: repeats the MiPP forward gene selection on
# n.split random stratified train/test partitions of (x, y), then
# re-evaluates each split's chosen model on n.split.eval further partitions.
#
# x, y         : (sample x gene) matrix and class labels (already filtered)
# nfold        : folds used inside mipp.rule() for the forward search
# p.test       : fraction of each class held out as the test part of a split
# n.split      : number of model-building splits
# n.split.eval : number of evaluation splits applied to every chosen model
# model.sMiPP.margin, min.sMiPP, n.drops, rule : passed to mipp.rule()
#                NOTE(review): the default `n.drops=n.drops` is
#                self-referential and errors if the argument is missing.
#
# Returns list(genes=, CV.out=, CVCV.out=): per-split gene lists, the
# stacked forward-search traces, and the evaluation summary table.
cv.mipp.rule <- function(x, y, nfold, p.test, n.split, n.split.eval, 
                         model.sMiPP.margin=0.01, min.sMiPP=0, n.drops=n.drops, rule="lda") {

    n.gene <- ncol(x)
    # CV.out is seeded with n.split all-NA placeholder rows so rbind() below
    # has matching columns; the placeholders are dropped after the loop.
    CV.out <- data.frame(matrix(NA, n.split, 7))
    colnames(CV.out) <- c("Split","Order","Gene","ErrorRate","MiPP","sMiPP","Select")

    u.y <- unique(y)
    n.y <- length(u.y)

    #Select genes from n.split splits
    gene.list <- data.frame(matrix(NA, n.split, n.gene))
    rownames(gene.list) <- paste("S",1:n.split, sep="")
    colnames(gene.list) <- paste("G",1:n.gene, sep="")

    for(iter in 1:n.split) {

# debug trace left by the authors: reports split progress on the console
print(iter)

        # Stratified split: hold out round(p.test * class size) randomly
        # chosen samples from every class as the test part.
        i.test  <- c()
        for(i in 1:n.y) {
            part <- sample(which(y==u.y[i])) 
            n.part <- round(length(part)*p.test)
            i.test <- c(i.test, part[1:n.part])
        }

        x.train <- x[-i.test,]
        x.test  <- x[ i.test,]
        y.train <- y[-i.test]
        y.test  <- y[ i.test]

        tmp <- mipp.rule(x.train=x.train,y.train=y.train,x.test=x.test,y.test=y.test,
                         nfold=nfold, min.sMiPP=min.sMiPP, n.drops=n.drops, rule=rule)

        # "*" = best-sMiPP model of this split; "**" = smallest model within
        # model.sMiPP.margin of the best; its genes are kept in gene.list.
        Split <- rep(iter, nrow(tmp))
        Select <- rep(" ", nrow(tmp))
        i <- min(which(tmp$sMiPP >= max(tmp$sMiPP))); Select[i] <- "*"
        j <- min(which(tmp$sMiPP >= tmp$sMiPP[i]-model.sMiPP.margin)); Select[j] <- "**"
        gene.list[iter,1:j] <- tmp$Gene[1:j]
        tmp <- cbind(Split, tmp, Select)
        CV.out <- rbind(CV.out, tmp)

     }
     
     # Drop gene.list columns unused by every split, and the NA seed rows.
     # NOTE(review): if no column were all-NA, `i` would be empty and
     # gene.list[,-i] would then select *zero* columns; this relies on the
     # forward search always stopping well before n.gene genes -- verify.
     tmp <- apply(gene.list, 2, is.na)
     i <- which(apply(tmp, 2, sum) >= n.split)
     gene.list <- gene.list[,-i]
     CV.out <- CV.out[-c(1:n.split),]


     ###################################
     #Evaluate optimal models of splits 
     out.Er    <- matrix(NA, n.split, n.split.eval)
     out.MiPP  <- matrix(NA, n.split, n.split.eval)
     out.sMiPP <- matrix(NA, n.split, n.split.eval)
     out2 <- data.frame(matrix(NA, n.split, 6))
     rownames(out2) <- 1:n.split 
     colnames(out2) <- c("mean ErrorRate","mean MiPP","mean sMiPP","5% sMiPP","50% sMiPP","95% sMiPP")
     for(j in 1:n.split.eval) { #Splits for evaluation
        # Same stratified sampling scheme as above.
        i.test  <- c()
        for(i in 1:n.y) {
            part <- sample(which(y==u.y[i])) 
            n.part <- round(length(part)*p.test)
            i.test <- c(i.test, part[1:n.part])
        }

        x.train <- x[-i.test,]
        x.test  <- x[ i.test,]
        y.train <- y[-i.test]
        y.test  <- y[ i.test]
        for(jj in 1:n.split) { #Split  
            # Score split jj's selected gene set on this evaluation split.
            k <- max(which(!is.na(gene.list[jj,])==TRUE))
            kk <- as.numeric(gene.list[jj,1:k])
            tmp2 <- get.mipp(x.train[,kk], y.train, x.test[,kk],  y.test, rule=rule)
            out.Er[jj,j]    <- tmp2$ErrorRate 
            out.MiPP[jj,j]  <- tmp2$MiPP
            out.sMiPP[jj,j] <- tmp2$sMiPP
        }  
     }
   
     # Per-split summaries over the evaluation splits.
     out2[,1] <- apply(out.Er, 1, mean)
     out2[,2] <- apply(out.MiPP, 1, mean)
     out2[,3] <- apply(out.sMiPP, 1, mean)
     out2[,4:6] <- t(apply(out.sMiPP, 1, quantile, probs=c(0.05, 0.50, 0.95)))

    Split <- 1:n.split
    CVCV.out <- cbind(Split, gene.list, out2)

    return(list(genes=gene.list, CV.out=CV.out, CVCV.out=CVCV.out)) 

}



#Function to compute MiPP
# Greedy forward gene selection driven by cross-validated MiPP.
#
# Starting from the single gene with the best MiPP summed over nfold CV
# folds, genes are added one at a time; after each addition the current
# model is scored on (x.test, y.test).  The search stops after n.drops
# consecutive additions fail to improve sMiPP (once min.sMiPP has been
# reached), or when the smallest class becomes too small for the model.
#
# x.train/x.test : (sample x gene) matrices; y.train/y.test : class labels.
# Returns a data.frame with one row per model size:
#   Order, Gene (column id of the gene added), ErrorRate, MiPP, sMiPP.
mipp.rule <- function(x.train, y.train, x.test=NULL, y.test=NULL, nfold=5, min.sMiPP=0, n.drops=2, rule="lda") {
       
     n.gene <- ncol(x.train)
     n.sample.train <- nrow(x.train)
     n.sample.test  <- nrow(x.test)
     colnames(x.train) <- 1:n.gene
     colnames(x.test)  <- 1:n.gene

     # Stratified fold labels: cycle 1..nfold over the samples ordered by
     # class so every fold gets a balanced class mix.  `tmp` oversizes the
     # repetition (about 2x what is needed) so the cycle covers all samples.
     tmp <- round(n.sample.train/nfold*2)
     id <- rep((1:nfold),tmp)[1:n.sample.train] #CHECK
     i <- (1:n.sample.train)[sort.list(y.train)]
     id <- id[sort.list(i)]
     #id <- sample(id, size=n.sample.train, replace=FALSE)
        
     opt.genes <-c()
     opt.Er    <-c()
     opt.MiPP    <-c()
     opt.sMiPP    <-c()

     #Pick 1-gene model
     # Score every single gene by its MiPP accumulated over the CV folds.
     out <- matrix(0, nfold, n.gene)
     for(i in 1:nfold) {
        y.tr <- y.train[id!=i]
        y.te <- y.train[id==i]
        for(j in 1:n.gene) {
             x.tr <- data.frame(x.train[id!=i,j])
             x.te <- data.frame(x.train[id==i,j]) 
             out[i,j] <- get.mipp(x.tr, y.tr, x.te, y.te, rule=rule)$MiPP
        }
     }
     out.sum <- apply(out, 2, sum)    
     # Ties are broken by the lowest column index.
     pick.gene <- min(which(out.sum >= max(out.sum)))
     pick.gene <- as.numeric(colnames(x.train)[pick.gene])
     opt.genes <- c(opt.genes, pick.gene)
     x.train.opt  <- data.frame(x.train[, opt.genes])
     x.train.cand <- x.train[,-opt.genes]

     #Evaluate by DS
     # Score the 1-gene model on the held-out (x.test, y.test) data.
     xx.train <- data.frame(x.train[,opt.genes]); colnames(xx.train) <- opt.genes
     xx.test  <- data.frame(x.test[,opt.genes]) ; colnames(xx.test) <- opt.genes
     tmp <- get.mipp(xx.train, y.train, xx.test, y.test, rule=rule)
     opt.Er    <-c(opt.Er, tmp$ErrorRate)
     opt.MiPP  <-c(opt.MiPP, tmp$MiPP)
     opt.sMiPP <-c(opt.sMiPP, tmp$sMiPP)

     #Pick k-gene model
     # Grow the model greedily: at each step try every remaining candidate
     # gene next to the chosen set, keep the best summed CV MiPP.
     i.stop <- 0 
     max.sMiPP <-  opt.sMiPP
     for(jj in 2:(n.gene-1)) {
        n.gene.cand <- n.gene-jj+1
        out <- matrix(0, nfold, n.gene.cand)
        for(i in 1:nfold) {
            y.tr <- y.train[id!=i]
            y.te <- y.train[id==i]
            for(j in 1:n.gene.cand) {
                x.tr <- data.frame(x.train.opt[id!=i,], x.train.cand[id!=i,j])
                x.te <- data.frame(x.train.opt[id==i,], x.train.cand[id==i,j])
                out[i,j] <- get.mipp(x.tr,y.tr, x.te, y.te, rule=rule)$MiPP
            }
        }

        out.sum <- apply(out, 2, sum)    
        pick.gene <- min(which(out.sum >= max(out.sum)))
        pick.gene <- as.numeric(colnames(x.train.cand)[pick.gene])
        opt.genes <- c(opt.genes, pick.gene)
        # Negative indexing by gene id works because columns are named 1..n.gene.
        x.train.opt  <- x.train[, opt.genes]
        x.train.cand <- x.train[,-opt.genes]

        tmp <- get.mipp(x.train[,opt.genes], y.train, x.test[,opt.genes],  y.test, rule=rule)

        opt.Er    <-c(opt.Er, tmp$ErrorRate)
        opt.MiPP  <-c(opt.MiPP, tmp$MiPP)
        opt.sMiPP <-c(opt.sMiPP, tmp$sMiPP)
 
        #stopping rule: stop if two drops
        # Track the running best sMiPP; i.stop counts consecutive
        # non-improving additions.
        if(max.sMiPP < tmp$sMiPP) {
           max.sMiPP <- tmp$sMiPP
           i.stop <- 0
        }
        else i.stop <- i.stop + 1 

# debug trace left by the authors: progress and smallest class size
print("OK")
print(jj)
print(min(unique(table(y.train)))) 

        # Stop after n.drops consecutive drops once min.sMiPP was reached,
        # or when the smallest class size cannot support a bigger model.
        if((i.stop >= n.drops) & (max.sMiPP >= min.sMiPP)) break #NOTE
        if(min(unique(table(y.train))) <= (jj+4)) break          #NOTE

    }

    i <- 1:length(opt.genes)
    final.out <- data.frame(i,opt.genes, opt.Er, opt.MiPP, opt.sMiPP)
    colnames(final.out) <- c("Order","Gene","ErrorRate","MiPP","sMiPP")

    return(final.out)
}


######END############################################################################
#Choose rule
# Dispatch to the per-rule scoring function and return the common result
# list (N.Miss, ErrorRate, MiPP, sMiPP).  An unknown rule stops with the
# same "No rule: <rule>" error as before.
#
# Improvement: the original duplicated the list of valid rules in a
# validation condition and five separate if statements; a single switch()
# keeps dispatch and validation in one place so they cannot drift apart.
get.mipp <- function(x.train, y.train, x.test, y.test, rule){

     tmp <- switch(rule,
          lda      = get.mipp.lda(x.train, y.train, x.test, y.test),
          qda      = get.mipp.qda(x.train, y.train, x.test, y.test),
          logistic = get.mipp.logistic(x.train, y.train, x.test, y.test),
          svmlin   = get.mipp.svm.linear(x.train, y.train, x.test, y.test),
          svmrbf   = get.mipp.svm.rbf(x.train, y.train, x.test, y.test),
          stop("No rule: ", rule))

     return(list(N.Miss=tmp$N.Miss, ErrorRate=tmp$ErrorRate, MiPP=tmp$MiPP, sMiPP=tmp$sMiPP))
}


#Computing Miss Error and MiPP after LDA
# Fits MASS::lda on the training data and scores the test data, returning
# the number of misclassifications, the error rate, and MiPP / sMiPP,
# where MiPP = (total posterior mass on the true classes) - nMiss.
get.mipp.lda <- function(x.train, y.train, x.test, y.test){
     
     # Bind the response onto the predictors; lda() is fit via a formula,
     # so both sets are shaped as data frames with columns 1..p plus "y".
     y <- y.train
     dat.train <- cbind(x.train, y)
     y <- y.test
     dat.test  <- cbind(x.test, y)

     if (!is.data.frame(dat.train)) dat.train <- data.frame(dat.train)
     if (!is.data.frame(dat.test))  dat.test  <- data.frame(dat.test)
     colnames(dat.train) <- c(seq_len(ncol(x.train)), "y")
     colnames(dat.test)  <- c(seq_len(ncol(x.test)), "y")

     fit  <- lda(y ~ ., dat.train)
     pred <- predict(fit, dat.test)

     classes <- unique(colnames(pred$post))

     truth <- dat.test$y
     guess <- pred$class

     # Total posterior probability assigned to each sample's true class.
     post.prob <- 0
     for (k in seq_along(classes)) {
         hit <- which(truth == classes[k])
         post.prob <- post.prob + sum(pred$post[hit, k])
     }

     N     <- length(truth)
     nMiss <- N - sum(truth == guess)
     Er    <- nMiss / nrow(dat.test)
     MiPP  <- post.prob - nMiss
     sMiPP <- MiPP / N

     return(list(N.Miss=nMiss, ErrorRate=Er, MiPP=MiPP, sMiPP=sMiPP))
}


# Fits a binomial GLM on the training data and computes MiPP on the test
# data.  Labels are recoded so the first factor level becomes "1" and the
# second "0"; glm() then models P(y == "0"), and the posterior of the true
# class is the fitted probability flipped for "1"-labelled samples.
# A sample counts as correct when its true-class posterior exceeds 0.5.
get.mipp.logistic <- function(x.train, y.train, x.test, y.test){

	# Recode both label vectors to the fixed level set ("1", "0").
	y.train <- factor(y.train)
	levels(y.train) <- c("1","0")
	y.test <- factor(y.test)
	levels(y.test) <- c("1","0")

	if (is.data.frame(x.train)) x.train <- as.matrix(x.train)
	if (is.data.frame(x.test))  x.test  <- as.matrix(x.test)

	fit <- glm(y.train ~ x.train, family="binomial")

	# Linear predictor on the test set, then the logistic transform.
	eta  <- cbind(1, x.test) %*% matrix(fit$coef, ncol = 1)
	prob <- plogis(eta)[, 1]

	# Posterior probability of each sample's *true* class.
	post.prob <- ifelse(y.test == "1", 1 - prob, prob)
	correct   <- ifelse(post.prob > .5, 1, 0)

	N     <- length(y.test)
	nMiss <- N - sum(correct)
	Er    <- nMiss / N
	MiPP  <- sum(post.prob) - nMiss
	sMiPP <- MiPP / N

	return(list(N.Miss=nMiss, ErrorRate=Er, MiPP=MiPP, sMiPP=sMiPP))
}

# Pre-select the top genes by absolute Welch two-sample t-statistic.
#
# x           : (sample x gene) matrix
# y           : class labels; samples are dichotomized into "same class as
#               the first sample" vs "everything else", so the filter is
#               meaningful for two-class problems only
# percent.cut : fraction of genes to keep (e.g. 0.01 keeps the top 1%)
#
# Returns the column indices of the genes whose |t| reaches the
# (1 - percent.cut) quantile.
#
# Improvement: the grouping variable was named `c`, shadowing base::c;
# renamed to `grp` (behavior unchanged).
pre.select <- function(x, y, percent.cut=0.01){

     # |Welch t| of one gene against the binary grouping.
     tstat <- function(a, dep){
              val <- t.test(a ~ dep, alternative = "two.sided",
                            var.equal = FALSE)$statistic
              abs(val)
     }

     # Binary indicator: 1 for samples sharing the first sample's class.
     grp <- rep(0, length(y)); grp[y == y[1]] <- 1
     stats <- apply(x, 2, tstat, dep = grp)
     cutoff <- quantile(stats, probs = (1 - percent.cut))
     id <- which(stats >= cutoff)
     return(id)
}



###################################################################
###Preprocess: normalization, thresholding, and log2 transformation
###################################################################
# data.type is one of "MAS4", "MAS5" (Affymetrix MAS output) or "dChip".
# Pipeline: drop rows containing missing values; IQR-normalize MAS arrays;
# floor low intensities (1 for MAS4/dChip, 0.1 for MAS5); log2-transform;
# and for MAS4/dChip scatter the exact zeros produced by the floor into
# random values in (-1, 0).
mipp.preproc <- function (x, data.type = "MAS5")
{
    # Exclude cases (rows) with missing values, then work on a matrix.
    x <- as.matrix(na.exclude(x))

    # IQR normalization for MAS-scaled arrays.
    if (data.type %in% c("MAS4", "MAS5")) {
        x <- quant.normal(x, percent = 50)
    }

    # Thresholding: floor intensities below the platform noise level.
    if (data.type %in% c("MAS4", "dChip") && any(x < 1)) {
        x[x < 1] <- 1
    }
    if (data.type == "MAS5" && any(x < 0.1)) {
        x[x < 0.1] <- 0.1
    }

    # log2 transformation.
    x <- log2(x)

    # Floored values became log2(1) == 0; replace each exact zero with a
    # random draw from (-1, 0) so they do not all coincide.
    if (data.type %in% c("MAS4", "dChip") && any(x < 1)) {
        n.zero <- sum(x <= 0)
        x[x <= 0] <- runif(n.zero, -1, 0)
    }

    return(x)
}


###IQR normalization
# Scale every array (column) of x so that all arrays share the largest
# observed central-`percent` spread (see quant.normal2); returns a
# data.frame of the adjusted intensities.
quant.normal <- function (x, percent = 50)
{
    spread <- apply(x, 2, quant.normal2, percent = percent)
    scale.factor <- spread / max(spread)
    # One scaling factor per column, replicated across all rows.
    divisor <- matrix(rep(scale.factor, nrow(x)), nrow = nrow(x), byrow = TRUE)
    data.frame(x / divisor)
}

# Central spread of a single array: the distance between the symmetric
# quantiles that enclose the middle `percent` percent of the data
# (percent = 50 gives the interquartile range).
quant.normal2 <- function (x, percent = 50)
{
    lower <- (100 - percent) / 200
    upper <- (100 + percent) / 200
    qs <- quantile(x, probs = c(lower, upper), na.rm = TRUE)
    as.vector(diff(qs))
}
#Computing Miss Error and MiPP after QDA
# Fits MASS::qda on the training data and scores the test data, returning
# the number of misclassifications, the error rate, and MiPP / sMiPP,
# where MiPP = (total posterior mass on the true classes) - nMiss.
get.mipp.qda <- function(x.train, y.train, x.test, y.test){

     colnames(x.train) <- 1:ncol(x.train)
     colnames(x.test)  <- 1:ncol(x.test)

     fit  <- qda(x.train, y.train)
     pred <- predict(fit, x.test)

     classes <- unique(colnames(pred$post))

     truth <- y.test
     guess <- pred$class

     # Total posterior probability assigned to each sample's true class.
     post.prob <- 0
     for (k in seq_along(classes)) {
         hit <- which(truth == classes[k])
         post.prob <- post.prob + sum(pred$post[hit, k])
     }

     N     <- length(truth)
     nMiss <- N - sum(truth == guess)
     Er    <- nMiss / nrow(x.test)
     MiPP  <- post.prob - nMiss
     sMiPP <- MiPP / N

     return(list(N.Miss=nMiss, ErrorRate=Er, MiPP=MiPP, sMiPP=sMiPP))
}


# Linear Kernel Decision Function
# Evaluates the SVM decision value f(x) = sum_i coef_i * <sv_i, x> + b for
# one new observation, from the components stored in an e1071 svm fit:
# coefs = y*alpha per support vector, rho = -b, index = support-vector
# row numbers of the training matrix oldx.
linearkernel.decision.function <- function(newx, oldx, svmobj) {
    sv.coefs  <- svmobj$coefs        # y * alpha for each support vector
    intercept <- -1 * svmobj$rho     # b
    sv.data   <- oldx[svmobj$index,] # the support vectors themselves

    # Shape the new observation as a p x 1 column vector.
    xcol <- newx
    dim(xcol) <- c(length(oldx[1,]), 1)

    # h(x) = sum_i coef_i * <sv_i, x>  (1 x 1 matrix)
    h <- t(sv.data %*% xcol) %*% sv.coefs

    # f(x) = h(x) + b
    h + intercept
}


# Computing Miss Error and MiPP after a linear-kernel SVM (e1071).
#
# Fits a linear SVM on the training data, classifies the test set, and
# converts the decision value f(x) of each test sample into a
# pseudo-posterior probability via the logistic transform 1/(1+exp(-f)).
# MiPP = sum(post.prob) - nMiss, sMiPP = MiPP/N.
get.mipp.svm.linear <- function(x.train, y.train, x.test, y.test){

        x.train <- as.matrix(x.train)
        x.test  <- as.matrix(x.test)

	y.train <- factor(y.train)
	y.test <- factor(y.test)

	fit <- svm(x.train, y.train, kernel="linear")

	True.class <- y.test
	Pred.class <- predict(fit, x.test)

	# Recompute f(x) for every test sample from the support vectors.
	fofx <- numeric(length(y.test))
	for(i in 1:length(y.test)){
		xin <- x.test[i,]
		fofx[i] <- linearkernel.decision.function(xin, x.train, fit)
	}

	# NOTE(review): the orientation of prob relative to the two classes
	# is keyed off the *first test label* (True.class[1]); this presumes
	# e1071's decision-value sign convention lines up with that label --
	# verify against svm()'s internal ordering of the class levels.
	prob <- 1/(1+exp(-fofx))
	postdf <- data.frame(prob, True.class)
	post.prob <- ifelse(postdf$True.class==True.class[1], 1-postdf$prob, postdf$prob)

	N <- length(y.test)
	nMiss <- N - sum(True.class==Pred.class)
	Er <- nMiss/N
	MiPP <- sum(post.prob)-nMiss
	sMiPP <- MiPP/N
	
	return(list(N.Miss=nMiss, ErrorRate=Er, MiPP=MiPP, sMiPP=sMiPP))
}

# RBF Kernel Decision Function
# Evaluates f(x) = sum_i coef_i * exp(-gamma * ||sv_i - x||^2) + b for one
# new observation, from the components stored in an e1071 svm fit:
# coefs = y*alpha, rho = -b, gamma = RBF width, index = support-vector
# row numbers of the training matrix oldx.
rbfkernel.decision.function <- function(newx, oldx, svmobj) {
    sv.coefs  <- svmobj$coefs
    intercept <- -1 * svmobj$rho
    sv.gamma  <- svmobj$gamma
    sv.data   <- oldx[svmobj$index,]
    n.sv      <- length(svmobj$index)

    # Replicate the new observation once per support vector (n.sv x p).
    p <- length(oldx[1,])
    xrep <- matrix(rep(newx[1:p], each = n.sv), nrow = n.sv, ncol = p)

    # Squared Euclidean distance from every support vector to newx.
    delta <- sv.data - xrep
    sq    <- apply(delta, 2, function(v) v^2)
    dist2 <- apply(sq, 1, sum)

    # Kernel evaluations, then f(x) = sum_i coef_i * K(sv_i, x) + b.
    kvals <- exp(-1 * sv.gamma * dist2)
    t(kvals) %*% sv.coefs + intercept
}


# Computing Miss Error and MiPP after an RBF-kernel SVM (e1071).
#
# Fits a radial SVM on the training data, classifies the test set, and
# converts each decision value into a pseudo-posterior probability via the
# logistic transform; MiPP = sum(post.prob) - nMiss, sMiPP = MiPP/N.
#
# Bug fixed: gamma was computed as 1/length(ncol(x.train)), which is
# always 1 because ncol() returns a single number; the intended value
# (and e1071's own default) is 1/ncol(x.train), i.e. one over the number
# of features.  Also removed the unused local `lev.y`.
get.mipp.svm.rbf <- function(x.train, y.train, x.test, y.test){

        x.train <- as.matrix(x.train)
        x.test  <- as.matrix(x.test) 
 
	y.train <- factor(y.train)
	y.test <- factor(y.test)

	# RBF width: one over the data dimension (e1071 default).
	gammap <- 1/ncol(x.train)
	fit <- svm(x.train, y.train, kernel="radial", gamma=gammap)
	
	True.class <- y.test
	Pred.class <- predict(fit, x.test)

	# Recompute f(x) for every test sample from the support vectors.
	fofx <- numeric(length(y.test))
	for(i in 1:length(y.test)){
		xin <- x.test[i,]
		fofx[i] <- rbfkernel.decision.function(xin, x.train, fit)
	}

	# NOTE(review): the orientation of prob relative to the two classes
	# is keyed off the first *test* label (True.class[1]); this presumes
	# e1071's decision-value sign convention lines up with it -- verify.
	prob <- 1/(1+exp(-fofx))
	postdf <- data.frame(prob, True.class)
	post.prob <- ifelse(postdf$True.class==True.class[1], 1-postdf$prob, postdf$prob)

	N <- length(y.test)
	nMiss <- N - sum(True.class==Pred.class)
	Er <- nMiss/N
	MiPP <- sum(post.prob)-nMiss
	sMiPP <- MiPP/N
	
	return(list(N.Miss=nMiss, ErrorRate=Er, MiPP=MiPP, sMiPP=sMiPP))
}
