### Name: semsim
### Title: Compute semantic similarity measure for terms in an
###   object-ontology complex
### Aliases: semsim pms subsumers conceptProbs usageCount
### Keywords: models

### ** Examples

#
# Load the demo resources stored in ontoTools: a graph of GOMF and the
# OOmap between LL and GOMF (derived from humanLLMappings).  These stored
# copies will have to be updated regularly.
#
data(goMFgraphDemo, ooMapLL2GOMFdemo)
#
# Assemble the objects step by step: rooted DAG -> ontology -> OOC.
#
gomfrDAG <- new("rootedDAG", DAG = goMFgraphDemo, root = "GO:0003674")
GOMFonto <- new("ontology", rDAG = gomfrDAG, name = "GOMF",
                version = "bioc 1.3.1")
LLGOMFOOC <- makeOOC(GOMFonto, ooMapLL2GOMFdemo)
#
# The accessibility matrix for the GO MF graph is also supplied as a
# data resource; with it we can compute some term probabilities
# (here restricted via inds = 1:20).
#
data(gomfAmat)
pc <- conceptProbs(LLGOMFOOC, gomfAmat, inds = 1:20)
#
# Next, collect the GO MF tags together with their term names; a sample
# of these tags is drawn afterwards to compute pairwise semantic
# similarities.
#
data(LLGOMFcp) # full set of precomputed concept probabilities
library(GO)
library(Biobase)
library(combinat)
GO() # get the GO environments
GOMFtags <- ls(envir = GOMFID2TERM)
ntags <- length(GOMFtags)
GOMFterms <- unlist(multiget(GOMFtags, env = GOMFID2TERM))
# mark every repeated term name with a ".2" suffix
if (anyDuplicated(GOMFterms) > 0) {
  dups <- seq_len(ntags)[duplicated(GOMFterms)]
  GOMFterms[dups] <- paste(GOMFterms[dups], ".2", sep = "")
}
# index the term names by their GO tags
names(GOMFterms) <- GOMFtags
# Draw a reproducible sample of 10 GO MF tags, compute the semantic
# similarity of every pair of sampled tags, and report the two most
# similar pairs by term name.
set.seed(1234)
st <- sample(names(GOMFterms), size = 10) # take the sample
pst <- combn(st, 2)   # get a matrix with the pairs of terms in columns
npst <- ncol(pst)
ss <- rep(NA, npst)   # one similarity value per pair (column of pst)
for (i in seq_len(npst)) { # compute semantic similarities
  cat(i) # progress indicator
  ss[i] <- semsim(pst[1, i], pst[2, i], ooc = LLGOMFOOC, acc = gomfAmat,
                  pc = LLGOMFcp)
}
print(summary(ss))
# Rank the pairs once, from most to least similar.  Looking the runner-up
# up via ss == max(ss[-top]) would return `top` again whenever the maximum
# is tied, and any NA in ss would turn both lookups into NA; order() breaks
# ties by position and sorts NA values last.
ranked <- order(ss, decreasing = TRUE)
top <- ranked[1]  # index of the most similar pair
print(GOMFterms[as.character(pst[, top])])
pen <- ranked[2]  # second most similar
print(GOMFterms[as.character(pst[, pen])])



