// Source listing: org.apache.ctakes.ytex.kernel.IntrinsicInfoContentEvaluatorImpl (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
import org.apache.ctakes.ytex.kernel.dao.ConceptDao;
import org.apache.ctakes.ytex.kernel.model.ConcRel;
import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
import org.apache.ctakes.ytex.kernel.model.FeatureEvaluation;
import org.apache.ctakes.ytex.kernel.model.FeatureRank;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.ref.SoftReference;
import java.lang.reflect.Array;
import java.util.*;
public class IntrinsicInfoContentEvaluatorImpl implements
IntrinsicInfoContentEvaluator {
/**
 * Mutable per-concept record holding the two counts from which the
 * intrinsic information content is computed: the number of leaves found
 * under the concept and the number of its subsumers.
 */
public static class IntrinsicICInfo {
    /** Concept this record describes. */
    private ConcRel concept;
    /** Leaf count; stays 0 until it is set explicitly. */
    private int leafCount;
    /** Subsumer count; stays 0 until it is set explicitly. */
    private int subsumerCount;

    /**
     * Creates a record for the given concept with both counts at zero.
     *
     * @param concept
     *            concept this record describes
     */
    public IntrinsicICInfo(ConcRel concept) {
        this.concept = concept;
    }

    public ConcRel getConcept() {
        return this.concept;
    }

    public void setConcept(ConcRel concept) {
        this.concept = concept;
    }

    public int getLeafCount() {
        return this.leafCount;
    }

    public void setLeafCount(int leafCount) {
        this.leafCount = leafCount;
    }

    public int getSubsumerCount() {
        return this.subsumerCount;
    }

    public void setSubsumerCount(int subsumerCount) {
        this.subsumerCount = subsumerCount;
    }
}
// Logger for this class. Note that it is registered under the simple class
// name, not the fully qualified one.
private static final Logger LOGGER = LoggerFactory.getLogger( "IntrinsicInfoContentEvaluatorImpl" );
// Multiplying a natural logarithm by this factor (1 / ln 2) converts it to a
// base-2 logarithm.
private static final double log2adjust = 1d / Math.log(2);
/**
 * Command line entry point. Loads the "ytexProperties" bean, overlays the
 * JVM system properties on top of it and, provided a concept graph name is
 * configured, runs the intrinsic information content evaluation.
 * <p>
 * Terminates the JVM with status 0 after a successful run, or with status 1
 * when org.apache.ctakes.ytex.conceptGraphName is missing.
 *
 * @param args
 *            ignored
 * @throws IOException
 *             propagated from the evaluation
 */
public static void main(String[] args) throws IOException {
    final Properties settings = (Properties) KernelContextHolder
            .getApplicationContext().getBean("ytexProperties");
    // system properties take precedence over the configured defaults
    settings.putAll(System.getProperties());
    if (settings.containsKey("org.apache.ctakes.ytex.conceptGraphName")) {
        final IntrinsicInfoContentEvaluator evaluator = KernelContextHolder
                .getApplicationContext().getBean(
                        IntrinsicInfoContentEvaluator.class);
        evaluator.evaluateIntrinsicInfoContent(settings);
        System.exit(0);
    }
    // only reached when the graph name is missing: System.exit does not return
    System.err.println("error: org.apache.ctakes.ytex.conceptGraphName not specified");
    System.exit(1);
}
// DAO through which storeIntrinsicIC deletes and saves feature evaluations;
// assigned via setClassifierEvaluationDao.
private ClassifierEvaluationDao classifierEvaluationDao;
// DAO used to load the concept graph by name; assigned via setConceptDao.
private ConceptDao conceptDao;
/**
 * Computes the intrinsic information content of a concept as
 * {@code log2(maxLeaves + 1) - log2(leafCount / subsumerCount + 1)}.
 *
 * @param icInfo
 *            leaf and subsumer counts of the concept
 * @param maxLeaves
 *            leaf total the concept's own leaf ratio is measured against
 * @return the information content, or -1 (after logging an error) if
 *         either logarithm evaluates to NaN
 */
private double computeIC(IntrinsicICInfo icInfo, int maxLeaves) {
    // log2(|leaves(c)| / |subsumers(c)| + 1)
    final double leafTerm = log2adjust
            * Math.log((double) icInfo.getLeafCount()
                    / (double) icInfo.getSubsumerCount() + 1d);
    // log2(max_leaves + 1)
    final double maxTerm = log2adjust * Math.log((double) maxLeaves + 1d);
    // NaN never compares equal to anything, itself included, so an
    // "== Double.NaN" test can never be true; Double.isNaN is required.
    if (Double.isNaN(leafTerm) || Double.isNaN(maxTerm)) {
        LOGGER.error("IC = NaN for " + icInfo.getConcept().getConceptID()
                + ", leafCount=" + icInfo.getLeafCount()
                + ", subsumerCount = " + icInfo.getSubsumerCount());
        return -1d;
    }
    return maxTerm - leafTerm;
}
/**
* recursively compute the number of leaves. fill in the icInfoMap as we go
* along
*
* @param concept
* concept for which we should get the leaves
* @param leafCache
* cache of concept's leaves
* @param icInfoMap
* to be updated with leaf counts
* @param cg
* @param w
* @param visitedNodes
* list of nodes that have already been visited - we don't need
* to revisit them when getting the leaves
* @return
* @throws IOException
*/
private HashSet getLeaves(ConcRel concept,
SoftReference>[] leafCache,
Map icInfoMap, ConceptGraph cg,
BufferedWriter w, HashSet visitedNodes) throws IOException {
// look in cache
SoftReference> refLeaves = leafCache[concept.getNodeIndex()];
if (refLeaves != null && refLeaves.get() != null) {
return refLeaves.get();
}
// not in cache - compute recursively
HashSet leaves = new HashSet();
leafCache[concept.getNodeIndex()] = new SoftReference>(leaves);
if (concept.isLeaf()) {
// for leaves, just add the concept id
leaves.add(concept.getNodeIndex());
} else {
IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
// have we already computed the leaf count for this node?
// if yes, then we can ignore previously visited nodes
// if no, then compute it now and revisit previously visited nodes
// if we have to
boolean needLeaves = (icInfo != null && icInfo.getLeafCount() == 0);
HashSet visitedNodesLocal = visitedNodes;
if (needLeaves || visitedNodesLocal == null) {
// allocate a set to keep track of nodes we've already visited
// so that we don't revisit them. if we have already computed
// this node's leaf count then we reuse whatever the caller gave
// us if non null, else allocate a new one.
// if we haven't already computed this node's leaf count,
// allocate a new set to avoid duplications in the traversal for
// this node
visitedNodesLocal = new HashSet();
}
// for inner nodes, recurse
for (ConcRel child : concept.getChildren()) {
// if we've already visited a node, then don't bother adding
// that node's leaves - we already have them
if (!visitedNodesLocal.contains(child.getNodeIndex())) {
leaves.addAll(getLeaves(child, leafCache, icInfoMap, cg, w,
visitedNodesLocal));
}
}
// add this node to the set of visited nodes so we know not to
// revisit. This is only of importance if the caller gave us
// a non-empty set.
if (visitedNodes != null && visitedNodes != visitedNodesLocal) {
visitedNodes.add(concept.getNodeIndex());
visitedNodes.addAll(visitedNodesLocal);
}
// update the leaf count if we haven't done so already
if (needLeaves) {
icInfo.setLeafCount(leaves.size());
// output leaves if desired
if (w != null) {
w.write(concept.getConceptID());
w.write("\t");
w.write(Integer.toString(leaves.size()));
w.write("\t");
Iterator iter = leaves.iterator();
while (iter.hasNext()) {
w.write(cg.getConceptList().get(iter.next())
.getConceptID());
w.write(" ");
}
w.newLine();
}
}
}
return leaves;
}
// /**
// * add/update icInfoMap entry for concept with the concept's leaf count
// *
// * @param concept
// * @param icInfoMap
// * @param w
// * @param subsumerMap
// * @throws IOException
// */
// private void computeLeafCount(ConcRel concept,
// Map icInfoMap,
// SoftReference[] leafCache, ConceptGraph cg,
// BufferedWriter w) throws IOException {
// // see if we already computed this
// IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
// if (icInfo != null && icInfo.getLeafCount() > 0) {
// return;
// }
// // if not, figure it out
// if (icInfo == null) {
// icInfo = new IntrinsicICInfo(concept);
// icInfoMap.put(concept.getConceptID(), icInfo);
// }
// // for leaves the default (0) is correct
// if (!concept.isLeaf()) {
// TIntSet leaves = this.getLeaves(concept, leafCache);
// icInfo.setLeafCount(leaves.size());
// if (w != null) {
// w.write(concept.getConceptID());
// w.write("\t");
// w.write(Integer.toString(leaves.size()));
// w.write("\t");
// TIntIterator iter = leaves.iterator();
// while (iter.hasNext()) {
// w.write(cg.getConceptList().get(iter.next()).getConceptID());
// w.write(" ");
// }
// w.newLine();
// }
// }
// // recurse to parents
// for (ConcRel parent : concept.getParents()) {
// computeLeafCount(parent, icInfoMap, leafCache, cg, w);
// }
// }
/**
* add/update icInfoMap entry for concept with the concept's subsumer count
*
* @param concept
* @param icInfoMap
* @param subsumerMap
* @param w
* @throws IOException
*/
private void computeSubsumerCount(ConcRel concept,
Map icInfoMap,
Map> subsumerMap, short[] depthArray,
BufferedWriter w) throws IOException {
// see if we already computed this
IntrinsicICInfo icInfo = icInfoMap.get(concept.getConceptID());
if (icInfo != null && icInfo.getSubsumerCount() > 0) {
return;
}
// if not, figure it out
if (icInfo == null) {
icInfo = new IntrinsicICInfo(concept);
icInfoMap.put(concept.getConceptID(), icInfo);
}
Set subsumers = this.getSubsumers(concept, subsumerMap,
depthArray);
if (w != null) {
w.write(concept.getConceptID());
w.write("\t");
w.write(Integer.toString(subsumers.size()));
w.write("\t");
w.write(subsumers.toString());
w.newLine();
}
icInfo.setSubsumerCount(subsumers.size());
// recursively compute the children's subsumer counts
for (ConcRel child : concept.getChildren()) {
computeSubsumerCount(child, icInfoMap, subsumerMap, depthArray, w);
}
}
/**
 * Reads the concept graph name and the output directory from the given
 * properties, loads the graph through the concept DAO and evaluates it.
 * The directory falls back to the JVM's java.io.tmpdir when
 * org.apache.ctakes.ytex.conceptGraphDir is not set.
 *
 * @param props
 *            configuration holding org.apache.ctakes.ytex.conceptGraphName
 *            and, optionally, org.apache.ctakes.ytex.conceptGraphDir
 * @throws IOException
 *             propagated from the evaluation
 */
@Override
public void evaluateIntrinsicInfoContent(final Properties props)
        throws IOException {
    final String graphName = props.getProperty("org.apache.ctakes.ytex.conceptGraphName");
    final String fallbackDir = System.getProperty("java.io.tmpdir");
    final String graphDir = props.getProperty("org.apache.ctakes.ytex.conceptGraphDir",
            fallbackDir);
    final ConceptGraph graph = this.conceptDao.getConceptGraph(graphName);
    evaluateIntrinsicInfoContent(graphName, graphDir, graph);
}
@Override
public void evaluateIntrinsicInfoContent(String conceptGraphName,
String conceptGraphDir, ConceptGraph cg) throws IOException {
LOGGER.info("computing subsumer counts");
// compute the subsumer count
Map icInfoMap = new HashMap();
Map> subsumerMap = new WeakHashMap>();
short[] depthArray = new short[cg.getConceptList().size()];
BufferedWriter w = null;
try {
w = this.getOutputFile(conceptGraphName, conceptGraphDir,
"subsumer");
computeSubsumerCount(cg.getConceptMap().get(cg.getRoot()),
icInfoMap, subsumerMap, depthArray, w);
} finally {
if (w != null) {
try {
w.close();
} catch (IOException e) {
}
}
}
subsumerMap = null;
LOGGER.info("computing max leaves");
// get the leaves in this concept graph
Set leafSet = null;
try {
w = this.getOutputFile(conceptGraphName, conceptGraphDir, "allleaf");
leafSet = this.getAllLeaves(cg, w);
} finally {
if (w != null) {
try {
w.close();
} catch (IOException e) {
}
}
}
LOGGER.info("computing leaf counts");
@SuppressWarnings("unchecked")
SoftReference>[] leafCache = (SoftReference>[]) Array
.newInstance((new SoftReference>(new HashSet()))
.getClass(), cg.getConceptList().size());
// compute leaf count of all concepts in this graph
try {
w = this.getOutputFile(conceptGraphName, conceptGraphDir, "leaf");
// for (String leaf : leafSet) {
// computeLeafCount(cg.getConceptMap().get(leaf), icInfoMap,
// leafCache, cg, w);
// }
this.getLeaves(cg.getConceptMap().get(cg.getRoot()), leafCache,
icInfoMap, cg, w, null);
} finally {
if (w != null) {
try {
w.close();
} catch (IOException e) {
}
}
}
leafCache = null;
LOGGER.info("storing intrinsic ic");
storeIntrinsicIC(conceptGraphName, leafSet.size(), icInfoMap,
depthArray, cg);
LOGGER.info("finished computing intrinsic ic");
}
/**
 * Opens the output file for one phase of the computation, provided the
 * system property org.apache.ctakes.ytex.ic.debug is set to "true"
 * (case-insensitive).
 *
 * @param conceptGraphName
 *            concept graph name; first part of the file name
 * @param conceptGraphDir
 *            directory the file is placed in
 * @param type
 *            phase label; the file is named
 *            {@code <conceptGraphName>-<type>.txt}
 * @return a writer for the file, or null when debug output is disabled
 * @throws IOException
 *             if the file cannot be opened for writing
 */
private BufferedWriter getOutputFile(final String conceptGraphName,
        final String conceptGraphDir, String type) throws IOException {
    final String debugFlag = System.getProperty(
            "org.apache.ctakes.ytex.ic.debug", "false");
    if (!"true".equalsIgnoreCase(debugFlag)) {
        return null;
    }
    final String fileName = conceptGraphName + "-" + type + ".txt";
    return new BufferedWriter(new FileWriter(FileUtil.addFilenameToDir(
            conceptGraphDir, fileName)));
}
/**
 * Collects the concept ids of all leaf concepts in the graph's concept
 * map.
 *
 * @param cg
 *            concept graph to scan
 * @param w
 *            optional writer that receives a single line with the leaf
 *            total and the leaf ids; may be null
 * @return the concept ids of all leaves
 * @throws IOException
 *             if writing to {@code w} fails
 */
public Set<String> getAllLeaves(ConceptGraph cg, BufferedWriter w)
        throws IOException {
    Set<String> leafSet = new HashSet<String>();
    // only the mapped concepts are needed, not the map keys
    for (ConcRel concept : cg.getConceptMap().values()) {
        if (concept.isLeaf()) {
            leafSet.add(concept.getConceptID());
        }
    }
    if (w != null) {
        w.write(Integer.toString(leafSet.size()));
        w.write("\t");
        w.write(leafSet.toString());
        w.newLine();
    }
    return leafSet;
}
/**
 * @return the DAO used to delete and save feature evaluations
 */
public ClassifierEvaluationDao getClassifierEvaluationDao() {
    return this.classifierEvaluationDao;
}
/**
 * @return the DAO used to load concept graphs
 */
public ConceptDao getConceptDao() {
    return this.conceptDao;
}
// private TIntSet getLeaves(ConcRel concept,
// SoftReference[] leafCache) {
// // look in cache
// SoftReference refLeaves = leafCache[concept.getNodeIndex()];
// if (refLeaves != null && refLeaves.get() != null) {
// return refLeaves.get();
// }
// // not in cache - compute recursively
// TIntSet leaves = new TIntHashSet();
// leafCache[concept.getNodeIndex()] = new SoftReference(leaves);
// if (concept.isLeaf()) {
// // for leaves, just add the concept id
// leaves.add(concept.getNodeIndex());
// } else {
// // for inner nodes, recurse
// for (ConcRel child : concept.getChildren()) {
// leaves.addAll(getLeaves(child, leafCache));
// }
// }
// return leaves;
// }
/**
* recursively compute the subsumers of a concept
*
* @param concept
* @param subsumerMap
* @return
*/
private Set getSubsumers(ConcRel concept,
Map> subsumerMap, short depthArray[]) {
// look in cache
if (subsumerMap.containsKey(concept.getConceptID()))
return subsumerMap.get(concept.getConceptID());
// not in cache - compute recursively
Set subsumers = new HashSet();
boolean calcDepth = depthArray[concept.getNodeIndex()] == 0;
short parentMaxDepth = 0;
if (concept.getParents() != null && !concept.getParents().isEmpty()) {
// parents - recurse
for (ConcRel parent : concept.getParents()) {
subsumers.addAll(getSubsumers(parent, subsumerMap, depthArray));
// get the deepest parent
if (calcDepth) {
short parentDepth = depthArray[parent.getNodeIndex()];
if (parentDepth > parentMaxDepth)
parentMaxDepth = parentDepth;
}
}
}
if (calcDepth)
depthArray[concept.getNodeIndex()] = (short) (parentMaxDepth + 1);
// add the concept itself to the set of subsumers
subsumers.add(concept.getConceptID());
// add this to the cache - copy the key so that this can be gc'ed as
// needed
subsumerMap.put(concept.getConceptID(), subsumers);
return subsumers;
}
/**
 * @param classifierEvaluationDao
 *            DAO to use for deleting and saving feature evaluations
 */
public void setClassifierEvaluationDao(
        final ClassifierEvaluationDao classifierEvaluationDao) {
    this.classifierEvaluationDao = classifierEvaluationDao;
}
/**
 * @param conceptDao
 *            DAO to use for loading concept graphs
 */
public void setConceptDao(final ConceptDao conceptDao) {
    this.conceptDao = conceptDao;
}
/**
 * Computes the information content of every concept in {@code icInfoMap},
 * writes depth and information content back onto the concepts, records the
 * graph-wide maxima on the concept graph and, when the system property
 * org.apache.ctakes.ytex.ic.debug is "true", replaces the stored
 * "intrinsic-infocontent" feature evaluation for this graph.
 *
 * @param conceptGraphName
 *            name of the concept graph; stored as param2 of the evaluation
 * @param maxLeaves
 *            leaf total passed on to the information content formula
 * @param icInfoMap
 *            concept id to leaf/subsumer counts
 * @param depthArray
 *            per-node depths indexed by node index
 * @param cg
 *            concept graph that receives the maximum depth and maximum
 *            information content
 */
private void storeIntrinsicIC(String conceptGraphName, int maxLeaves,
        Map<String, IntrinsicICInfo> icInfoMap, short depthArray[],
        ConceptGraph cg) {
    FeatureEvaluation fe = new FeatureEvaluation();
    fe.setEvaluationType("intrinsic-infocontent");
    fe.setParam2(conceptGraphName);
    List<FeatureRank> listFeatureRank = new ArrayList<FeatureRank>(
            icInfoMap.size());
    double maxIC = 0d;
    short maxDepth = 0;
    for (IntrinsicICInfo icInfo : icInfoMap.values()) {
        ConcRel cr = icInfo.getConcept();
        short depth = depthArray[cr.getNodeIndex()];
        cr.setDepth(depth);
        if (depth > maxDepth) {
            maxDepth = depth;
        }
        double ic = computeIC(icInfo, maxLeaves);
        cr.setIntrinsicInfoContent(ic);
        if (ic > maxIC) {
            maxIC = ic;
        }
        LOGGER.debug("{}={}", cr.getConceptID(), ic);
        listFeatureRank.add(new FeatureRank(fe, cr.getConceptID(), ic,
                depth));
    }
    cg.setDepthMax(maxDepth);
    cg.setIntrinsicICMax(maxIC);
    // NOTE(review): persistence is gated on the same debug flag as the
    // output files - confirm that this is intended.
    if ("true".equalsIgnoreCase(System
            .getProperty("org.apache.ctakes.ytex.ic.debug", "false"))) {
        this.classifierEvaluationDao.deleteFeatureEvaluation(null, null,
                null, fe.getEvaluationType(), null, 0d, conceptGraphName);
        this.classifierEvaluationDao.saveFeatureEvaluation(fe,
                listFeatureRank);
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy