
org.apache.ctakes.ytex.kernel.InfoContentEvaluatorImpl Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import org.apache.commons.cli.*;
import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
import org.apache.ctakes.ytex.kernel.dao.ConceptDao;
import org.apache.ctakes.ytex.kernel.model.ConcRel;
import org.apache.ctakes.ytex.kernel.model.ConceptGraph;
import org.apache.ctakes.ytex.kernel.model.FeatureEvaluation;
import org.apache.ctakes.ytex.kernel.model.FeatureRank;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import javax.sql.DataSource;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
/**
* Calculate the information content of each concept in a corpus wrt the
* specified concept graph. Required properties:
*
* - org.apache.ctakes.ytex.conceptGraphName - required - name of conceptGraph. @see ConceptDao
*
- org.apache.ctakes.ytex.corpusName - required - name of corpus
*
- org.apache.ctakes.ytex.conceptSetName - optional - you may want to experiment with
* different sets of concepts from a corpus, e.g. concepts from certain
* sections, or different ways of counting concepts.
*
- org.apache.ctakes.ytex.freqQuery - query to obtain raw concept frequencies for the corpus
*
* to execute, either specify these options via system properties (-D options)
* on the command line, or supply this class with the path to a properties file
* used for evaluation, or both (-D overrides properties file).
*
* The information content of each concept is stored in the feature_rank table.
* The related record in the feature_eval table has
*
* - type = infocontent
*
- feature_set_name = conceptSetName
*
- param1 = conceptGraphName
*
*
* @author vijay
*
*/
public class InfoContentEvaluatorImpl implements InfoContentEvaluator {
/**
* @param args
* @throws IOException
*/
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
Options options = new Options();
options.addOption(OptionBuilder
.withArgName("property file")
.hasArg()
.isRequired()
.withDescription(
"property file with queries and other parameters. todo desc")
.create("prop"));
try {
CommandLineParser parser = new GnuParser();
CommandLine line = parser.parse(options, args);
Properties props = (Properties) KernelContextHolder
.getApplicationContext().getBean("ytexProperties");
Properties propsArgs = FileUtil.loadProperties(
line.getOptionValue("prop"), true);
props.putAll(propsArgs);
if (!props.containsKey("org.apache.ctakes.ytex.conceptGraphName")
|| !props.containsKey("org.apache.ctakes.ytex.corpusName")
|| !props.containsKey("org.apache.ctakes.ytex.freqQuery")) {
System.err.println("error: required parameter not specified");
System.exit(1);
} else {
InfoContentEvaluator corpusEvaluator = KernelContextHolder
.getApplicationContext().getBean(
InfoContentEvaluator.class);
corpusEvaluator.evaluateCorpusInfoContent(
props.getProperty("org.apache.ctakes.ytex.freqQuery"),
props.getProperty("org.apache.ctakes.ytex.corpusName"),
props.getProperty("org.apache.ctakes.ytex.conceptGraphName"),
props.getProperty("org.apache.ctakes.ytex.conceptSetName"));
System.exit(0);
}
} catch (ParseException pe) {
printHelp(options);
System.exit(1);
}
}
private static void printHelp(Options options) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("java " + InfoContentEvaluatorImpl.class.getName()
+ " calculate information content of corpus wrt concept graph",
options);
}
private ClassifierEvaluationDao classifierEvaluationDao;
private ConceptDao conceptDao;
// private CorpusDao corpusDao;
private JdbcTemplate jdbcTemplate;
/*
* (non-Javadoc)
*
* @see
* org.apache.ctakes.ytex.kernel.CorpusEvaluator#evaluateCorpusInfoContent(java.lang.String,
* java.lang.String, java.lang.String, java.lang.String)
*/
@Override
public void evaluateCorpusInfoContent(final String freqQuery,
final String corpusName, final String conceptGraphName,
final String conceptSetName) {
ConceptGraph cg = conceptDao.getConceptGraph(conceptGraphName);
classifierEvaluationDao.deleteFeatureEvaluation(corpusName,
conceptSetName, null, INFOCONTENT, 0, 0d, conceptGraphName);
FeatureEvaluation eval = new FeatureEvaluation();
eval.setCorpusName(corpusName);
if (conceptSetName != null)
eval.setFeatureSetName(conceptSetName);
eval.setEvaluationType(INFOCONTENT);
eval.setParam2(conceptGraphName);
// CorpusEvaluation eval = corpusDao.getCorpus(corpusName,
// conceptGraphName, conceptSetName);
// if (eval == null) {
// eval = new CorpusEvaluation();
// eval.setConceptGraphName(conceptGraphName);
// eval.setConceptSetName(conceptSetName);
// eval.setCorpusName(corpusName);
// this.corpusDao.addCorpus(eval);
// }
Map rawFreq = getFrequencies(freqQuery);
double totalFreq = 0d;
// map of cui to cumulative frequency
Map conceptFreq = new HashMap(cg
.getConceptMap().size());
// recurse through the tree
totalFreq = getFrequency(cg.getConceptMap().get(cg.getRoot()),
conceptFreq, rawFreq);
List featureRankList = new ArrayList(
conceptFreq.size());
// update information content
double log2inv = -1d / Math.log(2);
for (Map.Entry cfreq : conceptFreq.entrySet()) {
if (cfreq.getValue() > 0) {
FeatureRank featureRank = new FeatureRank(eval, cfreq.getKey(),
log2inv * Math.log(cfreq.getValue() / totalFreq));
featureRankList.add(featureRank);
}
}
// the rank is irrelevant, but rank the features anyways
featureRankList = FeatureRank.sortFeatureRankList(featureRankList,
new FeatureRank.FeatureRankDesc());
classifierEvaluationDao.saveFeatureEvaluation(eval, featureRankList);
}
public ClassifierEvaluationDao getClassifierEvaluationDao() {
return classifierEvaluationDao;
}
public ConceptDao getConceptDao() {
return conceptDao;
}
public DataSource getDataSource(DataSource ds) {
return this.jdbcTemplate.getDataSource();
}
// public CorpusDao getCorpusDao() {
// return corpusDao;
// }
//
// public void setCorpusDao(CorpusDao corpusDao) {
// this.corpusDao = corpusDao;
// }
/**
* get the frequency of each term in the corpus.
*
* @param freqQuery
* query returns 2 columns. 1st column - concept id (string), 2nd
* column - frequency (double)
* @return
*/
@Override
public Map getFrequencies(String freqQuery) {
// get the raw frequency
final Map rawFreq = new HashMap();
jdbcTemplate.query(freqQuery, new RowCallbackHandler() {
@Override
public void processRow(ResultSet rs) throws SQLException {
rawFreq.put(rs.getString(1), rs.getDouble(2));
}
});
return rawFreq;
}
/**
* recursively sum frequency of parent and all its childrens' frequencies
*
* @param parent
* parent node
* @param conceptFreq
* results stored here
* @param conceptIdToTermMap
* raw frequencies here
* @return double sum of concept frequency in the subtree with parent as
* root
*/
double getFrequency(ConcRel parent, Map conceptFreq,
Map rawFreq) {
double dFreq = 0d;
if (conceptFreq.containsKey(parent.getConceptID())) {
dFreq = conceptFreq.get(parent.getConceptID());
} else {
// get raw freq
dFreq = rawFreq.containsKey(parent.getConceptID()) ? rawFreq
.get(parent.getConceptID()) : 0d;
// recurse
for (ConcRel child : parent.getChildren()) {
dFreq += getFrequency(child, conceptFreq, rawFreq);
}
conceptFreq.put(parent.getConceptID(), dFreq);
}
return dFreq;
}
public void setClassifierEvaluationDao(
ClassifierEvaluationDao classifierEvaluationDao) {
this.classifierEvaluationDao = classifierEvaluationDao;
}
public void setConceptDao(ConceptDao conceptDao) {
this.conceptDao = conceptDao;
}
public void setDataSource(DataSource ds) {
this.jdbcTemplate = new JdbcTemplate(ds);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy