
org.apache.ctakes.ytex.kernel.BaseClassifierEvaluationParser Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
import org.apache.ctakes.ytex.kernel.model.ClassifierEvaluation;
import org.apache.ctakes.ytex.kernel.model.ClassifierInstanceEvaluation;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Miscellaneous methods used for parsing various output types; abstract base
* class for {@link ClassifierEvaluationParser} implementations.
*
* @author vhacongarlav
*
*/
public abstract class BaseClassifierEvaluationParser implements
ClassifierEvaluationParser {
/** Logger shared by the parsing helpers; registered under the simple class name. */
static private final Logger LOGGER = LoggerFactory.getLogger( "BaseClassifierEvaluationParser" );
/** Token delimiter: a single whitespace character or the end of input. */
public static Pattern wsPattern = Pattern.compile("\\s|\\z");
/** Token delimiter: a single whitespace character, a literal dot, or the end of input. */
public static Pattern wsDotPattern = Pattern.compile("\\s|\\.|\\z");
/** DAO used to save classifier evaluations; assigned through {@link #setClassifierEvaluationDao}. */
private ClassifierEvaluationDao classifierEvaluationDao;
/**
 * Mutable holder describing one instance: its id, whether it is a training
 * instance, and the name of its target class.
 */
public static class InstanceClassInfo {
    /** id of the instance */
    long instanceId;
    /** true for a training instance, false otherwise */
    boolean train;
    /** name of the class the instance is labelled with */
    String targetClassName;

    /** Creates a holder with default field values (0, false, null). */
    public InstanceClassInfo() {
    }

    /**
     * Creates a fully populated holder.
     *
     * @param instanceId
     *            id of the instance
     * @param train
     *            true if the instance is a training instance
     * @param targetClassName
     *            name of the instance's target class
     */
    public InstanceClassInfo(final long instanceId, final boolean train,
            final String targetClassName) {
        this.targetClassName = targetClassName;
        this.train = train;
        this.instanceId = instanceId;
    }

    /** @return id of the instance */
    public long getInstanceId() {
        return this.instanceId;
    }

    /** @param instanceId new instance id */
    public void setInstanceId(final long instanceId) {
        this.instanceId = instanceId;
    }

    /** @return true if this is a training instance */
    public boolean isTrain() {
        return this.train;
    }

    /** @param train true to mark this as a training instance */
    public void setTrain(final boolean train) {
        this.train = train;
    }

    /** @return name of the target class */
    public String getTargetClassName() {
        return this.targetClassName;
    }

    /** @param targetClassName new target class name */
    public void setTargetClassName(final String targetClassName) {
        this.targetClassName = targetClassName;
    }
}
/**
 * @return the DAO this parser uses to save classifier evaluations, or
 *         null if none has been set
 */
public ClassifierEvaluationDao getClassifierEvaluationDao() {
    return this.classifierEvaluationDao;
}
/**
 * @param classifierEvaluationDao
 *            the DAO this parser should use to save classifier evaluations
 */
public void setClassifierEvaluationDao(
        final ClassifierEvaluationDao classifierEvaluationDao) {
    this.classifierEvaluationDao = classifierEvaluationDao;
}
/**
 * Returns the text that precedes the first occurrence of the delimiter.
 *
 * @param line
 *            text to take the token from
 * @param tokDelimPattern
 *            pattern that marks the end of the token
 * @return the leading token, or null if the delimiter is not found or its
 *         first occurrence is at the very start of the line
 */
public static String extractFirstToken(String line, Pattern tokDelimPattern) {
    final Matcher delimiter = tokDelimPattern.matcher(line);
    if (!delimiter.find()) {
        return null;
    }
    // only the first match is considered; a match at index 0 means "no token"
    final int tokenEnd = delimiter.start();
    return tokenEnd > 0 ? line.substring(0, tokenEnd) : null;
}
/**
 * Reads instance ids from a text file that holds one id per line.
 *
 * @param instanceIdFile
 *            path of the file to read
 * @return the ids in file order, or null (after logging a warning) if the
 *         file cannot be opened
 * @throws IOException
 *             if reading or closing the file fails
 * @throws NumberFormatException
 *             if a line is not a valid long
 */
public List<Long> parseInstanceIds(String instanceIdFile)
        throws IOException {
    BufferedReader instanceIdReader = null;
    try {
        instanceIdReader = new BufferedReader(
                new FileReader(instanceIdFile));
        List<Long> instanceIds = new ArrayList<Long>();
        String instanceId;
        while ((instanceId = instanceIdReader.readLine()) != null) {
            instanceIds.add(Long.parseLong(instanceId));
        }
        return instanceIds;
    } catch (FileNotFoundException e) {
        // a missing id file is tolerated: callers get null instead of an exception
        LOGGER.warn(instanceIdFile
                + " not available, instance_ids will not be stored");
        return null;
    } finally {
        if (instanceIdReader != null) {
            instanceIdReader.close();
        }
    }
}
/**
 * Extracts a floating point option value from the libsvm command line.
 *
 * @param pCost
 *            pattern whose first capturing group holds the number
 * @param options
 *            command line to search
 * @return the parsed value; null if the pattern does not match or the
 *         captured text is not a valid double (a warning is logged in the
 *         latter case)
 */
protected Double parseDoubleOption(Pattern pCost, String options) {
    final Matcher optionMatcher = pCost.matcher(options);
    if (!optionMatcher.find()) {
        return null;
    }
    final String rawValue = optionMatcher.group(1);
    Double parsed = null;
    try {
        parsed = Double.valueOf(rawValue);
    } catch (NumberFormatException nfe) {
        LOGGER.warn("could not parse: " + rawValue, nfe);
    }
    return parsed;
}
/**
 * Extracts an integer option value from the libsvm command line.
 * <p>
 * Mirrors {@link #parseDoubleOption(Pattern, String)}: text that matches the
 * pattern but is not a valid int (for example because it overflows) is
 * reported with a warning and treated as absent instead of letting a
 * NumberFormatException escape.
 *
 * @param pKernel
 *            pattern whose first capturing group holds the number
 * @param options
 *            command line to search
 * @return the parsed value; null if the option is not present or cannot be
 *         parsed
 */
protected Integer parseIntOption(Pattern pKernel, String options) {
    Matcher m = pKernel.matcher(options);
    if (m.find()) {
        String toParse = m.group(1);
        try {
            return Integer.parseInt(toParse);
        } catch (NumberFormatException nfe) {
            LOGGER.warn("could not parse: " + toParse, nfe);
        }
    }
    return null;
}
/**
 * Sets the fold, run and label of an evaluation from the name of the
 * instance id file.
 *
 * @param instanceIdFile
 *            file name passed to the FileUtil parse*FromFileName helpers
 * @param eval
 *            evaluation to update in place
 */
protected void initClassifierEvaluation(String instanceIdFile,
ClassifierEvaluation eval) {
// each value is extracted from the file name by the matching FileUtil helper
eval.setFold(FileUtil.parseFoldFromFileName(instanceIdFile));
eval.setRun(FileUtil.parseRunFromFileName(instanceIdFile));
eval.setLabel(FileUtil.parseLabelFromFileName(instanceIdFile));
}
/**
 * Copies the kernel.* settings and the evaluation options line from the
 * given properties into the evaluation.
 *
 * @param props
 *            properties to read kernel.name, kernel.experiment,
 *            kernel.param1, kernel.param2 and the EVAL_LINE option from
 * @param eval
 *            evaluation to update in place
 * @throws NumberFormatException
 *             if kernel.param1 is non-empty but not a valid double
 */
protected void initClassifierEvaluationFromProperties(Properties props,
        ClassifierEvaluation eval) {
    final String kernelName = props.getProperty("kernel.name");
    eval.setName(kernelName);

    final String experiment = props.getProperty("kernel.experiment");
    eval.setExperiment(experiment);

    // param1 is numeric and optional: left untouched when missing or empty
    final String rawParam1 = props.getProperty("kernel.param1");
    final boolean hasParam1 = rawParam1 != null && !rawParam1.isEmpty();
    if (hasParam1) {
        eval.setParam1(Double.parseDouble(rawParam1));
    }

    final String param2 = props.getProperty("kernel.param2");
    eval.setParam2(param2);

    final String optionsKey = ParseOption.EVAL_LINE.getOptionKey();
    eval.setOptions(props.getProperty(optionsKey));
}
/**
 * Loads properties from outputDir/options.properties. Returns empty
 * properties if the file does not exist.
 *
 * @param outputDir
 *            directory expected to contain options.properties
 * @return the properties loaded by FileUtil.loadProperties
 * @throws FileNotFoundException
 *             declared for callers; raised by the underlying loader
 * @throws IOException
 *             if the underlying loader fails to read the file
 */
public Properties loadProps(File outputDir) throws FileNotFoundException,
        IOException {
    final String optionsFile = outputDir.getPath() + File.separator
            + "options.properties";
    return FileUtil.loadProperties(optionsFile, true);
}
/**
 * Tells whether the given path can be read.
 *
 * @param file
 *            path to check
 * @return the result of {@link File#canRead()} for that path
 */
protected boolean checkFileRead(String file) {
    final File candidate = new File(file);
    return candidate.canRead();
}
/**
 * Looks up the data file base name in the kernel properties.
 *
 * @param kernelProps
 *            properties to search
 * @return the value stored under the DATA_BASENAME option key, or that
 *         option's default value if the key is absent
 */
protected String getFileBaseName(Properties kernelProps) {
    final String key = ParseOption.DATA_BASENAME.getOptionKey();
    final String fallback = ParseOption.DATA_BASENAME.getDefaultValue();
    return kernelProps.getProperty(key, fallback);
}
/**
 * Saves a semi-supervised classifier evaluation through the configured DAO.
 * <p>
 * The STORE_INSTANCE_EVAL, STORE_UNLABELED and STORE_IRSTATS options are
 * read from the kernel properties (falling back to each option's default)
 * and are considered enabled when their value equals YES, ignoring case.
 *
 * @param kernelProps
 *            properties holding the store options
 * @param ce
 *            evaluation to save
 * @param classIdToNameMap
 *            class id to class name mapping handed to the DAO
 */
protected void storeSemiSupervised(Properties kernelProps,
        ClassifierEvaluation ce, BiMap<Integer, String> classIdToNameMap) {
    boolean storeInstanceEval = YES.equalsIgnoreCase(kernelProps
            .getProperty(ParseOption.STORE_INSTANCE_EVAL.getOptionKey(),
                    ParseOption.STORE_INSTANCE_EVAL.getDefaultValue()));
    boolean storeUnlabeled = YES.equalsIgnoreCase(kernelProps.getProperty(
            ParseOption.STORE_UNLABELED.getOptionKey(),
            ParseOption.STORE_UNLABELED.getDefaultValue()));
    boolean storeIR = YES.equalsIgnoreCase(kernelProps.getProperty(
            ParseOption.STORE_IRSTATS.getOptionKey(),
            ParseOption.STORE_IRSTATS.getDefaultValue()));
    // save the classifier evaluation; either store flag enables the third
    // argument. NOTE(review): the meaning of the trailing 0 is defined by
    // ClassifierEvaluationDao.saveClassifierEvaluation - confirm there.
    this.getClassifierEvaluationDao().saveClassifierEvaluation(ce,
            classIdToNameMap, storeInstanceEval || storeUnlabeled, storeIR,
            0);
}
/**
* used by semil & svmlin to store semisupervised predictions. these train
* ml and make test predictions in a single step.
*
* @param ce
* updated
* @param listClassInfo
* the class info 0 - instance id, 1 - train/test, 2 - target
* class id
* @param storeUnlabeled
* should the unlabeled predictions be stored?
* @param classIds
* predicted class ids
*/
protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
List> listClassInfo, boolean storeUnlabeled,
int[] classIds) {
for (int i = 0; i < classIds.length; i++) {
List classInfo = listClassInfo.get(i);
long instanceId = classInfo.get(0);
boolean train = classInfo.get(1) == 1;
int targetClassId = classInfo.get(2).intValue();
// if we are storing unlabeled instance ids, save this instance
// evaluation
// else only store it if this is a test instance id - save it
if (storeUnlabeled || !train) {
ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
cie.setClassifierEvaluation(ce);
cie.setInstanceId(instanceId);
cie.setPredictedClassId(classIds[i]);
if (targetClassId != 0)
cie.setTargetClassId(targetClassId);
// add the instance eval to the parent
ce.getClassifierInstanceEvaluations().put(instanceId, cie);
}
}
}
/**
 * Stores semi-supervised predictions given as class names rather than
 * class ids.
 *
 * @param ce
 *            evaluation to update; one instance evaluation is added per
 *            stored prediction, keyed by instance id
 * @param listClassInfo
 *            per-instance class info, aligned with predictedClassNames
 * @param storeUnlabeled
 *            should predictions for training instances be stored? If false
 *            only test instances are stored
 * @param predictedClassNames
 *            predicted class names, one per entry of listClassInfo
 * @param classNameToIdMap
 *            maps class names to class ids
 */
protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
        List<InstanceClassInfo> listClassInfo, boolean storeUnlabeled,
        String[] predictedClassNames, Map<String, Integer> classNameToIdMap) {
    for (int i = 0; i < predictedClassNames.length; i++) {
        InstanceClassInfo classInfo = listClassInfo.get(i);
        boolean train = classInfo.isTrain();
        // if we are storing unlabeled instance ids, save this instance
        // evaluation
        // else only store it if this is a test instance id - save it
        if (storeUnlabeled || !train) {
            ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
            cie.setClassifierEvaluation(ce);
            cie.setInstanceId(classInfo.getInstanceId());
            cie.setPredictedClassId(classNameToIdMap.get(predictedClassNames[i]));
            // keep the lookup boxed: a target class name without a mapping
            // is handled like class id 0 (no target) instead of failing
            // with a NullPointerException on unboxing
            Integer targetClassId = classNameToIdMap.get(classInfo
                    .getTargetClassName());
            if (targetClassId != null && targetClassId.intValue() != 0) {
                cie.setTargetClassId(targetClassId.intValue());
            }
            // add the instance eval to the parent
            ce.getClassifierInstanceEvaluations().put(cie.getInstanceId(), cie);
        }
    }
}
/**
 * Loads the class id to class name mapping from the label-scoped
 * class.properties file in the data directory.
 *
 * @param dataDir
 *            directory whose path is used to build the file name
 * @param label
 *            label used to scope the file name
 * @return map from class id (the property key parsed as an int) to class
 *         name (the property value); empty if the file does not exist
 * @throws IOException
 *             if the file exists but cannot be opened or read
 * @throws NumberFormatException
 *             if a property key is not a valid int
 */
protected BiMap<Integer, String> loadClassIdMap(File dataDir, String label)
        throws IOException {
    BiMap<Integer, String> classIndexMap = HashBiMap.create();
    String filename = FileUtil.getScopedFileName(dataDir.getPath(), label,
            null, null, "class.properties");
    File f = new File(filename);
    if (!f.exists()) {
        return classIndexMap;
    }
    BufferedReader r = null;
    try {
        r = new BufferedReader(new FileReader(f));
        Properties props = new Properties();
        props.load(r);
        for (String key : props.stringPropertyNames()) {
            classIndexMap.put(Integer.parseInt(key),
                    props.getProperty(key));
        }
    } finally {
        // r is still null when the FileReader could not be created; closing
        // it unconditionally would replace the real exception with a
        // NullPointerException
        if (r != null) {
            try {
                r.close();
            } catch (IOException e) {
                // best effort: the data has been read, so only report it
                LOGGER.warn("could not close " + filename, e);
            }
        }
    }
    return classIndexMap;
}
/**
 * Loads instance ids together with their train/test flag and target class
 * name from a whitespace-delimited file.
 * <p>
 * Every non-blank line must hold exactly three tokens: instance id (long),
 * train flag (int, non-zero means train) and target class name. Tokens may
 * be separated by any run of whitespace, and leading or trailing
 * whitespace is ignored.
 *
 * @param dataDir
 *            not used by this method
 * @param classFileName
 *            path of the file to read
 * @return one entry per non-blank line in file order; null if the file
 *         cannot be opened (warning logged) or a line does not hold three
 *         tokens (error logged)
 * @throws IOException
 *             if reading or closing the file fails
 * @throws NumberFormatException
 *             if the id or the train flag is not numeric
 */
protected List<InstanceClassInfo> loadInstanceClassInfo(File dataDir,
        String classFileName) throws IOException {
    // load instance ids and their class ids
    BufferedReader r = null;
    try {
        r = new BufferedReader(new FileReader(classFileName));
        List<InstanceClassInfo> listClassInfo = new ArrayList<InstanceClassInfo>();
        String line;
        while ((line = r.readLine()) != null) {
            String trimmed = line.trim();
            if (trimmed.length() == 0) {
                continue;
            }
            // split on runs of whitespace so that doubled separators or
            // indentation do not produce empty tokens
            String[] classInfoToks = trimmed.split("\\s+");
            if (classInfoToks.length != 3) {
                LOGGER.error("error parsing line: " + line);
                return null;
            }
            listClassInfo.add(new InstanceClassInfo(
                    Long.parseLong(classInfoToks[0]),
                    Integer.parseInt(classInfoToks[1]) != 0,
                    classInfoToks[2]));
        }
        return listClassInfo;
    } catch (FileNotFoundException fe) {
        LOGGER.warn("class.txt file not available: " + classFileName, fe);
        return null;
    } finally {
        if (r != null) {
            r.close();
        }
    }
}
protected List> loadClassInfo(File dataDir, String classFileName)
throws IOException {
List> listClassInfo = null;
// load instance ids and their class ids
BufferedReader r = null;
try {
r = new BufferedReader(new FileReader(classFileName));
listClassInfo = new ArrayList>();
String line = null;
while ((line = r.readLine()) != null) {
if (line.trim().length() > 0) {
String classInfoToks[] = line.split("\\s");
List classInfo = new ArrayList(3);
for (String tok : classInfoToks) {
classInfo.add(Long.parseLong(tok));
}
if (classInfo.size() != 3) {
LOGGER.error("error parsing line: " + line);
return null;
}
listClassInfo.add(classInfo);
}
}
} catch (FileNotFoundException fe) {
LOGGER.warn("class.txt file not available: " + classFileName, fe);
listClassInfo = null;
} finally {
if (r != null) {
r.close();
}
}
return listClassInfo;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy