// org.apache.ctakes.ytex.kernel.SvmlinEvaluationParser (listing page header: Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import com.google.common.collect.BiMap;
import org.apache.ctakes.ytex.kernel.model.SVMClassifierEvaluation;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
public class SvmlinEvaluationParser extends BaseClassifierEvaluationParser {
/** Logger for this parser; the logger name is the simple class name. */
private static final Logger LOGGER = LoggerFactory.getLogger( "SvmlinEvaluationParser" );
/**
 * Matches the <code>-A</code> option on the svmlin command line; group 1 is
 * its single-digit value. The parsed value is stored as the evaluation's
 * "kernel".
 */
public static final Pattern pAlgo = Pattern.compile("-A\\s+(\\d)");
/**
 * Matches the <code>-W</code> option on the svmlin command line; group 1 is
 * its numeric value (digits, '.', 'e'/'E' and '-' are accepted). The parsed
 * value is stored as the evaluation's "cost".
 */
public static final Pattern pLambdaW = Pattern.compile("-W\\s+([\\d\\.eE-]+)");
/**
 * Matches the <code>-U</code> option on the svmlin command line; group 1 is
 * its numeric value (digits, '.', 'e'/'E' and '-' are accepted). The parsed
 * value is stored as the evaluation's "gamma". The field name (sic) is kept
 * as-is because it is public.
 */
public static final Pattern pLambaU = Pattern.compile("-U\\s+([\\d\\.eE-]+)");
/**
 * Parses the svmlin results found in a directory.
 * <p>
 * The file <code>options.properties</code> is expected in
 * <code>outputDir</code>. When <code>checkFileRead</code> rejects that file
 * nothing is parsed; otherwise the properties are loaded, a new
 * {@link SVMClassifierEvaluation} with algorithm <code>"svmlin"</code> is
 * created, and both are handed to <code>parseResults</code>.
 *
 * @param dataDir
 *            directory passed through to <code>parseResults</code> as the
 *            data directory
 * @param outputDir
 *            directory that contains <code>options.properties</code>
 * @throws IOException
 *             propagated from loading the properties or parsing the results
 */
@Override
public void parseDirectory(File dataDir, File outputDir) throws IOException {
    final String optionsPath = outputDir.getPath() + File.separator + "options.properties";
    if (!checkFileRead(optionsPath)) {
        // options file not usable: nothing to parse for this directory
        return;
    }
    final Properties options = this.loadProps(outputDir);
    final SVMClassifierEvaluation evaluation = new SVMClassifierEvaluation();
    evaluation.setAlgorithm("svmlin");
    parseResults(dataDir, outputDir, evaluation, options);
}
/**
 * Fills in the evaluation from the properties and the svmlin output files,
 * then stores it.
 * <p>
 * The <code>-A</code>, <code>-W</code> and <code>-U</code> values of the
 * evaluation command line are copied into the evaluation's kernel, cost and
 * gamma; the kernel falls back to 1 when <code>-A</code> yields no value.
 * Predictions are only parsed and stored when a file base name is available
 * and <code>loadInstanceClassInfo</code> returns a non-null list.
 *
 * @param dataDir
 *            directory holding <code>&lt;base&gt;id.txt</code> and the class
 *            id map
 * @param outputDir
 *            directory holding the svmlin output files
 * @param eval
 *            evaluation to populate
 * @param props
 *            contents of options.properties
 * @throws IOException
 *             propagated from the helpers that read the files
 */
private void parseResults(File dataDir, File outputDir,
        SVMClassifierEvaluation eval, Properties props) throws IOException {
    // fold, run and label are derived from the file base name
    final String baseName = this.getFileBaseName(props);
    initClassifierEvaluation(baseName, eval);
    // settings shared by all classifier evaluations
    initClassifierEvaluationFromProperties(props, eval);

    // svmlin command line options
    final String commandLine = props.getProperty(ParseOption.EVAL_LINE.getOptionKey());
    if (commandLine != null) {
        eval.setKernel(parseIntOption(pAlgo, commandLine));
        if (eval.getKernel() == null) {
            eval.setKernel(1);
        }
        eval.setCost(parseDoubleOption(pLambdaW, commandLine));
        eval.setGamma(parseDoubleOption(pLambaU, commandLine));
    }

    if (baseName == null || baseName.isEmpty()) {
        LOGGER.warn("couldn't parse directory; kernel.label.base not defined. Dir: "
                + outputDir);
        return;
    }

    final List instanceInfos = loadInstanceClassInfo(dataDir, baseName + "id.txt");
    if (instanceInfos == null) {
        return;
    }

    // process the .outputs files, then persist the evaluation
    final BiMap idToClassName = loadClassIdMap(dataDir, eval.getLabel());
    parseSvmlinOutput(dataDir, outputDir, eval, baseName, props,
            instanceInfos, idToClassName);
    storeSemiSupervised(props, eval, idToClassName);
}
/**
 * Reads the svmlin decision values for every code listed in
 * <code>&lt;fileBaseName&gt;code.properties</code>, decides the predicted
 * class of each instance and passes the predictions to
 * <code>updateSemiSupervisedPredictions</code>. Supports multi-class
 * classification.
 * <p>
 * Binary case: exactly one code is listed and <code>classOtherName</code> is
 * defined. An instance gets the code's class when its decision value is
 * greater than 0, otherwise <code>classOtherName</code>.
 * <p>
 * Otherwise the class with the highest decision value wins; on a tie the
 * class whose name sorts last wins.
 *
 * @param dataDir
 *            directory containing <code>&lt;fileBaseName&gt;code.properties</code>
 * @param outputDir
 *            directory containing one <code>.outputs</code> file per code
 * @param eval
 *            evaluation the predictions are attached to
 * @param fileBaseName
 *            prefix of the code.properties file name
 * @param props
 *            options; only the STORE_UNLABELED option is read here
 * @param listClassInfo
 *            one entry per instance; only its size is used here (element
 *            type is not visible in this file), the list itself is passed on
 * @param classIdToNameMap
 *            class id to class name map; its inverse is passed on
 * @throws IOException
 *             if code.properties lists no codes, a code has no
 *             <code>.className</code> entry or is shorter than the
 *             <code>.txt</code> suffix, or an outputs file cannot be read
 */
private void parseSvmlinOutput(File dataDir, File outputDir,
        SVMClassifierEvaluation eval, String fileBaseName,
        Properties props, List listClassInfo,
        BiMap classIdToNameMap) throws IOException {
    Properties codeProps = FileUtil.loadProperties(
            dataDir.getAbsolutePath() + "/" + fileBaseName
                    + "code.properties", false);
    String codesProperty = codeProps.getProperty("codes", "").trim();
    String[] codes = codesProperty.split(",");
    // "".split(",") returns a one-element array holding "", so the array
    // length alone cannot detect a missing or empty "codes" property
    if (codesProperty.isEmpty() || codes.length == 0) {
        throw new IOException("invalid code.properties: " + fileBaseName);
    }
    String otherClassName = null;
    if (codes.length == 1) {
        otherClassName = codeProps.getProperty("classOtherName");
    }
    final int instanceCount = listClassInfo.size();
    final int suffixLength = ".txt".length();
    // class name -> decision value per instance, ordered by class name
    SortedMap<String, double[]> codeToPredictionMap = new TreeMap<>();
    for (String rawCode : codes) {
        String code = rawCode.trim();
        String className = codeProps.getProperty(code + ".className");
        if (className == null || code.length() < suffixLength) {
            throw new IOException("invalid code.properties: " + fileBaseName
                    + " (code '" + code + "')");
        }
        // the outputs file is named after the code with its last 4
        // characters (the ".txt" suffix) removed
        String codeBase = code.substring(0, code.length() - suffixLength);
        codeToPredictionMap.put(
                className,
                readPredictions(outputDir.getAbsolutePath() + "/" + codeBase
                        + ".outputs", instanceCount));
    }
    // figure out the winning class for each instance
    String[] classPredictions = new String[instanceCount];
    if (otherClassName != null) {
        // binary: the sign of the single classifier's output decides
        Map.Entry<String, double[]> classToPred = codeToPredictionMap
                .entrySet().iterator().next();
        for (int i = 0; i < instanceCount; i++) {
            classPredictions[i] = classToPred.getValue()[i] > 0 ? classToPred
                    .getKey() : otherClassName;
        }
    } else {
        for (int i = 0; i < instanceCount; i++) {
            String bestClass = null;
            double bestScore = 0;
            for (Map.Entry<String, double[]> classToPred : codeToPredictionMap
                    .entrySet()) {
                double score = classToPred.getValue()[i];
                // Double.compare with >= orders values like a sorted map
                // keyed by Double and lets later (higher-sorting) class
                // names win ties
                if (bestClass == null || Double.compare(score, bestScore) >= 0) {
                    bestClass = classToPred.getKey();
                    bestScore = score;
                }
            }
            classPredictions[i] = bestClass;
        }
    }
    boolean storeUnlabeled = YES.equalsIgnoreCase(props.getProperty(
            ParseOption.STORE_UNLABELED.getOptionKey(),
            ParseOption.STORE_UNLABELED.getDefaultValue()));
    updateSemiSupervisedPredictions(eval, listClassInfo, storeUnlabeled,
            classPredictions, classIdToNameMap.inverse());
}
/**
 * Reads one numeric prediction per line from a file.
 *
 * @param predict
 *            path of the file to read
 * @param expectedSize
 *            exact number of predictions the file must contain
 * @return the parsed predictions in file order; the array length is
 *         <code>expectedSize</code>
 * @throws FileNotFoundException
 *             if the file cannot be opened
 * @throws IOException
 *             if reading fails, a line is not a valid number, or the file
 *             holds more or fewer lines than <code>expectedSize</code>
 */
private double[] readPredictions(String predict, int expectedSize)
        throws FileNotFoundException, IOException {
    double[] predictions = new double[expectedSize];
    int count = 0;
    try (BufferedReader outputReader = new BufferedReader(new FileReader(predict))) {
        String prediction;
        while ((prediction = outputReader.readLine()) != null) {
            if (count >= expectedSize) {
                throw new IOException(predict
                        + ": more predictions than expected");
            }
            try {
                predictions[count] = Double.parseDouble(prediction);
            } catch (NumberFormatException e) {
                // report the offending file and line instead of a bare
                // unchecked exception
                throw new IOException(predict + ": invalid prediction on line "
                        + (count + 1) + ": '" + prediction + "'", e);
            }
            count++;
        }
    }
    // every slot must be filled; a short file would otherwise leave
    // trailing 0.0 values that look like real predictions
    if (count < expectedSize) {
        throw new IOException(predict
                + ": less predictions than expected");
    }
    return predictions;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy