All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.ctakes.ytex.kernel.BaseClassifierEvaluationParser Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.ytex.kernel;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import org.apache.ctakes.ytex.kernel.dao.ClassifierEvaluationDao;
import org.apache.ctakes.ytex.kernel.model.ClassifierEvaluation;
import org.apache.ctakes.ytex.kernel.model.ClassifierInstanceEvaluation;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * miscellaneous methods used for parsing various output types
 * 
 * @author vhacongarlav
 * 
 */
public abstract class BaseClassifierEvaluationParser implements
		ClassifierEvaluationParser {
	// Class logger; note that it is looked up by the literal name
	// "BaseClassifierEvaluationParser" rather than by the class object.
	static private final Logger LOGGER = LoggerFactory.getLogger( "BaseClassifierEvaluationParser" );

	/** Delimiter pattern: a single whitespace character or the end of input. */
	public static Pattern wsPattern = Pattern.compile("\\s|\\z");
	/** Delimiter pattern: a single whitespace character, a literal dot, or the end of input. */
	public static Pattern wsDotPattern = Pattern.compile("\\s|\\.|\\z");

	/** DAO through which classifier evaluations are saved; injected via the setter below. */
	private ClassifierEvaluationDao classifierEvaluationDao;

	/**
	 * Mutable value holder for one line of a class file: the instance id, a
	 * train/test flag and the name of the instance's target class.
	 */
	public static class InstanceClassInfo {
		long instanceId;
		boolean train;
		String targetClassName;

		/** Creates an empty holder; all fields keep their Java defaults. */
		public InstanceClassInfo() {
		}

		/**
		 * Creates a fully initialized holder.
		 * 
		 * @param instanceId
		 *            id of the instance
		 * @param train
		 *            true if the instance is flagged as a training instance
		 * @param targetClassName
		 *            name of the instance's target class
		 */
		public InstanceClassInfo(long instanceId, boolean train,
				String targetClassName) {
			this.instanceId = instanceId;
			this.train = train;
			this.targetClassName = targetClassName;
		}

		// ---- accessors -------------------------------------------------

		public long getInstanceId() {
			return this.instanceId;
		}

		public boolean isTrain() {
			return this.train;
		}

		public String getTargetClassName() {
			return this.targetClassName;
		}

		// ---- mutators --------------------------------------------------

		public void setInstanceId(long instanceId) {
			this.instanceId = instanceId;
		}

		public void setTrain(boolean train) {
			this.train = train;
		}

		public void setTargetClassName(String targetClassName) {
			this.targetClassName = targetClassName;
		}
	}

	/**
	 * @return the DAO currently configured on this parser, or null if none
	 *         has been set
	 */
	public ClassifierEvaluationDao getClassifierEvaluationDao() {
		return this.classifierEvaluationDao;
	}

	/**
	 * Replaces the DAO used by this parser.
	 * 
	 * @param classifierEvaluationDao
	 *            the DAO to use from now on
	 */
	public void setClassifierEvaluationDao(ClassifierEvaluationDao classifierEvaluationDao) {
		this.classifierEvaluationDao = classifierEvaluationDao;
	}

	/**
	 * Returns the leading token of a line, i.e. the text in front of the first
	 * match of the delimiter pattern.
	 * 
	 * @param line
	 *            text to scan
	 * @param tokDelimPattern
	 *            pattern that marks the end of the token
	 * @return the non-empty prefix preceding the first delimiter match, or
	 *         null if the pattern never matches or first matches at index 0
	 */
	public static String extractFirstToken(String line, Pattern tokDelimPattern) {
		final Matcher delimiter = tokDelimPattern.matcher(line);
		if (!delimiter.find()) {
			return null;
		}
		final int tokenEnd = delimiter.start();
		// a delimiter at position 0 means there is no token in front of it
		return tokenEnd > 0 ? line.substring(0, tokenEnd) : null;
	}

	/**
	 * Reads instance ids from a text file that holds one numeric id per line.
	 * Surrounding whitespace is ignored and blank lines are skipped, matching
	 * the blank-line handling of the class-info loaders in this class.
	 * 
	 * @param instanceIdFile
	 *            path of the file to read
	 * @return the ids in file order, or null if the file could not be opened
	 *         (a warning is logged in that case)
	 * @throws IOException
	 *             if reading or closing the file fails
	 * @throws NumberFormatException
	 *             if a non-blank line is not a valid long
	 */
	public List<Long> parseInstanceIds(String instanceIdFile)
			throws IOException {
		// try-with-resources closes the reader on every exit path
		try (BufferedReader instanceIdReader = new BufferedReader(
				new FileReader(instanceIdFile))) {
			List<Long> instanceIds = new ArrayList<Long>();
			String line;
			while ((line = instanceIdReader.readLine()) != null) {
				String instanceId = line.trim();
				if (instanceId.length() > 0) {
					instanceIds.add(Long.parseLong(instanceId));
				}
			}
			return instanceIds;
		} catch (FileNotFoundException e) {
			// a missing file is tolerated: callers get null and skip the ids
			LOGGER.warn(instanceIdFile
					+ " not available, instance_ids will not be stored");
			return null;
		}
	}

	/**
	 * Extracts a floating point value from the libsvm command line: the value
	 * is the text captured by group 1 of the first match of the pattern.
	 * 
	 * @param pCost
	 *            pattern whose first capturing group holds the number
	 * @param options
	 *            the option string to search
	 * @return the parsed value; null if the pattern does not match, or if the
	 *         captured text is not a valid double (a warning is logged then)
	 */
	protected Double parseDoubleOption(Pattern pCost, String options) {
		final Matcher optionMatcher = pCost.matcher(options);
		if (!optionMatcher.find()) {
			return null;
		}
		final String captured = optionMatcher.group(1);
		Double parsed = null;
		try {
			parsed = Double.parseDouble(captured);
		} catch (NumberFormatException nfe) {
			LOGGER.warn("could not parse: " + captured, nfe);
		}
		return parsed;
	}

	/**
	 * Extracts an integer value from the libsvm command line: the value is the
	 * text captured by group 1 of the first match of the pattern.
	 * <p>
	 * Error handling mirrors {@code parseDoubleOption}: text that cannot be
	 * parsed as an int (including values that overflow an int) is logged as a
	 * warning and reported as null instead of raising NumberFormatException.
	 * 
	 * @param pKernel
	 *            pattern whose first capturing group holds the number
	 * @param options
	 *            the option string to search
	 * @return the parsed value; null if the option is not present or its
	 *         value cannot be parsed
	 */
	protected Integer parseIntOption(Pattern pKernel, String options) {
		Matcher m = pKernel.matcher(options);
		if (m.find()) {
			String toParse = m.group(1);
			try {
				return Integer.parseInt(toParse);
			} catch (NumberFormatException nfe) {
				LOGGER.warn("could not parse: " + toParse, nfe);
			}
		}
		return null;
	}

	/**
	 * Sets the fold, run and label of an evaluation to the values that the
	 * FileUtil parse*FromFileName helpers extract from the given file name.
	 * 
	 * @param instanceIdFile
	 *            file name handed to the FileUtil helpers
	 * @param eval
	 *            evaluation that is updated in place
	 */
	protected void initClassifierEvaluation(String instanceIdFile,
			ClassifierEvaluation eval) {
		eval.setFold(FileUtil.parseFoldFromFileName(instanceIdFile));
		eval.setRun(FileUtil.parseRunFromFileName(instanceIdFile));
		eval.setLabel(FileUtil.parseLabelFromFileName(instanceIdFile));
	}

	/**
	 * Copies kernel settings from a properties object onto an evaluation:
	 * name, experiment, param1 (only when a non-empty value is configured),
	 * param2 and the options stored under the EVAL_LINE option key.
	 * 
	 * @param props
	 *            source of the kernel.* settings
	 * @param eval
	 *            evaluation that is updated in place
	 * @throws NumberFormatException
	 *             if kernel.param1 is non-empty but not a valid double
	 */
	protected void initClassifierEvaluationFromProperties(Properties props,
			ClassifierEvaluation eval) {
		eval.setName(props.getProperty("kernel.name"));
		eval.setExperiment(props.getProperty("kernel.experiment"));

		final String param1Text = props.getProperty("kernel.param1");
		final boolean hasParam1 = param1Text != null && !param1Text.isEmpty();
		if (hasParam1) {
			eval.setParam1(Double.parseDouble(param1Text));
		}

		eval.setParam2(props.getProperty("kernel.param2"));

		final String evalLineKey = ParseOption.EVAL_LINE.getOptionKey();
		eval.setOptions(props.getProperty(evalLineKey));
	}

	/**
	 * Loads the properties stored in outputDir/options.properties by
	 * delegating to FileUtil.loadProperties with its boolean flag set to true
	 * (as previously documented here, this yields empty properties when the
	 * file does not exist).
	 * 
	 * @param outputDir
	 *            directory expected to contain options.properties
	 * @return whatever FileUtil.loadProperties returns for that path
	 * @throws FileNotFoundException
	 * @throws IOException
	 */
	public Properties loadProps(File outputDir) throws FileNotFoundException,
			IOException {
		final String optionsPath = outputDir.getPath() + File.separator
				+ "options.properties";
		return FileUtil.loadProperties(optionsPath, true);
	}

	/**
	 * Tells whether the given path can be read.
	 * 
	 * @param file
	 *            path to check
	 * @return the result of {@link File#canRead()} for that path
	 */
	protected boolean checkFileRead(String file) {
		final File candidate = new File(file);
		return candidate.canRead();
	}

	/**
	 * Looks up the data base name in the kernel properties.
	 * 
	 * @param kernelProps
	 *            properties to read
	 * @return the value stored under the DATA_BASENAME option key, or that
	 *         option's default value when the key is absent
	 */
	protected String getFileBaseName(Properties kernelProps) {
		final String key = ParseOption.DATA_BASENAME.getOptionKey();
		final String fallback = ParseOption.DATA_BASENAME.getDefaultValue();
		return kernelProps.getProperty(key, fallback);
	}

	/**
	 * Saves a classifier evaluation through the configured DAO, using the
	 * STORE_INSTANCE_EVAL, STORE_UNLABELED and STORE_IRSTATS kernel options to
	 * decide what is stored. Instance evaluations are saved when either
	 * STORE_INSTANCE_EVAL or STORE_UNLABELED is enabled.
	 * 
	 * @param kernelProps
	 *            kernel options
	 * @param ce
	 *            evaluation to save
	 * @param classIdToNameMap
	 *            class id to class name mapping handed on to the DAO
	 */
	protected void storeSemiSupervised(Properties kernelProps,
			ClassifierEvaluation ce, BiMap<Integer, String> classIdToNameMap) {
		boolean storeInstanceEval = isOptionEnabled(kernelProps,
				ParseOption.STORE_INSTANCE_EVAL);
		boolean storeUnlabeled = isOptionEnabled(kernelProps,
				ParseOption.STORE_UNLABELED);
		boolean storeIR = isOptionEnabled(kernelProps,
				ParseOption.STORE_IRSTATS);
		// save the classifier evaluation
		this.getClassifierEvaluationDao().saveClassifierEvaluation(ce, classIdToNameMap,
				storeInstanceEval || storeUnlabeled, storeIR, 0);
	}

	/**
	 * Tells whether an option's configured value (or its default when the key
	 * is absent) equals YES, ignoring case.
	 */
	private static boolean isOptionEnabled(Properties kernelProps,
			ParseOption option) {
		return YES.equalsIgnoreCase(kernelProps.getProperty(
				option.getOptionKey(), option.getDefaultValue()));
	}

	/**
	 * used by semil & svmlin to store semisupervised predictions. these train
	 * ml and make test predictions in a single step.
	 * 
	 * @param ce
	 *            updated
	 * @param listClassInfo
	 *            the class info 0 - instance id, 1 - train/test, 2 - target
	 *            class id
	 * @param storeUnlabeled
	 *            should the unlabeled predictions be stored?
	 * @param classIds
	 *            predicted class ids
	 */
	protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
			List> listClassInfo, boolean storeUnlabeled,
			int[] classIds) {
		for (int i = 0; i < classIds.length; i++) {
			List classInfo = listClassInfo.get(i);
			long instanceId = classInfo.get(0);
			boolean train = classInfo.get(1) == 1;
			int targetClassId = classInfo.get(2).intValue();
			// if we are storing unlabeled instance ids, save this instance
			// evaluation
			// else only store it if this is a test instance id - save it
			if (storeUnlabeled || !train) {
				ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
				cie.setClassifierEvaluation(ce);
				cie.setInstanceId(instanceId);
				cie.setPredictedClassId(classIds[i]);
				if (targetClassId != 0)
					cie.setTargetClassId(targetClassId);
				// add the instance eval to the parent
				ce.getClassifierInstanceEvaluations().put(instanceId, cie);
			}
		}
	}

	/**
	 * Stores semi-supervised predictions that are given as class names.
	 * <p>
	 * An instance evaluation is created for index i when storeUnlabeled is
	 * true or the instance is not a training instance; it is added to the
	 * evaluation's instance map keyed by instance id. The target class id is
	 * only set when the instance's target class name is present in the map
	 * and maps to a non-zero id.
	 * 
	 * @param ce
	 *            updated
	 * @param listClassInfo
	 *            instance info, parallel to predictedClassNames
	 * @param storeUnlabeled
	 *            should the unlabeled predictions be stored?
	 * @param predictedClassNames
	 *            predicted class names, one per instance
	 * @param classNameToIdMap
	 *            class name to class id lookup
	 */
	protected void updateSemiSupervisedPredictions(ClassifierEvaluation ce,
			List<InstanceClassInfo> listClassInfo, boolean storeUnlabeled,
			String[] predictedClassNames, Map<String, Integer> classNameToIdMap) {
		for (int i = 0; i < predictedClassNames.length; i++) {
			InstanceClassInfo classInfo = listClassInfo.get(i);
			boolean train = classInfo.isTrain();
			// if we are storing unlabeled instance ids, save this instance
			// evaluation
			// else only store it if this is a test instance id - save it
			if (storeUnlabeled || !train) {
				ClassifierInstanceEvaluation cie = new ClassifierInstanceEvaluation();
				cie.setClassifierEvaluation(ce);
				cie.setInstanceId(classInfo.getInstanceId());
				cie.setPredictedClassId(classNameToIdMap.get(predictedClassNames[i]));
				// keep the boxed value: a target class name missing from the
				// map must not blow up with a NullPointerException on unboxing
				Integer targetClassId = classNameToIdMap.get(classInfo.getTargetClassName());
				if (targetClassId != null && targetClassId.intValue() != 0)
					cie.setTargetClassId(targetClassId);
				// add the instance eval to the parent
				ce.getClassifierInstanceEvaluations().put(cie.getInstanceId(), cie);
			}
		}
	}
	
	/**
	 * Loads the class id to class name mapping from the "class.properties"
	 * file that FileUtil.getScopedFileName resolves for the given directory
	 * and label. Each property key is parsed as the integer class id and its
	 * value is used as the class name.
	 * 
	 * @param dataDir
	 *            directory passed to FileUtil.getScopedFileName
	 * @param label
	 *            label passed to FileUtil.getScopedFileName
	 * @return the mapping; empty if the file does not exist
	 * @throws IOException
	 *             if the file exists but cannot be opened, read or closed
	 * @throws NumberFormatException
	 *             if a property key is not a valid int
	 */
	protected BiMap<Integer, String> loadClassIdMap(File dataDir, String label)
			throws IOException {
		BiMap<Integer, String> classIndexMap = HashBiMap.create();
		String filename = FileUtil.getScopedFileName(dataDir.getPath(), label,
				null, null, "class.properties");
		File f = new File(filename);
		if (f.exists()) {
			// try-with-resources: the reader is only closed if it was actually
			// opened, so a failure to open the file is reported as such
			// instead of being masked by a NullPointerException on close
			try (BufferedReader r = new BufferedReader(new FileReader(f))) {
				Properties props = new Properties();
				props.load(r);
				for (String key : props.stringPropertyNames()) {
					classIndexMap.put(Integer.parseInt(key),
							props.getProperty(key));
				}
			}
		}
		return classIndexMap;
	}

	/**
	 * Loads instance class info from a whitespace-delimited text file. Every
	 * non-blank line must hold exactly three tokens: the instance id (long),
	 * the train flag (int, non-zero means train) and the target class name.
	 * Leading/trailing whitespace and runs of whitespace between tokens are
	 * tolerated.
	 * 
	 * @param dataDir
	 *            not used by this method
	 * @param classFileName
	 *            path of the file to read
	 * @return one entry per non-blank line in file order; null if the file
	 *         could not be opened or a line does not have three tokens (the
	 *         problem is logged in both cases)
	 * @throws IOException
	 *             if reading or closing the file fails
	 * @throws NumberFormatException
	 *             if the id or the train flag is not numeric
	 */
	protected List<InstanceClassInfo> loadInstanceClassInfo(File dataDir,
			String classFileName) throws IOException {
		// load instance ids and their class ids
		try (BufferedReader r = new BufferedReader(new FileReader(
				classFileName))) {
			List<InstanceClassInfo> listClassInfo = new ArrayList<InstanceClassInfo>();
			String line;
			while ((line = r.readLine()) != null) {
				String trimmed = line.trim();
				if (trimmed.length() == 0) {
					continue;
				}
				// "\\s+" so that repeated delimiters do not yield empty tokens
				String[] classInfoToks = trimmed.split("\\s+");
				if (classInfoToks.length != 3) {
					LOGGER.error("error parsing line: " + line);
					return null;
				}
				listClassInfo.add(new InstanceClassInfo(
						Long.parseLong(classInfoToks[0]),
						Integer.parseInt(classInfoToks[1]) != 0,
						classInfoToks[2]));
			}
			return listClassInfo;
		} catch (FileNotFoundException fe) {
			LOGGER.warn("class.txt file not available: " + classFileName, fe);
			return null;
		}
	}

	protected List> loadClassInfo(File dataDir, String classFileName)
			throws IOException {
		List> listClassInfo = null;
		// load instance ids and their class ids
		BufferedReader r = null;
		try {
			r = new BufferedReader(new FileReader(classFileName));
			listClassInfo = new ArrayList>();
			String line = null;
			while ((line = r.readLine()) != null) {
				if (line.trim().length() > 0) {
					String classInfoToks[] = line.split("\\s");
					List classInfo = new ArrayList(3);
					for (String tok : classInfoToks) {
						classInfo.add(Long.parseLong(tok));
					}
					if (classInfo.size() != 3) {
						LOGGER.error("error parsing line: " + line);
						return null;
					}
					listClassInfo.add(classInfo);
				}
			}
		} catch (FileNotFoundException fe) {
			LOGGER.warn("class.txt file not available: " + classFileName, fe);
			listClassInfo = null;
		} finally {
			if (r != null) {
				r.close();
			}
		}
		return listClassInfo;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy