All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.ctakes.ytex.svmlight.SVMLightParser Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.ytex.svmlight;

import org.apache.ctakes.ytex.kernel.model.ClassifierInstanceEvaluation;
import org.apache.ctakes.ytex.kernel.model.SVMClassifierEvaluation;
import org.apache.ctakes.ytex.libsvm.LibSVMParser;

import java.io.*;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * same as libsvm with following changes:
 * 
    *
  • parse output.txt - contains vcdim and number support vectors < *
*/ public class SVMLightParser extends LibSVMParser { static final Pattern psv = Pattern.compile("Number of SV:\\s(\\d+)\\s.*"); static final Pattern pvc = Pattern .compile("Estimated VCdim of classifier: VCdim<=([\\d\\.]+)"); /** * Parse svm-classify input (instance file) and predictions (prediction * file). instance file has target class id and attributes for each * instance. predict file has value less than or greater than 0 for each * instance, corresponding to class ids -1 and +1. * * @param predictionFile * @param instanceFile * @return * @throws IOException */ @Override protected void parsePredictions(String predictionFile, String instanceFile, Properties props, String instanceIdFile, SVMClassifierEvaluation eval) throws IOException { List instanceIds = null; if (instanceIdFile != null) instanceIds = parseInstanceIds(instanceIdFile); boolean bStoreUnlabeled = YES.equals(props.getProperty( ParseOption.STORE_UNLABELED.getOptionKey(), ParseOption.STORE_UNLABELED.getDefaultValue())); BufferedReader instanceReader = null; BufferedReader predictionReader = null; try { instanceReader = new BufferedReader(new FileReader(instanceFile)); predictionReader = new BufferedReader( new FileReader(predictionFile)); String instanceLine = null; String predictionLine = null; int nLine = 0; while (((instanceLine = instanceReader.readLine()) != null) && ((predictionLine = predictionReader.readLine()) != null)) { long instanceId = instanceIds.size() > nLine ? instanceIds .get(nLine) : nLine; nLine++; int classIdTarget = Integer.parseInt(extractFirstToken( instanceLine, wsPattern)); // only store unlabeled instances if configured to do so if (bStoreUnlabeled || classIdTarget != 0) { ClassifierInstanceEvaluation result = new ClassifierInstanceEvaluation(); // target class id is null for unlabeled instances result.setTargetClassId(classIdTarget == 0 ? null : classIdTarget); int classIdPredicted = 0; try { double dPredict = Double.parseDouble(predictionLine); if (dPredict > 0) classIdPredicted = 1; else classIdPredicted = -1; } catch (NumberFormatException nfe) { System.err.println("error parsing:" + predictionLine); nfe.printStackTrace(System.err); } result.setPredictedClassId(classIdPredicted); result.setInstanceId(instanceId); result.setClassifierEvaluation(eval); eval.getClassifierInstanceEvaluations().put(instanceId, result); } } } finally { if (instanceReader != null) { try { instanceReader.close(); } catch (Exception e) { e.printStackTrace(System.err); } } if (predictionReader != null) { try { predictionReader.close(); } catch (Exception e) { e.printStackTrace(System.err); } } } } /** *
	 * Number of SV: 133 (including 0 at upper bound)
	 * L1 loss: loss=0.00000
	 * Norm of weight vector: |w|=2.09380
	 * Norm of longest example vector: |x|=16.91153
	 * Estimated VCdim of classifier: VCdim<=684.90185
	 * 
* * @param eval * @param trainOutputFile * @throws IOException */ private void parseTrainOutput(SVMClassifierEvaluation eval, String trainOutputFile) throws IOException { if (trainOutputFile == null) return; BufferedReader r = null; try { r = new BufferedReader(new FileReader(trainOutputFile)); String line = null; while ((line = r.readLine()) != null) { Matcher m = psv.matcher(line); if (m.matches()) eval.setSupportVectors(Integer.parseInt(m.group(1))); m = pvc.matcher(line); if (m.matches()) eval.setVcdim(Double.parseDouble(m.group(1))); } } catch (FileNotFoundException fnfe) { // ignore } finally { if (r != null) r.close(); } } /** * parse output.txt - contains vcdim and number support vectors */ @Override protected void parseResults(File dataDir, File outputDir, String model, String predict, SVMClassifierEvaluation eval, Properties props) throws IOException { super.parseResults(dataDir, outputDir, model, predict, eval, props); eval.setAlgorithm("svmlight"); parseTrainOutput(eval, outputDir + File.separator + "output.txt"); } /** * store semi supervised results. */ @Override protected void storeResults(File dataDir, Properties props, SVMClassifierEvaluation eval) { this.storeSemiSupervised(props, eval, null); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy