All downloads are free. The search and download functionality uses the official Maven repository.

org.maltparser.ml.liblinear.Liblinear Maven / Gradle / Ivy

Go to download

MaltParser is a system for data-driven dependency parsing, which can be used to induce a parsing model from treebank data and to parse new data using an induced model.

The newest version!
package org.maltparser.ml.liblinear;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Model;
import de.bwaldvogel.liblinear.Parameter;
import de.bwaldvogel.liblinear.Problem;
import de.bwaldvogel.liblinear.SolverType;



import org.maltparser.core.config.Configuration;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureVector;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.MultipleFeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.helper.NoPrintStream;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.ml.LearningMethod;
import org.maltparser.parser.DependencyParserConfig;
import org.maltparser.parser.guide.instance.InstanceModel;
import org.maltparser.parser.history.action.SingleDecision;
import org.maltparser.parser.history.kbest.KBestList;
import org.maltparser.parser.history.kbest.ScoredKBestList;



public class Liblinear implements LearningMethod {
	/** Liblinear version string exposed by this learner. */
	public final static String LIBLINEAR_VERSION = "1.51";
	/**
	 * Verbosity levels used when echoing the output of the external trainer.
	 * The spelling of the type name is kept as is because it is part of the public API.
	 */
	public enum Verbostity {
		SILENT, ERROR, ALL
	}
	/** Liblinear options keyed by option name, kept in insertion order. */
	private LinkedHashMap<String, String> liblinearOptions;
	 
	/** The instance model that owns this learner. */
	protected InstanceModel owner;
	/** The learner mode passed to the constructor. */
	protected int learnerMode;
	/** The learning method name, used as part of every file name this learner reads or writes. */
	protected String name;
	/** Number of instances counted so far. */
	protected int numberOfInstances;
	/** If false, the instance file is deleted after training. */
	protected boolean saveInstanceFiles;
	/** If true, null feature values are written as -1 and skipped when building feature vectors. */
	protected boolean excludeNullValues;
	/** Path to an external Liblinear trainer, or null to train in-process. */
	protected String pathExternalLiblinearTrain = null;
//	private int[] cardinalities;
	/**
	 * Instance output stream writer 
	 */
	private BufferedWriter instanceOutput = null; 
	/**
	 * Liblinear model object, only used during classification.
	 */
	private Model model = null;
	
	/**
	 * Parameter string
	 */
	private String paramString;

	/** Scratch list of feature nodes used on the non-pruning code paths. */
	private ArrayList<FeatureNode> xlist = null;

	/** Verbosity applied to the output of the external trainer. */
	private Verbostity verbosity;
	
	/** Maps a packed (feature position, feature code) key to a dense Liblinear feature index. */
	private HashMap<Long, Integer> featureMap;
	/** Next dense feature index to hand out; indices start at 1. */
	private int featureCounter = 1;
	/** Whether the feature-map based (pruned) representation is used. */
	private boolean featurePruning = false;
	/** Scratch set collecting the feature nodes of the instance currently being built. */
	private TreeSet<XNode> featureSet;
	
	/**
	 * Constructs a Liblinear learner.
	 * 
	 * @param owner the guide model owner
	 * @param learnerMode the mode of the learner TRAIN or CLASSIFY
	 */
	public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
		setOwner(owner);
		setLearningMethodName("liblinear");
		setLearnerMode(learnerMode.intValue());
		setNumberOfInstances(0);
		verbosity = Verbostity.SILENT;

		liblinearOptions = new LinkedHashMap();
		initLiblinearOptions();
		parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
		initSpecialParameters();
		
		if (learnerMode == BATCH) {
			if (featurePruning) {
				featureMap = new HashMap();
			}
			instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
		} 
		if (featurePruning) {
			featureSet = new TreeSet();
		}
	}
	
	/**
	 * Returns the dense feature index for a (position, code) pair, assigning the next free index
	 * when the pair has not been seen before.
	 * 
	 * @param featurePosition the position of the feature, stored in the bits above bit 47 of the key
	 * @param code the feature value code, OR-ed into the key
	 * @return the existing or newly assigned feature index
	 */
	private int addFeatureMapValue(int featurePosition, int code) {
		final long packedKey = ((((long)featurePosition) << 48) | (long)code);
		if (!featureMap.containsKey(packedKey)) {
			final int assignedIndex = featureCounter++;
			featureMap.put(packedKey, assignedIndex);
			return assignedIndex;
		}
		return featureMap.get(packedKey);
	}
	
	/**
	 * Looks up the dense feature index for a (position, code) pair without modifying the map.
	 * 
	 * @param featurePosition the position of the feature, stored in the bits above bit 47 of the key
	 * @param code the feature value code, OR-ed into the key
	 * @return the mapped feature index, or -1 if the pair is not in the feature map
	 */
	private int getFeatureMapValue(int featurePosition, int code) {
		final long packedKey = ((((long)featurePosition) << 48) | (long)code);
		if (!featureMap.containsKey(packedKey)) {
			return -1;
		}
		return featureMap.get(packedKey);
	}
	
	/**
	 * Serializes the given map to the output stream with Java object serialization and closes the stream.
	 * 
	 * @param os the stream to write to
	 * @param map the feature map to store
	 * @throws MaltChainedException if writing fails
	 */
	private void saveFeatureMap(OutputStream os, HashMap map) throws MaltChainedException {
		try {
			final ObjectOutputStream objectOut = new ObjectOutputStream(os);
			objectOut.writeObject(map);
			objectOut.close();
		} catch (IOException e) {
			throw new LiblinearException("Save feature map error", e);
		}
	}
	
	/**
	 * Reads a feature map that was written with Java object serialization and closes the stream.
	 * 
	 * @param is the stream to read from
	 * @return the deserialized feature map
	 * @throws MaltChainedException if the stream cannot be read or the stored class cannot be resolved
	 */
	private HashMap loadFeatureMap(InputStream is) throws MaltChainedException {
		try {
			final ObjectInputStream objectIn = new ObjectInputStream(is);
			final HashMap loaded = (HashMap)objectIn.readObject();
			objectIn.close();
			return loaded;
		} catch (ClassNotFoundException e) {
			throw new LiblinearException("Load feature map error", e);
		} catch (IOException e) {
			throw new LiblinearException("Load feature map error", e);
		}
	}
	
	/**
	 * Appends one training instance to the instance file.
	 * <p>
	 * The line starts with the decision code, followed by one tab-terminated column per feature.
	 * A single-valued feature is written as its index code, a multi-valued feature as its codes
	 * joined with '|', and a null value as -1 when null values are excluded.
	 * 
	 * @param decision the decision whose code becomes the class label
	 * @param featureVector the features describing the instance
	 * @throws MaltChainedException if an argument is missing or the instance file cannot be written
	 */
	public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
		if (featureVector == null) {
			throw new LiblinearException("The feature vector cannot be found");
		} else if (decision == null) {
			throw new LiblinearException("The decision cannot be found");
		}	
		
		final StringBuilder sb = new StringBuilder();
		try {
			sb.append(decision.getDecisionCode()).append('\t');
			final int n = featureVector.size();
			for (int i = 0; i < n; i++) {
				final FeatureValue featureValue = featureVector.get(i).getFeatureValue();
				if (excludeNullValues && featureValue.isNullValue()) {
					sb.append("-1");
				} else if (featureValue instanceof SingleFeatureValue) {
					sb.append(((SingleFeatureValue)featureValue).getIndexCode());
				} else if (featureValue instanceof MultipleFeatureValue) {
					// Typed as Set<Integer> so that the for-each over Integer is well-typed.
					final Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
					boolean first = true;
					for (Integer value : values) {
						if (!first) {
							sb.append('|');
						}
						sb.append(value.toString());
						first = false;
					}
				}
				sb.append('\t');
			}
			sb.append('\n');
			instanceOutput.write(sb.toString());
			// Flushed per instance so the file is complete on disk after every call.
			instanceOutput.flush();
			increaseNumberOfInstances();
		} catch (IOException e) {
			throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
		}
	}
	
	/**
	 * Does nothing; this learner keeps no per-sentence state.
	 * 
	 * @param dependencyGraph the dependency structure of the finished sentence (unused)
	 */
	public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException {
		// intentionally empty
	}
	
	/**
	 * Signals that no further instances will be added; closes the instance writer.
	 * 
	 * @see org.maltparser.ml.LearningMethod#noMoreInstances()
	 */
	public void noMoreInstances() throws MaltChainedException {
		this.closeInstanceWriter();
	}


	/**
	 * Trains a Liblinear model from the instance file and stores it as the ".mod" file.
	 * <p>
	 * Training runs in-process unless an external trainer has been configured, in which case
	 * {@link #trainExternal()} is used. With feature pruning enabled the feature map is written
	 * to the ".map" file afterwards.
	 * 
	 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
	 * @throws MaltChainedException if the owner is missing, or reading, training or saving fails
	 */
	public void train() throws MaltChainedException {
		if (owner == null) {
			throw new LiblinearException("The parent guide model cannot be found. ");
		}
//		cardinalities = getCardinalities(featureVector);
		if (pathExternalLiblinearTrain == null) {
			try {
				Problem problem = null;
				if (featurePruning) {
					problem = readLibLinearProblemWithFeaturePruning(getInstanceInputStreamReader(".ins"));
				} else {
					// The cardinality based reader is disabled, so no problem is built on this path.
//					problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
				}
				Configuration config = owner.getGuide().getConfiguration();
				if (config.isLoggerInfoEnabled()) {
					config.logInfoMessage("Creating Liblinear model "+getFile(".mod").getName()+"\n");
				}
				// Silence the library while it trains, and always put both streams back afterwards,
				// also when training or saving fails.
				final PrintStream out = System.out;
				final PrintStream err = System.err;
				System.setOut(NoPrintStream.NO_PRINTSTREAM);
				System.setErr(NoPrintStream.NO_PRINTSTREAM);
				try {
					Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
				} finally {
					System.setOut(out);
					System.setErr(err);
				}
				if (!saveInstanceFiles) {
					getFile(".ins").delete();
				}
			} catch (OutOfMemoryError e) {
				throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx). ", e);
			} catch (IllegalArgumentException e) {
				throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
			} catch (SecurityException e) {
				throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
			} catch (IOException e) {
				throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
			}
		} else {
			trainExternal();
		}
		
		if (featurePruning) {
			try {
				saveFeatureMap(new FileOutputStream(getFile(".map").getAbsolutePath()), featureMap);
			} catch (FileNotFoundException e) {
				throw new LiblinearException("The Liblinear learner cannot save the feature map file '"+getFile(".map").getAbsolutePath()+"'. ", e);
			}
		} else {
//			saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
		}
	}
	
	/**
	 * Trains the model by running the external trainer at pathExternalLiblinearTrain as a child process.
	 * <p>
	 * The command line is: trainer path, the parameters from getLibLinearParamStringArray(), the path of
	 * the ".ins.tmp" file and the path of the ".mod" file. The child's output is echoed to the log
	 * depending on the verbosity. A non-zero exit code is logged as an error but not thrown.
	 * <p>
	 * NOTE(review): the conversion step that would write ".ins.tmp" is commented out below, so nothing
	 * in this method creates that file -- confirm that external training is still supported.
	 * 
	 * @throws MaltChainedException if the process cannot be started, is interrupted, or I/O fails
	 */
	private void trainExternal() throws MaltChainedException {
		try {		
//			maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
			Configuration config = owner.getGuide().getConfiguration();
			if (config.isLoggerInfoEnabled()) {
				config.logInfoMessage("Creating Liblinear model (external) "+getFile(".mod").getName());
			}

			// Layout: [0] trainer, [1..params.length] parameters, then instance file and model file.
			final String[] params = getLibLinearParamStringArray();
			String[] arrayCommands = new String[params.length+3];
			int i = 0;
			arrayCommands[i++] = pathExternalLiblinearTrain;
			for (; i <= params.length; i++) {
				arrayCommands[i] = params[i-1];
			}
			arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
			arrayCommands[i++] = getFile(".mod").getAbsolutePath();
			
	        if (verbosity == Verbostity.ALL) {
	        	config.logInfoMessage('\n');
	        }
			final Process child = Runtime.getRuntime().exec(arrayCommands);
	        final InputStream in = child.getInputStream();
	        final InputStream err = child.getErrorStream();
	        int c;
	        // Standard output is drained to its end before standard error is read.
	        // NOTE(review): presumably this can stall if the child fills its error pipe first -- confirm.
	        while ((c = in.read()) != -1){
	        	if (verbosity == Verbostity.ALL) {
	        		config.logInfoMessage((char)c);
	        	}
	        }
	        while ((c = err.read()) != -1){
	        	if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
	        		config.logInfoMessage((char)c);
	        	}
	        }
            if (child.waitFor() != 0) {
            	config.logErrorMessage(" FAILED ("+child.exitValue()+")");
            }
	        in.close();
	        err.close();
	        if (!saveInstanceFiles) {
				getFile(".ins").delete();
				getFile(".ins.tmp").delete();
	        }
	        if (config.isLoggerInfoEnabled()) {
	        	config.logInfoMessage('\n');
	        }
		} catch (InterruptedException e) {
			 // NOTE(review): the thread's interrupt status is not restored here.
			 throw new LiblinearException("Liblinear is interrupted. ", e);
		} catch (IllegalArgumentException e) {
			throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
		} catch (SecurityException e) {
			throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
		} catch (IOException e) {
			throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
		} catch (OutOfMemoryError e) {
			throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx). ", e);
		}
	}
	
//	private int[] getCardinalities(FeatureVector featureVector) {
//		int[] cardinalities = new int[featureVector.size()];
//		int i = 0;
//		for (FeatureFunction feature : featureVector) {
//			cardinalities[i++] = feature.getFeatureValue().getCardinality();
//		}
//		return cardinalities;
//	}
//	
//	private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
//		final BufferedWriter out = new BufferedWriter(osw);
//		try {
//			for (int i = 0, n = cardinalities.length; i < n; i++) {
//				out.write(Integer.toString(cardinalities[i]));
//				if (i < n - 1) {
//					out.write(',');
//				}
//			}
//			out.write('\n');
//			out.close();
//		} catch (IOException e) {
//			throw new LiblinearException("", e);
//		}
//	}
	
//	private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
//		int[] cardinalities = null;
//		try {
//			final BufferedReader in = new BufferedReader(isr); 
//			String line;
//			if ((line = in.readLine()) != null) {
//				String[] items = line.split(",");
//				cardinalities = new int[items.length];
//				for (int i = 0; i < items.length; i++) {
//					cardinalities[i] = Integer.parseInt(items[i]);
//				}
// 			}
//			in.close();
//		} catch (IOException e) {
//			throw new LiblinearException("", e);
//		} catch (NumberFormatException e) {
//			throw new LiblinearException("", e);
//		}
//		return cardinalities;
//	}
	
	/**
	 * Moves every instance from this learner's ".ins" file to the instance writer of another learning
	 * method, inserting the current index code of the divide feature at the configured column positions.
	 * <p>
	 * The file is copied character by character. For each line moved, the target's instance count is
	 * increased and this learner's count is decreased. Afterwards the ".ins" file is deleted and the
	 * target writer is flushed.
	 * 
	 * @param method the learning method that receives the instances
	 * @param divideFeature the feature whose index code is inserted; read as a SingleFeatureValue
	 * @param divideFeatureIndexVector the column indexes at which the divide feature is inserted
	 * @throws MaltChainedException if an argument is missing or the instance file cannot be read or removed
	 * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
	 */
	public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList divideFeatureIndexVector) throws MaltChainedException {
		if (method == null) {
			throw new LiblinearException("The learning method cannot be found. ");
		} else if (divideFeature == null) {
			throw new LiblinearException("The divide feature cannot be found. ");
		} 
		
		try {
			final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
			final BufferedWriter out = method.getInstanceWriter();
			// sb buffers the characters of the column currently being read.
			final StringBuilder sb = new StringBuilder(6);
			// l is the look-ahead character (or -1 at end of file), c the character being processed.
			int l = in.read();
			char c;
			// j counts the columns already completed on the current line.
			int j = 0;
	
			while(true) {
				if (l == -1) {
					// End of file: any text after the last newline is discarded.
					sb.setLength(0);
					break;
				}
				c = (char)l; 
				l = in.read();
				if (c == '\t') {
					// End of a column: the divide feature code goes in front of it when j-1 is listed.
					if (divideFeatureIndexVector.contains(j-1)) {
						out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
						out.write('\t');
					}
					out.write(sb.toString());
					j++;
					out.write('\t');
					sb.setLength(0);
				} else if (c == '\n') {
					// End of an instance: write what is buffered and, when j-1 is listed, append the code.
					out.write(sb.toString());
					if (divideFeatureIndexVector.contains(j-1)) {
						out.write('\t');
						out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
					}
					out.write('\n');
					sb.setLength(0);
					method.increaseNumberOfInstances();
					this.decreaseNumberOfInstances();
					j = 0;
				} else {
					sb.append(c);
				}
			}	
			in.close();
			getFile(".ins").delete();
			out.flush();
		} catch (SecurityException e) {
			throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
		} catch (NullPointerException  e) {
			throw new LiblinearException("The instance file cannot be found. ", e);
		} catch (FileNotFoundException e) {
			throw new LiblinearException("The instance file cannot be found. ", e);
		} catch (IOException e) {
			// NOTE(review): the message below reads as if the word "cannot" is missing.
			throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
		}

	}
	
	/**
	 * Predicts the next decision for the given feature vector and stores it in the decision's k-best list.
	 * <p>
	 * The model is loaded from the ".mod" configuration entry on first use. When feature pruning is
	 * enabled the call is delegated to predictWithFeaturePruning.
	 * <p>
	 * NOTE(review): on the non-pruning path the code that filled xlist from the feature vector is
	 * commented out below, so as written the classifier receives an empty feature array -- confirm
	 * whether this path is still meant to be used.
	 * 
	 * @param featureVector the features of the current parser state
	 * @param decision the decision whose k-best list receives the prediction
	 * @return always true
	 * @throws MaltChainedException if the model cannot be loaded or the feature vector is missing
	 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
	 */
	public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
		if (model == null) {
			try {
				model = Linear.loadModel(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod")));
			} catch (IOException e) {
				throw new LiblinearException("The model cannot be loaded. ", e);
			}
		}
		if (model == null) { 
			throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
		} else if (featureVector == null) {
			throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
		}
		if (featurePruning) {
			return predictWithFeaturePruning(featureVector, decision);
		}
		
//		if (cardinalities == null) {
//			if (getConfigFileEntry(".car") != null) {
//				cardinalities = loadCardinalities(getInstanceInputStreamReaderFromConfigFile(".car"));
//			} else {
//				cardinalities = getCardinalities(featureVector);
//			}
//		}

		//System.out.println("METHOD PREDICT CARDINALITIES SIZE" + cardinalities.length + " FEATURE VECTOR SIZE " +featureVector.size());
		// The scratch list is created once and reused; it is cleared at the end of every call.
		if (xlist == null) {
			xlist = new ArrayList(featureVector.size()); 
		}


//		int offset = 1;
//		int i = 0;
//		for (FeatureFunction feature : featureVector) {
//			final FeatureValue featureValue = feature.getFeatureValue();
//			if (!(excludeNullValues == true && featureValue.isNullValue())) {
//				if (featureValue instanceof SingleFeatureValue) {
//					if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) {
//						xlist.add(new FeatureNode(((SingleFeatureValue)featureValue).getCode() + offset, 1));
//					}
//				} else if (featureValue instanceof MultipleFeatureValue) {
//					for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
//						if (value < cardinalities[i]) {
//							xlist.add(new FeatureNode(value + offset, 1));
//						}
//					}
//				}
//			}
//			offset += cardinalities[i];
//			i++;
//		}
		// Copy the collected nodes into the array form that Liblinear expects.
		FeatureNode[] xarray = new FeatureNode[xlist.size()];
		for (int k = 0; k < xlist.size(); k++) {
			xarray[k] = xlist.get(k);
		}

		// A single best answer comes straight from Liblinear; otherwise the k-best helper is used.
		if (decision.getKBestList().getK() == 1) {
			decision.getKBestList().add(Linear.predict(model, xarray));
		} else {
			liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
		}
		
		xlist.clear();

		return true;
	}
	
	/**
	 * Predicts the next decision using the feature-map based (pruned) representation.
	 * <p>
	 * The feature map is loaded from the ".map" configuration entry on first use. Every feature value
	 * is translated to its dense feature index through the map. Values that were never seen during
	 * training have no index and are skipped.
	 * 
	 * @param featureVector the features of the current parser state
	 * @param decision the decision whose k-best list receives the prediction
	 * @return always true
	 * @throws MaltChainedException if the feature map cannot be loaded
	 */
	public boolean predictWithFeaturePruning(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
		if (featureMap == null) {
			featureMap = loadFeatureMap(getInputStreamFromConfigFileEntry(".map"));
		}

		// Feature positions are 1-based, matching the columns of the instance file read during
		// training (column 0 holds the class), so position i belongs to the feature at index i-1.
		final int numberOfFeatures = featureVector.size();
		for (int i = 1; i <= numberOfFeatures; i++) {
			final FeatureValue featureValue = featureVector.getFeatureValue(i-1);
			if (!(excludeNullValues == true && featureValue.isNullValue())) {
				if (featureValue instanceof SingleFeatureValue) {
					int v = getFeatureMapValue(i, ((SingleFeatureValue)featureValue).getIndexCode());
					if (v != -1) {
						featureSet.add(new XNode(v,1));
					}
				} else if (featureValue instanceof MultipleFeatureValue) {
					for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
						int v = getFeatureMapValue(i, value);
						if (v != -1) {
							featureSet.add(new XNode(v,1));
						}
					}
				}
			}
		}
		// Copy the collected nodes, in the iteration order of the set, into the array form Liblinear expects.
		FeatureNode[] xarray = new FeatureNode[featureSet.size()];
		int k = 0;
		for (XNode x : featureSet) {
			xarray[k++] = new FeatureNode(x.getIndex(), x.getValue());
		}


		if (decision.getKBestList().getK() == 1) {
			decision.getKBestList().add(Linear.predict(model, xarray));
		} else {
			liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
		}
		featureSet.clear();

		return true;
	}
	
	/**
	 * Closes the instance writer and drops the references to the owner, the model and the scratch list.
	 * 
	 * @throws MaltChainedException if the instance writer cannot be closed
	 */
	public void terminate() throws MaltChainedException { 
		closeInstanceWriter();
		owner = null;
		xlist = null;
		model = null;
	}

	/**
	 * Gives access to the writer used for the instance file.
	 * 
	 * @return the instance writer, or null when none is open
	 */
	public BufferedWriter getInstanceWriter() {
		return this.instanceOutput;
	}
	
	/**
	 * Flushes and closes the instance writer if one is open, then forgets it.
	 * 
	 * @throws MaltChainedException if flushing or closing fails
	 */
	protected void closeInstanceWriter() throws MaltChainedException {
		if (instanceOutput == null) {
			return;
		}
		try {
			instanceOutput.flush();
			instanceOutput.close();
			instanceOutput = null;
		} catch (IOException e) {
			throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
		}
	}
	
	
	/**
	 * Gives access to the parameter string used to configure Liblinear.
	 * 
	 * @return the stored parameter string
	 */
	public String getParamString() {
		return this.paramString;
	}
	
	/**
	 * @return the instance model that owns this learner
	 */
	public InstanceModel getOwner() {
		return this.owner;
	}

	/**
	 * Stores the instance model that owns this learner.
	 * 
	 * @param owner the owning instance model
	 */
	protected void setOwner(InstanceModel owner) {
		this.owner = owner;
	}
	
	/**
	 * @return the mode this learner was put in
	 */
	public int getLearnerMode() {
		return this.learnerMode;
	}

	/**
	 * Stores the learner mode.
	 * 
	 * @param learnerMode the new learner mode
	 * @throws MaltChainedException declared for callers; not thrown by this implementation
	 */
	public void setLearnerMode(int learnerMode) throws MaltChainedException {
		this.learnerMode = learnerMode;
	}
	
	/**
	 * @return the name of this learning method, as used in the names of its files
	 */
	public String getLearningMethodName() {
		return this.name;
	}
	
	/**
	 * Fetches the parser configuration through the owner's guide.
	 * 
	 * @return the current configuration
	 * @throws MaltChainedException
	 */
	public DependencyParserConfig getConfiguration() throws MaltChainedException {
		return this.owner.getGuide().getConfiguration();
	}
	
	/**
	 * Returns the number of instances. When the counter is zero, the lines of the ".ins" file are
	 * counted instead, and both the counter and the owner's frequency are increased once per line.
	 * 
	 * @return the number of instances
	 * @throws MaltChainedException if the instance file cannot be read
	 */
	public int getNumberOfInstances() throws MaltChainedException {
		if (numberOfInstances != 0) {
			return numberOfInstances;
		}
		// The counter is empty: fall back to counting the lines of the instance file.
		final BufferedReader lineReader = new BufferedReader(getInstanceInputStreamReader(".ins"));
		try {
			for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
				numberOfInstances++;
				owner.increaseFrequency();
			}
			lineReader.close();
		} catch (IOException e) {
			throw new MaltChainedException("No instances found in file",e);
		}
		return numberOfInstances;
	}

	/**
	 * Counts one more instance, both here and in the owner's frequency.
	 */
	public void increaseNumberOfInstances() {
		this.numberOfInstances += 1;
		this.owner.increaseFrequency();
	}
	
	/**
	 * Counts one instance less, both here and in the owner's frequency.
	 */
	public void decreaseNumberOfInstances() {
		this.numberOfInstances -= 1;
		this.owner.decreaseFrequency();
	}
	
	/**
	 * Sets the instance counter.
	 * 
	 * @param numberOfInstances the new number of instances
	 */
	protected void setNumberOfInstances(int numberOfInstances) {
		// Store the argument; the previous code always stored 0 regardless of it.
		this.numberOfInstances = numberOfInstances;
	}

	/**
	 * Stores the name of this learning method.
	 * 
	 * @param name the learning method name
	 */
	protected void setLearningMethodName(String name) {
		this.name = name;
	}
	
	/**
	 * Asks the configuration for an appending writer for this learner's file with the given suffix.
	 * 
	 * @param suffix the file suffix, for example ".ins"
	 * @return the writer supplied by the configuration
	 * @throws MaltChainedException if the configuration cannot supply the writer
	 */
	protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
		final DependencyParserConfig config = getConfiguration();
		final String fileName = owner.getModelName()+getLearningMethodName()+suffix;
		return config.getAppendOutputStreamWriter(fileName);
	}
	
	/**
	 * Asks the configuration for a reader for this learner's file with the given suffix.
	 * 
	 * @param suffix the file suffix, for example ".ins"
	 * @return the reader supplied by the configuration
	 * @throws MaltChainedException if the configuration cannot supply the reader
	 */
	protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
		final DependencyParserConfig config = getConfiguration();
		final String fileName = owner.getModelName()+getLearningMethodName()+suffix;
		return config.getInputStreamReader(fileName);
	}
	
	/**
	 * Opens a UTF-8 reader on the configuration file entry that belongs to this learner and suffix.
	 * 
	 * @param suffix the entry suffix, for example ".mod"
	 * @return a UTF-8 reader on the entry
	 * @throws MaltChainedException if the entry cannot be opened or UTF-8 is not supported
	 */
	protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException {
		final InputStream entryStream = getInputStreamFromConfigFileEntry(suffix);
		try {
			return new InputStreamReader(entryStream, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new ConfigurationException("The char set UTF-8 is not supported. ", e);
		}
	}
	
	/**
	 * Asks the configuration for a stream on the configuration file entry of this learner and suffix.
	 * 
	 * @param suffix the entry suffix, for example ".map"
	 * @return the stream supplied by the configuration
	 * @throws MaltChainedException if the configuration cannot supply the stream
	 */
	protected InputStream getInputStreamFromConfigFileEntry(String suffix) throws MaltChainedException {
		final DependencyParserConfig config = getConfiguration();
		final String entryName = owner.getModelName()+getLearningMethodName()+suffix;
		return config.getInputStreamFromConfigFileEntry(entryName);
	}
	
	/**
	 * Asks the configuration for this learner's file with the given suffix.
	 * 
	 * @param suffix the file suffix, for example ".mod"
	 * @return the file supplied by the configuration
	 * @throws MaltChainedException if the configuration cannot supply the file
	 */
	protected File getFile(String suffix) throws MaltChainedException {
		final DependencyParserConfig config = getConfiguration();
		final String fileName = owner.getModelName()+getLearningMethodName()+suffix;
		return config.getFile(fileName);
	}
	
//	protected JarEntry getConfigFileEntry(String suffix) throws MaltChainedException {
//		return getConfiguration().getConfigurationDir().getConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix);
//	}
	
	
	/**
	 * Reads a tab-separated instance file into a Liblinear problem, building the feature map on the way.
	 * <p>
	 * Column 0 of every line is the class label. Each following column holds one or more items separated
	 * by '|'. An item is either an integer code (the value -1 is skipped) or "code:value". Every
	 * (column, code) pair is given a dense feature index through addFeatureMapValue.
	 * <p>
	 * Side effects: featureSet is set to null when reading is done, and the number of features is
	 * printed to standard output.
	 * 
	 * @param isr the reader for the instance file; it is closed when reading completes normally
	 * @return the problem, sized by getNumberOfInstances()
	 * @throws MaltChainedException if the file cannot be read or contains a non-numeric item
	 */
	public Problem readLibLinearProblemWithFeaturePruning(InputStreamReader isr) throws MaltChainedException {
		Problem problem = new Problem();
		
		try {
			final BufferedReader fp = new BufferedReader(isr);
			
			problem.bias = -1;
			problem.l = getNumberOfInstances();
			problem.x = new FeatureNode[problem.l][];
			problem.y = new int[problem.l];
			// i is the index of the instance being filled in.
			int i = 0;
			final Pattern tabPattern = Pattern.compile("\t");
			final Pattern pipePattern = Pattern.compile("\\|");
			while(true) {
				String line = fp.readLine();

				if(line == null) break;
				String[] columns = tabPattern.split(line);

				if (columns.length == 0) {
					continue;
				}
				int j = 0;
				try {
					problem.y[i] = Integer.parseInt(columns[j]);					
					for(j = 1; j < columns.length; j++) {
						final String[] items = pipePattern.split(columns[j]);
						for (int k = 0; k < items.length; k++) {
							try {
								int colon = items[k].indexOf(':');
								if (colon == -1) {
									// Plain code: a binary feature with value 1; -1 marks a skipped value.
									if (Integer.parseInt(items[k]) != -1) {
										int v = addFeatureMapValue(j, Integer.parseInt(items[k]));
										if (v != -1) {
											featureSet.add(new XNode(v,1));
										}
									}
								} else {
									// "code:value": the part after the colon is the feature value.
									int index = addFeatureMapValue(j, Integer.parseInt(items[k].substring(0,colon)));
									double value;
									if (items[k].substring(colon+1).indexOf('.') != -1) {
										value = Double.parseDouble(items[k].substring(colon+1));
									} else {
										value = Integer.parseInt(items[k].substring(colon+1));
									}
									featureSet.add(new XNode(index,value));
								}
							} catch (NumberFormatException e) {
								throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
							}
						}
					}
					// Copy the nodes of this instance, in the iteration order of the set, into the problem.
					problem.x[i] = new FeatureNode[featureSet.size()];
					int p = 0;
					for (XNode x : featureSet) {
						problem.x[i][p++] = new FeatureNode(x.getIndex(), x.getValue());
					}
					featureSet.clear();
					i++;
				} catch (ArrayIndexOutOfBoundsException e) {
					// Raised, for example, when the file has more lines than problem.l.
					throw new LiblinearException("Cannot read from the instance file. ", e);
				}
			}
			fp.close();	
			featureSet = null;
			problem.n = featureMap.size();
			System.out.println("Number of features: "+problem.n);
		} catch (IOException e) {
			throw new LiblinearException("Cannot read from the instance file. ", e);
		}
		return problem;
	}
	/**
	 * Reads an instance file into a problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
	 * <p>
	 * Column 0 of every line is the class label. Each following column holds integer codes separated
	 * by '|'. The value -1 is skipped. A code becomes the feature index code + offset, where the offset
	 * starts at 1 and grows by the cardinality of every preceding column.
	 * <p>
	 * Side effects: xlist is set to null when reading is done, and the number of features is printed
	 * to standard output.
	 * 
	 * @param isr	the instance stream reader for the instance file
	 * @param cardinalities	a array containing the number of distinct values for a particular column.
	 * @return the problem, sized by getNumberOfInstances()
	 * @throws LiblinearException
	 */
	public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
		Problem problem = new Problem();

		try {
			final BufferedReader fp = new BufferedReader(isr);
			int max_index = 0;
			if (xlist == null) {
				xlist = new ArrayList(); 
			}
			problem.bias = -1; //getBias();
			problem.l = getNumberOfInstances();
			problem.x = new FeatureNode[problem.l][];
			problem.y = new int[problem.l];
			int i = 0;
			final Pattern tabPattern = Pattern.compile("\t");
			final Pattern pipePattern = Pattern.compile("\\|");
			while(true) {
				String line = fp.readLine();

				if(line == null) break;
				String[] columns = tabPattern.split(line);

				if (columns.length == 0) {
					continue;
				}
				
				int offset = 1; 
				int j = 0;
				try {
					problem.y[i] = 
						Integer.parseInt(columns[j]);
					// p counts the feature nodes collected for this instance.
					int p = 0;
					for(j = 1; j < columns.length; j++) {
						final String[] items = pipePattern.split(columns[j]);	
						for (int k = 0; k < items.length; k++) {
							try {
								if (Integer.parseInt(items[k]) != -1) {
									xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1));
									p++;
								}
							} catch (NumberFormatException e) {
								throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
							}
						}
						offset += cardinalities[j-1];
					}
					problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]);
					// Only look at the last node when there is one: an instance whose values were all
					// skipped (-1) has no nodes, and indexing p-1 would fail with a misleading error.
					if (p > 0) {
						max_index = Math.max(max_index, problem.x[i][p-1].index);
					}
					i++;
					xlist.clear();
				} catch (ArrayIndexOutOfBoundsException e) {
					throw new LiblinearException("Cannot read from the instance file. ", e);
				}
			}
			fp.close();	
			problem.n = max_index;
			System.out.println("Number of features: "+problem.n);
//			if ( problem.bias >= 0 ) {
//				problem.n++;
//			}
			xlist = null;
		} catch (IOException e) {
			throw new LiblinearException("Cannot read from the instance file. ", e);
		}
		return problem;
	}
	
	/**
	 * Reads the options that are not passed on to Liblinear itself: null value handling, whether to
	 * keep the instance files, the optional path to an external trainer, and the verbosity.
	 * <p>
	 * Feature pruning is switched on unconditionally; the option lookup for it is disabled.
	 * 
	 * @throws MaltChainedException if the external trainer path is invalid or cannot be accessed
	 */
	protected void initSpecialParameters() throws MaltChainedException {
		final Object nullValueOption = getConfiguration().getOptionValue("singlemalt", "null_value");
		excludeNullValues = nullValueOption != null && nullValueOption.toString().equalsIgnoreCase("none");
		saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();
//		featurePruning = ((Boolean)getConfiguration().getOptionValue("liblinear", "feature_pruning")).booleanValue();
		featurePruning = true;

		// Look the path up once instead of once per check.
		final String externalPath = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
		if (!externalPath.equals("")) {
			try {
				final File trainer = new File(externalPath);
				if (!trainer.exists()) {
					// The Liblinear trainer is called 'train' (see the checks below), not 'svm-train'.
					throw new LiblinearException("The path to the external Liblinear trainer 'train' is wrong.");
				}
				if (trainer.isDirectory()) {
					throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
				}
				if (!(externalPath.endsWith("train") || externalPath.endsWith("train.exe"))) {
					throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
				}
				pathExternalLiblinearTrain = externalPath;
			} catch (SecurityException e) {
				throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
			}
		}
		final Object verbosityOption = getConfiguration().getOptionValue("liblinear", "verbosity");
		if (verbosityOption != null) {
			// Upper-case with a fixed locale so the enum lookup does not depend on the default
			// locale (for example, the Turkish upper case of 'i' is not 'I').
			verbosity = Verbostity.valueOf(verbosityOption.toString().toUpperCase(java.util.Locale.ENGLISH));
		}
	}
	
	/**
	 * Renders the current liblinear options as a single string of the form
	 * "-flag value -flag value ", following the iteration order of the option map.
	 *
	 * @return the options as a space-separated string (with a trailing space after each value)
	 */
	public String getLibLinearOptions() {
		final StringBuilder options = new StringBuilder();
		for (String flag : liblinearOptions.keySet()) {
			options.append('-').append(flag).append(' ').append(liblinearOptions.get(flag)).append(' ');
		}
		return options.toString();
	}
	
	/**
	 * Parses a liblinear parameter string such as "-s_4_-c_0.1" (tokens separated by '_' or
	 * blanks) and stores every flag/value pair in the liblinear option map.
	 *
	 * Only the character directly after '-' identifies the flag; it must be one of
	 * 's', 'c', 'e' or 'B'. Empty tokens produced by repeated separators are ignored,
	 * so an empty or blank parameter string leaves the options untouched.
	 *
	 * @param paramstring the parameter string, or null to leave the options untouched
	 * @throws MaltChainedException if a flag does not start with '-', is unknown,
	 *         or the last flag has no value
	 */
	public void parseParameters(String paramstring) throws MaltChainedException {
		if (paramstring == null) {
			return;
		}
		final String allowedFlags = "sceB";
		final String[] rawTokens;
		try {
			rawTokens = paramstring.split("[_\\p{Blank}]");
		} catch (PatternSyntaxException e) {
			throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
		}
		// Repeated or leading separators yield empty tokens; drop them so that
		// charAt() below can never run off the end of a token.
		final ArrayList<String> argv = new ArrayList<String>(rawTokens.length);
		for (String token : rawTokens) {
			if (token.length() > 0) {
				argv.add(token);
			}
		}
		// Tokens come in (flag, value) pairs.
		for (int i = 0; i < argv.size(); i += 2) {
			final String flag = argv.get(i);
			if (flag.charAt(0) != '-') {
				throw new LiblinearException("The argument flag should start with the following character '-', not with "+flag.charAt(0));
			}
			if (i + 1 >= argv.size()) {
				throw new LiblinearException("The last argument does not have any value. ");
			}
			final String value = argv.get(i + 1);
			// A bare "-" has no flag character and is reported as unknown.
			if (flag.length() < 2 || allowedFlags.indexOf(flag.charAt(1)) == -1) {
				throw new LiblinearException("Unknown liblinear parameter: '"+flag+"' with value '"+value+"'. ");
			}
			liblinearOptions.put(Character.toString(flag.charAt(1)), value);
		}
	}
	
	/**
	 * Returns the value stored under the "B" option parsed as a double.
	 *
	 * @return the bias value
	 * @throws MaltChainedException if the stored value is not a valid number
	 */
	public double getBias() throws MaltChainedException {
		final String bias = liblinearOptions.get("B");
		try {
			return Double.parseDouble(bias);
		} catch (NumberFormatException e) {
			throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
		}
	}

	/**
	 * Builds a liblinear {@code Parameter} object from the current option map:
	 * "s" selects the solver type (codes "0" to "6"), "c" the cost and "e" the epsilon.
	 *
	 * @return the parameter object configured from the options
	 * @throws MaltChainedException if the solver code is missing or not one of "0".."6",
	 *         or if the cost or epsilon value is not numerical
	 */
	public Parameter getLiblinearParameters() throws MaltChainedException {
		// Solver types indexed by their "-s" code.
		final SolverType[] solverByCode = {
			SolverType.L2R_LR,
			SolverType.L2R_L2LOSS_SVC_DUAL,
			SolverType.L2R_L2LOSS_SVC,
			SolverType.L2R_L1LOSS_SVC_DUAL,
			SolverType.MCSVM_CS,
			SolverType.L1R_L2LOSS_SVC,
			SolverType.L1R_LR
		};
		final String type = liblinearOptions.get("s");
		SolverType solver = null;
		for (int code = 0; code < solverByCode.length; code++) {
			// Exact string match (so "04" or " 4" are still rejected); also safe when type is null.
			if (Integer.toString(code).equals(type)) {
				solver = solverByCode[code];
				break;
			}
		}
		if (solver == null) {
			throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 6. ");
		}

		final Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
		param.setSolverType(solver);
		try {
			param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
		} catch (NumberFormatException e) {
			throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
		}
		try {
			param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
		} catch (NumberFormatException e) {
			throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
		}
		return param;
	}

	/**
	 * Fills the liblinear option map with the default value of each supported flag,
	 * inserted in the order s, c, e, B.
	 */
	public void initLiblinearOptions() {
		final String[][] defaults = {
			{"s", "4"},   // solver type code 4 (mapped to SolverType.MCSVM_CS by getLiblinearParameters())
			{"c", "0.1"}, // cost
			{"e", "0.1"}, // epsilon
			{"B", "-1"}   // bias
		};
		for (String[] flagAndValue : defaults) {
			liblinearOptions.put(flagAndValue[0], flagAndValue[1]);
		}
	}

	/**
	 * Returns the current liblinear options as a flat argument array of the form
	 * {"-flag", "value", "-flag", "value", ...}, following the iteration order of the option map.
	 *
	 * @return the options as an argument array
	 */
	public String[] getLibLinearParamStringArray() {
		// The list must be typed: with a raw ArrayList, toArray(...) yields Object[],
		// which cannot be returned as String[].
		final ArrayList<String> params = new ArrayList<String>(2 * liblinearOptions.size());

		for (String key : liblinearOptions.keySet()) {
			params.add("-"+key);
			params.add(liblinearOptions.get(key));
		}
		return params.toArray(new String[params.size()]);
	}
	
	
	public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
		int i;
		final int nr_class = model.getNrClass();
		final double[] dec_values = new double[nr_class];

		Linear.predictValues(model, x, dec_values);
		final int[] labels = model.getLabels();
		int[] predictionList = new int[nr_class];
		for(i=0;i dec_values[lagest]) {
					lagest = j;
				}
			}
			tmpDec = dec_values[lagest];
			dec_values[lagest] = dec_values[i];
			dec_values[i] = tmpDec;
			tmpObj = predictionList[lagest];
			predictionList[lagest] = predictionList[i];
			predictionList[i] = tmpObj;
		}
		
		int k = nr_class-1;
		if (kBestList.getK() != -1) {
			k = kBestList.getK() - 1;
		}
		
		for (i=0; i= 0; i++, k--) {
			if (kBestList instanceof ScoredKBestList) {
				((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
			} else {
				kBestList.add(predictionList[i]);
			}

		}
	}
	
	/**
	 * Converts the instance file (Malt's own SVM format) into the Liblinear (SVMLight) format. The input instance file is removed (replaced)
	 * by the instance file in the Liblinear (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
	 * 
	 * @param isr the input stream reader for the source instance file
	 * @param osw	the output stream writer for the destination instance file
	 * @param cardinalities a vector containing the number of distinct values for a particular column
	 * @throws LiblinearException
	 */
	public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
		final BufferedReader in = new BufferedReader(isr);
		final BufferedWriter out = new BufferedWriter(osw);
		try {
			try {
				int c;
				int j = 0;       // index of the column currently being read (0 = class label)
				int offset = 1;  // first feature index of the current column
				int code = 0;    // integer value read so far; -1 marks a value that is skipped
				while ((c = in.read()) != -1) {
					if (c == '\t' || c == '|') {
						if (j == 0) {
							// First column: the class label is written as-is.
							out.write(Integer.toString(code));
							j++;
						} else {
							if (code != -1) {
								out.write(' ');
								out.write(Integer.toString(code+offset));
								out.write(":1");
							}
							// '|' separates several values inside one column; only a tab
							// moves on to the next column and shifts the index offset.
							if (c == '\t') {
								offset += cardinalities[j-1];
								j++;
							}
						}
						code = 0;
					} else if (c == '\n') {
						// Anything read since the last separator is dropped at end of line.
						j = 0;
						offset = 1;
						out.write('\n');
						code = 0;
					} else if (c == '-') {
						code = -1;
					} else if (code != -1) {
						if (c >= '0' && c <= '9') {
							code = code * 10 + (c - '0');
						} else {
							throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
						}
					}
				}
			} finally {
				// Release both streams even when the conversion fails part-way;
				// closing the writer also flushes what has been written so far.
				try {
					in.close();
				} finally {
					out.close();
				}
			}
		} catch (IOException e) {
			throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
		}
	}
	
	/**
	 * Closes the instance writer when this object is finalized, then delegates to
	 * the superclass finalizer.
	 *
	 * @throws Throwable whatever closeInstanceWriter() or super.finalize() throws
	 */
	protected void finalize() throws Throwable {
		try {
			closeInstanceWriter();
		} finally {
			// Run the superclass finalizer even if closing the writer failed.
			super.finalize();
		}
	}
	
	/* (non-Javadoc)
	 * @see java.lang.Object#toString()
	 */
	public String toString() {
		// Header, version line, raw parameter string line, then the flag/value listing.
		return "\nLiblinear INTERFACE\n"
			+ "  Liblinear version: " + LIBLINEAR_VERSION + "\n"
			+ "  Liblinear string: " + paramString + "\n"
			+ getLibLinearOptions();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy