All downloads are FREE. Search and download functionality uses the official Maven repository.

org.itc.irst.tcc.sre.KernelMatrix Maven / Gradle / Ivy

/*
 * Copyright 2005 FBK-irst (http://www.fbk.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.itc.irst.tcc.sre;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.DecimalFormat;
import java.util.Properties;

import org.itc.irst.tcc.sre.data.ArgumentSet;
import org.itc.irst.tcc.sre.data.ExampleSet;
import org.itc.irst.tcc.sre.data.SentenceSetCopy;
import org.itc.irst.tcc.sre.kernel.expl.Mapping;
import org.itc.irst.tcc.sre.kernel.expl.MappingFactory;
import org.itc.irst.tcc.sre.util.FeatureIndex;
import org.itc.irst.tcc.sre.util.Vector;
import org.itc.irst.tcc.sre.util.ZipModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

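/**
 * Builds the kernel matrix for relation extraction: reads an example
 * set, embeds it into a feature space using the configured kernel
 * mapping, saves the embedded training set, the feature indexes and
 * the parameters to the model, and then calculates the kernel matrix.
 *
 * @author 	Claudio Giuliano
 * @version %I%, %G%
 * @since		1.0
 */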
public class KernelMatrix
{
	/**
	 * Shared SLF4J logger for this class; it is named after
	 * the KernelMatrix class.
	 */
	static Logger logger = LoggerFactory.getLogger(KernelMatrix.class);

	//
	private Properties parameter;

	//
	/**
	 * Creates a kernel matrix builder configured by the given properties.
	 *
	 * @param parameter	the properties read by {@link #run()}; the reference
	 *					is stored as is (no copy is made)
	 */
	public KernelMatrix(final Properties parameter)
	{
		this.parameter = parameter;
	} // end constructor

	//
	/**
	 * Builds the relation extraction kernel matrix.
	 * <p>
	 * The steps are: open the model named by the <code>model-file</code>
	 * property, read the example set named by the <code>example-file</code>
	 * property, initialize the argument set, create the mapping selected by
	 * the <code>kernel-type</code> property, embed the examples into the
	 * feature space, save the embedded set, the feature indexes and the
	 * parameters to the model, and finally calculate the kernel matrix.
	 * <p>
	 * The model is closed in all cases, including when one of the steps
	 * fails.
	 *
	 * @throws Exception	if any of the steps above fails
	 */
	public void run() throws Exception
	{
		logger.info("build the relation extraction kernel matrix");

		// create zip archive
		File modelFile = new File(parameter.getProperty("model-file"));
		ZipModel model = new ZipModel(modelFile);

		try
		{
			// read data set
			File inputFile = new File(parameter.getProperty("example-file"));
			ExampleSet inputSet = readDataSet(inputFile);
			logger.debug("input training set size: " + inputSet.size());

			// find argument types
			ArgumentSet.getInstance().init(inputSet);

			// set the relation type
			int count = inputSet.getClassCount();
			logger.debug("number of classes: " + count);

			// create the mapping factory
			MappingFactory mappingFactory = MappingFactory.getMappingFactory();
			Mapping mapping = mappingFactory.getInstance(parameter.getProperty("kernel-type"));

			// set the command line parameters
			mapping.setParameters(parameter);

			// get the number of subspaces
			int subspaceCount = mapping.subspaceCount();
			logger.debug("number of subspaces: " + subspaceCount);

			// create the index
			FeatureIndex[] index = createFeatureIndex(subspaceCount);

			// embed the input data into a feature space
			logger.info("embed the training set");
			ExampleSet outputSet = mapping.map(inputSet, index);
			logger.debug("embedded training set size: " + outputSet.size());

			// save the training set (the returned file is not needed here)
			saveExampleSet(outputSet, model);

			// save the indexes
			saveFeatureIndexes(index, model);

			// save param
			saveParameters(model);

			// calculate the kernel matrix
			calculateKernelMatrix(outputSet, model);
		}
		finally
		{
			// close the model even when a step above throws, so that
			// the open model is never leaked
			model.close();
		}
	} // end run

	// read the data set
	/**
	 * Reads the example set stored in the given file.
	 * <p>
	 * The reader opened on the file is always closed before this method
	 * returns, whether or not reading succeeds.
	 *
	 * @param in	the file containing the examples
	 * @return		a new example set filled from <code>in</code>
	 * @throws IOException	if the file cannot be opened or read
	 */
	private ExampleSet readDataSet(File in) throws IOException
	{
		logger.info("read the example set");

		ExampleSet inputSet = new SentenceSetCopy();

		// NOTE(review): assumes read() consumes the reader completely
		// before returning, so closing it here is safe -- confirm
		BufferedReader reader = new BufferedReader(new FileReader(in));
		try
		{
			inputSet.read(reader);
		}
		finally
		{
			// release the file handle on both success and failure
			reader.close();
		}

		return inputSet;
	}	// end readDataSet



	// create feature index
	private FeatureIndex[] createFeatureIndex(int subspaceCount) //throws Exception
	{
		logger.info("create feature index");

		FeatureIndex[] index = new FeatureIndex[subspaceCount];
		for (int i=0;i file with training data (SRE format)\n");
			sb.append("\tmodel-file\t-> file in which to store resulting model\n");

			sb.append("Options:\n");
			sb.append("\t-h\t\t-> this help\n");
			sb.append("\t-k string\t-> set type of kernel function (default SL):\n");
			sb.append("\t\t\t\tLC: Local Context Kernel\n");
			sb.append("\t\t\t\tGC: Global Context Kernel\n");
			sb.append("\t\t\t\tSL: Shallow Linguistic Context Kernel\n");

			sb.append("\t-n [1..]\t-> set the parameter n-gram of kernels SL and GC  (default 3)\n");
			sb.append("\t-w [0..]\t-> set the window size of kernel LC (default 2)\n");

			return sb.toString();
		} // end getHelp

		//
		/**
		 * Returns whatever <code>list(PrintWriter)</code> prints, captured
		 * in a string. (The enclosing class header is not visible here;
		 * <code>list</code> is presumably inherited -- confirm.)
		 *
		 * @return	the listing produced by <code>list</code>
		 */
		@Override
		public String toString()
		{
			StringWriter buffer = new StringWriter();
			PrintWriter out = new PrintWriter(buffer);

			// list() writes into the in-memory buffer through the print writer
			list(out);

			return buffer.toString();
		} // end toString

		//
		/**
		 * Unchecked exception carrying a message about an illegal parameter.
		 */
		class IllegalParameterException extends IllegalArgumentException
		{
			/**
			 * Creates the exception with the given detail message.
			 *
			 * @param s	the detail message
			 */
			public IllegalParameterException(final String s)
			{
				super(s);
			} // end constructor

		} // end IllegalParameterException

	} // end class CommandLineParameters

} // end class KernelMatrix




© 2015 - 2025 Weber Informatics LLC | Privacy Policy