org.itc.irst.tcc.sre.KernelMatrix Maven / Gradle / Ivy
/*
* Copyright 2005 FBK-irst (http://www.fbk.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.itc.irst.tcc.sre;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.DecimalFormat;
import java.util.Properties;
import org.itc.irst.tcc.sre.data.ArgumentSet;
import org.itc.irst.tcc.sre.data.ExampleSet;
import org.itc.irst.tcc.sre.data.SentenceSetCopy;
import org.itc.irst.tcc.sre.kernel.expl.Mapping;
import org.itc.irst.tcc.sre.kernel.expl.MappingFactory;
import org.itc.irst.tcc.sre.util.FeatureIndex;
import org.itc.irst.tcc.sre.util.Vector;
import org.itc.irst.tcc.sre.util.ZipModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
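* Builds the relation extraction kernel matrix: reads an example set,
* embeds it into the feature space of the configured kernel, and saves the
* embedded examples, the feature indexes, the parameters and the kernel
* matrix through a zip model.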
*
* @author Claudio Giuliano
* @version %I%, %G%
* @since 1.0
*/
public class KernelMatrix
{
/**
* Static logger that references the Logger instance named after
* the KernelMatrix class.
*/
static Logger logger = LoggerFactory.getLogger(KernelMatrix.class.getName());
//
private Properties parameter;
//
/**
 * Creates a kernel matrix builder configured by the given properties.
 *
 * @param parameter the settings; {@code run()} reads the {@code model-file},
 *                  {@code example-file} and {@code kernel-type} keys and hands
 *                  the whole set to the kernel mapping
 */
public KernelMatrix(Properties parameter)
{
    // the reference is stored as is, no defensive copy is made
    this.parameter = parameter;
} // end constructor
//
/**
 * Builds the relation extraction kernel matrix.
 * <p>
 * The steps are: open the model named by the {@code model-file} property,
 * read the examples from the {@code example-file} property, embed them with
 * the mapping selected by the {@code kernel-type} property, then save the
 * embedded examples, the feature indexes and the parameters, and finally
 * calculate the kernel matrix.
 * <p>
 * The model is closed on every path. When a step fails, a secondary failure
 * raised while closing the model is only logged, so that the caller still
 * receives the original exception.
 *
 * @throws Exception if any of the steps fails, or if closing the model
 *                   fails after all steps succeeded
 */
public void run() throws Exception
{
    logger.info("build the relation extraction kernel matrix");

    // create zip archive
    File modelFile = new File(parameter.getProperty("model-file"));
    ZipModel model = new ZipModel(modelFile);

    // tells the finally block whether an exception is already in flight
    boolean completed = false;
    try
    {
        // read data set
        File inputFile = new File(parameter.getProperty("example-file"));
        ExampleSet inputSet = readDataSet(inputFile);
        logger.debug("input training set size: {}", inputSet.size());

        // find argument types
        ArgumentSet.getInstance().init(inputSet);

        // count the classes
        int count = inputSet.getClassCount();
        logger.debug("number of classes: {}", count);

        // create the mapping factory
        MappingFactory mappingFactory = MappingFactory.getMappingFactory();
        Mapping mapping = mappingFactory.getInstance(parameter.getProperty("kernel-type"));

        // set the command line parameters
        mapping.setParameters(parameter);

        // get the number of subspaces
        int subspaceCount = mapping.subspaceCount();
        logger.debug("number of subspaces: {}", subspaceCount);

        // create the index
        FeatureIndex[] index = createFeatureIndex(subspaceCount);

        // embed the input data into a feature space
        logger.info("embed the training set");
        ExampleSet outputSet = mapping.map(inputSet, index);
        logger.debug("embedded training set size: {}", outputSet.size());

        // save the training set (the returned file is not needed here)
        saveExampleSet(outputSet, model);

        // save the indexes
        saveFeatureIndexes(index, model);

        // save param
        saveParameters(model);

        // calculate the kernel matrix
        calculateKernelMatrix(outputSet, model);

        completed = true;
    }
    finally
    {
        if (completed)
        {
            // every step succeeded: a failure to close the model is a real error
            model.close();
        }
        else
        {
            try
            {
                model.close();
            }
            catch (Exception e)
            {
                // do not let the close failure hide the exception that got us here
                logger.warn("cannot close the model after a failure", e);
            }
        }
    }
} // end run
// read the data set
/**
 * Reads the example set stored in the given file.
 *
 * @param in the file to read the examples from
 * @return a new {@code SentenceSetCopy} filled by its {@code read} method
 * @throws IOException if the file cannot be opened, read or closed
 */
private ExampleSet readDataSet(File in) throws IOException
{
    logger.info("read the example set");
    //
    ExampleSet inputSet = new SentenceSetCopy();

    // NOTE(review): FileReader decodes with the platform default charset;
    // confirm the expected encoding of the example files
    BufferedReader reader = new BufferedReader(new FileReader(in));
    try
    {
        inputSet.read(reader);
    }
    finally
    {
        // release the file handle even when read fails; closing a reader
        // that is already closed has no effect
        reader.close();
    }

    return inputSet;
} // end readDataSet
// create feature index
private FeatureIndex[] createFeatureIndex(int subspaceCount) //throws Exception
{
logger.info("create feature index");
FeatureIndex[] index = new FeatureIndex[subspaceCount];
for (int i=0;i file with training data (SRE format)\n");
sb.append("\tmodel-file\t-> file in which to store resulting model\n");
sb.append("Options:\n");
sb.append("\t-h\t\t-> this help\n");
sb.append("\t-k string\t-> set type of kernel function (default SL):\n");
sb.append("\t\t\t\tLC: Local Context Kernel\n");
sb.append("\t\t\t\tGC: Global Context Kernel\n");
sb.append("\t\t\t\tSL: Shallow Linguistic Context Kernel\n");
sb.append("\t-n [1..]\t-> set the parameter n-gram of kernels SL and GC (default 3)\n");
sb.append("\t-w [0..]\t-> set the window size of kernel LC (default 2)\n");
return sb.toString();
} // end getHelp
//
/**
 * Returns the text that {@code list} writes for this object.
 *
 * @return everything {@code list} printed to an in-memory writer
 */
public String toString()
{
    StringWriter buffer = new StringWriter();
    PrintWriter out = new PrintWriter(buffer);
    // list prints into the in-memory buffer instead of a stream
    list(out);
    return buffer.toString();
} // end toString
//
/**
 * An {@link IllegalArgumentException} subtype that carries a detail message.
 */
class IllegalParameterException extends IllegalArgumentException
{
    /**
     * Creates the exception with the given detail message.
     *
     * @param s the detail message, passed unchanged to the superclass
     */
    public IllegalParameterException(String s)
    {
        super(s);
    } // end constructor
} // end IllegalParameterException
} // end class CommandLineParameters
} // end class KernelMatrix
© 2015 - 2025 Weber Informatics LLC | Privacy Policy