All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.itc.irst.tcc.sre.CreateTestSet Maven / Gradle / Ivy

/*
 * Copyright 2005 FBK-irst (http://www.fbk.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.itc.irst.tcc.sre;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;

import org.itc.irst.tcc.sre.data.ArgumentSet;
import org.itc.irst.tcc.sre.data.ExampleSet;
import org.itc.irst.tcc.sre.data.SentenceSetCopy;
import org.itc.irst.tcc.sre.kernel.expl.AbstractMapping;
import org.itc.irst.tcc.sre.kernel.expl.ContextMappingFactory;
import org.itc.irst.tcc.sre.util.FeatureIndex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * TO DO
 * 
 * @author Claudio Giuliano
 * @version %I%, %G%
 * @since 1.0
 */
public class CreateTestSet {
    /**
     * Define a static logger variable so that it references the Logger instance
     * named CreateTestSet.
     */
    static Logger logger = LoggerFactory.getLogger(CreateTestSet.class
            .getName());

    //
    private File inputFile;

    //
    private File outputDir;

    //
    private File outputFile;

    //
    private String kernel;

    //
    private int relationType;

    //
    private Properties parameter;

    //
    private String path;

    //
    public CreateTestSet(File inputFile, File outputDir) {
        this.inputFile = inputFile;
        this.outputDir = outputDir;

        path = outputDir.getAbsolutePath();
        if (!path.endsWith(File.separator))
            path += File.separator;

        logger.info("dir " + path);
        /*
         * if (!outputDir.endsWith(File.separator)) outputDir += File.separator;
         * 
         * if (!new File(outputDir).mkdir()) { logger.error("cannot find dir " +
         * outputDir); System.exit(-1); }
         */
        parameter = new Properties();
    } // end constructor

    // read parameters
    private void readParameters() throws IOException {
        logger.info("read parameters");

        // get the param model
        File paramFile = new File(path + "param");

        parameter.load(new FileInputStream(paramFile));
    } // end readParameters

    // read the data set
    private ExampleSet readDataSet(File in) throws IOException {
        logger.info("read the example set");

        //
        logger.info("read data set");
        ExampleSet inputSet = new SentenceSetCopy();
        inputSet.read(new BufferedReader(new FileReader(in)));

        return inputSet;
    } // end readDataSet

    //
    private FeatureIndex[] readFeatureIndex(int subspaceCount)
            throws IOException {
        logger.info("read feature index");

        FeatureIndex[] index = new FeatureIndex[subspaceCount];
        for (int i = 0; i < subspaceCount; i++) {
            index[i] = new FeatureIndex(true, 1);

            File dicFile = new File(path + "dic" + i);
            // File dicFile = model.get("dic" + i);

            BufferedReader br = new BufferedReader(new FileReader(dicFile));
            index[i].read(br);
            br.close();

            logger.debug("dic" + i + ", " + dicFile + ", " + index[i].size());
        } // end for

        return index;
    } // end readFeatureIndexes

    // save the embedded test set
    private File saveExampleSet(ExampleSet outputSet) throws IOException {
        logger.info("save the embedded test set");

        //
        // File tmp = File.createTempFile("test", null);
        // tmp.deleteOnExit();
        File tmp = new File(path + "test");

        BufferedWriter out = new BufferedWriter(new FileWriter(tmp));
        outputSet.write(out);
        out.close();

        return tmp;
    } // end saveExampleSet

    //
    public void run() throws Exception {
        logger.info("mapper relations");

        // open zip archive
        // UnZipModel model = new UnZipModel(outputDir);

        // get the param model
        readParameters();
        logger.info(parameter + "");

        // read data set
        ExampleSet inputSet = readDataSet(inputFile);

        // find argument types
        ArgumentSet.getInstance().init(inputSet);

        // create the mapping factory
        // AbstractMapping contextMapping = mappingFactory();
        // set the parameters
        // contextMapping.setParameters(parameter);

        ContextMappingFactory contextMappingFactory = ContextMappingFactory
                .getContextMappingFactory();
        AbstractMapping contextMapping = contextMappingFactory
                .getInstance(parameter.getProperty("kernel-type"));
        // set the parameters
        contextMapping.setParameters(parameter);

        // get the number of subspaces
        int subspaceCount = contextMapping.subspaceCount();

        // read the index
        FeatureIndex[] index = readFeatureIndex(subspaceCount);

        // embed the input data into a feature space
        logger.info("embed the test set");
        ExampleSet outputSet = contextMapping.map(inputSet, index);
        logger.debug("embedded test set size: " + outputSet.size());

        // save the test set
        File svmTestFile = saveExampleSet(outputSet);

    } // end main

    //
    public static void main(String args[]) throws Exception {

        int parm = 2;

        if (args.length != parm) {
            System.out.println(getHelp());
            System.exit(-1);
        }

        File inputFile = new File(args[args.length - 2]);
        File outputDir = new File(args[args.length - 1]);

        CreateTestSet mapper = new CreateTestSet(inputFile, outputDir);
        mapper.run();
    } // end main

    /**
     * Returns a command-line help.
     * 
     * return a command-line help.
     */
    private static String getHelp() {
        StringBuffer sb = new StringBuffer();

        // SRE
        sb.append("\njSRE: Simple Relation Extraction V1.10\t 30.08.06\n");
        sb.append("developed by Claudio Giuliano ([email protected])\n\n");

        // License
        sb.append("Copyright 2005 FBK-irst (http://www.fbk.eu)\n");
        sb.append("\n");
        sb.append("Licensed under the Apache License, Version 2.0 (the \"License\");\n");
        sb.append("you may not use this file except in compliance with the License.\n");
        sb.append("You may obtain a copy of the License at\n");
        sb.append("\n");
        sb.append("    http://www.apache.org/licenses/LICENSE-2.0\n");
        sb.append("\n");
        sb.append("Unless required by applicable law or agreed to in writing, software\n");
        sb.append("distributed under the License is distributed on an \"AS IS\" BASIS,\n");
        sb.append("WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n");
        sb.append("See the License for the specific language governing permissions and\n");
        sb.append("limitations under the License.\n\n");

        // Usage
        sb.append("Usage: java org.itc.irst.tcc.sre.CreateTestSet [options] example-file output-dir\n\n");

        // Arguments
        sb.append("Arguments:\n");
        sb.append("\ttest-file\t-> file with test data (SRE format)\n");
        sb.append("\toutput-dir\t-> directory in which to store resulting files\n");
        // sb.append("\toutput-file\t-> file in which to store resulting output\n");

        sb.append("Options:\n");
        sb.append("\t-h\t\t-> this help\n");

        return sb.toString();
    } // end getHelp

} // end class CreateTestSet




© 2015 - 2025 Weber Informatics LLC | Privacy Policy