
org.apache.ctakes.ytex.kernel.SparseDataExporterImpl Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import org.apache.commons.cli.*;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;
import javax.sql.DataSource;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
public class SparseDataExporterImpl implements SparseDataExporter {
// Class-wide logger. It is registered under the bare name "SparseDataExporterImpl"
// (a plain string, not the fully-qualified class name), so logging configuration
// must refer to it by that name.
private static final Logger LOGGER = LoggerFactory.getLogger( "SparseDataExporterImpl" );
/**
 * Command-line entry point.
 * <p>
 * Expects two required options: {@code -prop} (property file with queries and
 * other parameters) and {@code -type} (export format). The exporter bean is
 * obtained from the application context exposed by {@code KernelContextHolder}
 * and asked to export the data.
 * <p>
 * Usage is printed when no arguments are given or when the command line cannot
 * be parsed; in the latter case the parser's error message is written to
 * standard error first so the user can see what was wrong.
 *
 * @param args
 *            command-line arguments.
 * @throws IOException
 *             propagated from the export.
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException {
    Options options = new Options();
    options.addOption(OptionBuilder
            .withArgName("prop")
            .hasArg()
            .isRequired()
            .withDescription("property file with queries and other parameters.")
            .create("prop"));
    options.addOption(OptionBuilder
            .withArgName("type")
            .hasArg()
            .isRequired()
            .withDescription("export format; valid values: weka, libsvm")
            .create("type"));
    if (args.length == 0) {
        printHelp(options);
        return;
    }
    try {
        CommandLineParser parser = new GnuParser();
        CommandLine line = parser.parse(options, args);
        String propFile = line.getOptionValue("prop");
        String format = line.getOptionValue("type");
        SparseDataExporter exporter = KernelContextHolder
                .getApplicationContext().getBean(SparseDataExporter.class);
        exporter.exportData(propFile, format);
    } catch (ParseException pe) {
        // report why parsing failed instead of silently falling back to usage
        System.err.println("Invalid command line: " + pe.getMessage());
        printHelp(options);
    }
}
/**
 * Prints the command-line usage text for this class.
 *
 * @param options
 *            the options to describe in the usage text.
 */
private static void printHelp(Options options) {
    final String commandLineSyntax = "java "
            + SparseDataExporterImpl.class.getName() + " export sparse data";
    new HelpFormatter().printHelp(commandLineSyntax, options);
}
// JDBC template; getDataSource(...) returns the DataSource held by this template.
protected JdbcTemplate jdbcTemplate;
// Helper used to load instances, generate folds and load the properties file.
protected KernelUtil kernelUtil;
// Template used by prepare(...) to run each prepare-script statement with named parameters.
protected NamedParameterJdbcTemplate namedJdbcTemplate;
// Formatter lookup keyed by export format name; exportData(String, String) looks
// the format up in lower case and calls getFormatter() on the value it finds.
protected Map nameToFormatterMap = new HashMap();
// Transaction template used to execute the word queries.
// NOTE(review): the name suggests it starts a new transaction - confirm in the bean configuration.
protected TransactionTemplate txTemplateNew;
public SparseDataExporterImpl() {
super();
}
/**
 * Records a nominal word and its value for one instance.
 * <p>
 * The instance id is registered if it is not yet known, the word/value pair is
 * stored in the instance's nominal word map, and the value is added to the set
 * of values seen for that word. Missing maps/sets are created on demand.
 *
 * @param sparseData
 *            container that is updated in place.
 * @param instanceId
 *            id of the instance the word belongs to.
 * @param word
 *            name of the nominal word.
 * @param wordValue
 *            value of the word for this instance.
 */
protected void addNominalWordToInstance(SparseData sparseData,
        long instanceId, String word, String wordValue) {
    // register the instance id the first time it shows up
    if (!sparseData.getInstanceIds().contains(instanceId)) {
        sparseData.getInstanceIds().add(instanceId);
    }
    // look up both targets before creating anything
    SortedMap wordsOfInstance = sparseData.getInstanceNominalWords()
            .get(instanceId);
    SortedSet valuesOfWord = sparseData.getNominalWordValueMap().get(word);
    if (wordsOfInstance == null) {
        wordsOfInstance = new TreeMap();
        sparseData.getInstanceNominalWords().put(instanceId, wordsOfInstance);
    }
    if (valuesOfWord == null) {
        valuesOfWord = new TreeSet();
        sparseData.getNominalWordValueMap().put(word, valuesOfWord);
    }
    // word -> value for this instance
    wordsOfInstance.put(word, wordValue);
    // remember the value as a valid value of the word
    valuesOfWord.add(wordValue);
}
/**
 * Records a numeric word and its value for one instance.
 * <p>
 * The instance id is registered if it is not yet known, the word/value pair is
 * stored in the instance's numeric word map (created on demand), and the word
 * is added to the collection of numeric words.
 *
 * @param sparseData
 *            container that is updated in place.
 * @param instanceId
 *            id of the instance the word belongs to.
 * @param word
 *            name of the numeric word.
 * @param wordValue
 *            numeric value of the word for this instance.
 */
protected void addNumericWordToInstance(SparseData sparseData,
        long instanceId, String word, double wordValue) {
    // register the instance id the first time it shows up
    if (!sparseData.getInstanceIds().contains(instanceId)) {
        sparseData.getInstanceIds().add(instanceId);
    }
    // fetch, or lazily create, the numeric word map of this instance
    SortedMap numericWordsOfInstance = sparseData.getInstanceNumericWords()
            .get(instanceId);
    if (numericWordsOfInstance == null) {
        numericWordsOfInstance = new TreeMap();
        sparseData.getInstanceNumericWords().put(instanceId,
                numericWordsOfInstance);
    }
    numericWordsOfInstance.put(word, wordValue);
    sparseData.getNumericWords().add(word);
}
/**
 * Exports every label / run / fold / train-test partition of the given
 * instances through the formatter.
 * <p>
 * The {@code scope} property decides when the sparse data is loaded:
 * <ul>
 * <li>not set: once, before {@code initializeExport};</li>
 * <li>{@code "label"}: once per label, before {@code initializeLabel};</li>
 * <li>{@code "fold"}: once per label/run/fold, before {@code initializeFold}.</li>
 * </ul>
 * With any other value no data is loaded here and {@code null} is handed to
 * the formatter. A run or fold of {@code 0} is passed to {@code exportFold} as
 * {@code null}.
 *
 * @param instanceLabel
 *            instances organised by label, run, fold and train/test flag.
 * @param formatter
 *            receives the initialize/export/clear callbacks.
 * @param properties
 *            export configuration (scope, word queries, prepare script).
 * @param bDecorator
 *            passed through to the data loading step.
 * @throws IOException
 *             propagated from the formatter or the data loading step.
 */
public void exportData(InstanceData instanceLabel,
        SparseDataFormatter formatter, Properties properties,
        BagOfWordsDecorator bDecorator) throws IOException {
    final String scope = properties.getProperty("scope", null);
    final boolean reloadPerLabel = "label".equals(scope);
    final boolean reloadPerFold = "fold".equals(scope);
    SparseData sparseData = null;
    if (scope == null) {
        sparseData = loadDataForScope(instanceLabel, properties, bDecorator,
                null, null, null);
    }
    formatter.initializeExport(instanceLabel, properties, sparseData);
    for (String label : instanceLabel.getLabelToInstanceMap().keySet()) {
        if (reloadPerLabel) {
            sparseData = loadDataForScope(instanceLabel, properties,
                    bDecorator, label, null, null);
        }
        formatter.initializeLabel(label,
                instanceLabel.getLabelToInstanceMap().get(label),
                properties, sparseData);
        for (int run : instanceLabel.getLabelToInstanceMap().get(label)
                .keySet()) {
            for (int fold : instanceLabel.getLabelToInstanceMap().get(label)
                    .get(run).keySet()) {
                // only log when there is something beyond the default label/run/fold
                if (LOGGER.isInfoEnabled()
                        && (label.length() > 0 || run > 0 || fold > 0)) {
                    LOGGER.info("exporting, label " + label + " run " + run
                            + " fold " + fold);
                }
                if (reloadPerFold) {
                    sparseData = loadDataForScope(instanceLabel, properties,
                            bDecorator, label, fold, run);
                }
                formatter.initializeFold(sparseData, label, run, fold,
                        instanceLabel.getLabelToInstanceMap().get(label)
                                .get(run).get(fold));
                for (boolean train : instanceLabel.getLabelToInstanceMap()
                        .get(label).get(run).get(fold).keySet()) {
                    formatter.exportFold(sparseData,
                            instanceLabel.getLabelToInstanceMap().get(label)
                                    .get(run).get(fold).get(train),
                            train, label,
                            run == 0 ? null : run,
                            fold == 0 ? null : fold);
                }
                formatter.clearFold();
            }
        }
        formatter.clearLabel();
    }
}

/**
 * Loads the sparse data using the queries and prepare script configured in
 * the properties, restricted to the given label/fold/run (any may be null).
 */
private SparseData loadDataForScope(InstanceData instanceLabel,
        Properties properties, BagOfWordsDecorator bDecorator, String label,
        Integer fold, Integer run) throws IOException {
    return this.loadData(instanceLabel,
            properties.getProperty("numericWordQuery"),
            properties.getProperty("nominalWordQuery"),
            properties.getProperty("prepareScript"),
            properties.getProperty("prepareScriptDelimiter", ";"),
            bDecorator, label, fold, run);
}
/**
 * Loads the instances named by the {@code instanceClassQuery} property,
 * generates folds when the {@code folds} property is present, and then
 * exports the instances through the given formatter.
 *
 * @param props
 *            export configuration.
 * @param formatter
 *            formatter that writes the data.
 * @param bDecorator
 *            passed through to the export.
 * @throws IOException
 *             propagated from the export.
 */
@Override
public void exportData(Properties props, SparseDataFormatter formatter,
        BagOfWordsDecorator bDecorator) throws IOException {
    final KernelUtil util = this.getKernelUtil();
    final String instanceClassQuery = props.getProperty("instanceClassQuery");
    final InstanceData instanceLabel = util.loadInstances(instanceClassQuery);
    // folds are only generated on request
    if (props.containsKey("folds")) {
        util.generateFolds(instanceLabel, props);
    }
    this.exportData(instanceLabel, formatter, props, bDecorator);
}
/**
 * Loads the export configuration from a properties file and exports the data
 * in the requested format.
 * <p>
 * The format name is matched case-insensitively against the keys of
 * {@code nameToFormatterMap}. It is lower-cased with {@link Locale#ROOT} so
 * the lookup does not depend on the JVM's default locale. The format is
 * validated before the properties file is read.
 *
 * @param propertiesFile
 *            properties file to load the export configuration from.
 * @param format
 *            name of the export format.
 * @throws IllegalArgumentException
 *             if {@code format} is null or no formatter is registered for it.
 * @throws IOException
 *             propagated from loading the properties or from the export.
 */
@Override
public void exportData(String propertiesFile, String format)
        throws IOException, InvalidPropertiesFormatException {
    if (format == null) {
        throw new IllegalArgumentException(
                "export format must not be null; valid values: "
                        + nameToFormatterMap.keySet());
    }
    final String formatKey = format.toLowerCase(Locale.ROOT);
    // fail with a clear message instead of a NullPointerException further down
    if (!nameToFormatterMap.containsKey(formatKey)) {
        throw new IllegalArgumentException("unknown export format '"
                + format + "'; valid values: "
                + nameToFormatterMap.keySet());
    }
    Properties props = new Properties();
    this.getKernelUtil().loadProperties(propertiesFile, props);
    this.exportData(props, nameToFormatterMap.get(formatKey)
            .getFormatter(), null);
}
/**
 * Returns the data source held by the JDBC template.
 *
 * @param ds
 *            ignored; the parameter is not read by this method.
 * @return the data source of {@code jdbcTemplate}.
 */
public DataSource getDataSource(DataSource ds) {
    return jdbcTemplate.getDataSource();
}
/**
 * @return the kernel utility helper used by this exporter.
 */
public KernelUtil getKernelUtil() {
    return this.kernelUtil;
}
/**
 * @return the map from export format name to formatter provider; this is the
 *         internal map itself, not a copy.
 */
public Map getNameToFormatterMap() {
    return this.nameToFormatterMap;
}
/**
 * Runs the prepare script, if one is defined.
 * <p>
 * The script is split on the delimiter and every non-blank fragment is
 * executed as an update with the given named parameters. A null or empty
 * script is a no-op.
 *
 * @param prepareScript
 *            sequence of sql statements to be executed with named params.
 * @param prepareScriptDelimiter
 *            delimiter separating the sql statements; it is handed to
 *            {@link String#split(String)} and is therefore interpreted as a
 *            regular expression.
 * @param params
 *            for named parameters in sql statements.
 */
protected void prepare(final String prepareScript,
        final String prepareScriptDelimiter,
        final Map params) {
    if (prepareScript == null || prepareScript.isEmpty()) {
        return;
    }
    for (final String statement : prepareScript.split(prepareScriptDelimiter)) {
        // skip blank fragments, e.g. after a trailing delimiter or on empty lines
        if (statement.trim().isEmpty()) {
            continue;
        }
        this.namedJdbcTemplate.update(statement, params);
    }
}
/**
*
* @param sql
* result set has 3 columns. 1st column - integer - instance id.
* 2nd column - word. 3rd column - word value.
* @param instanceWordMap
* map of instance id to word-word value.
* @param wordValueMap
* map of word to valid values for the word.
* @return populate maps with results of query.
*/
protected void getNominalInstanceWords(final String sql,
final String prepareScript, final String prepareScriptDelimiter,
final SparseData sparseData, final Map params) {
txTemplateNew.execute(new TransactionCallback
© 2015 - 2025 Weber Informatics LLC | Privacy Policy