// org.apache.ctakes.ytex.kernel.BagOfWordsData Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.ytex.kernel;
import java.util.*;
/**
 * Data structure populated by AbstractBagOfWordsExporter that has all the
 * instance attributes needed for exporting to various formats.
 * <p>
 * This is a plain mutable holder: every getter hands back the collection
 * stored in the field (no defensive copy) and every setter replaces the
 * stored reference as-is.
 * <p>
 * NOTE(review): the generic type arguments used below (Integer instance ids,
 * String class labels / attribute names / nominal values, Double numeric
 * values, Integer lengths and counts) are reconstructed from the field
 * descriptions -- confirm against the exporters that populate this class.
 *
 * @author vijay
 *
 */
public class BagOfWordsData {
    /**
     * should we perform tf-idf normalization?
     */
    boolean tfIdf;

    /**
     * Map of instance id to class label
     */
    Map<Integer, String> documentClasses = new HashMap<Integer, String>();

    /**
     * class labels
     */
    SortedSet<String> classes = new TreeSet<String>();

    /**
     * numeric attribute labels
     */
    SortedSet<String> numericWords = new TreeSet<String>();

    /**
     * map of instance id to map of attribute name - value pairs
     */
    Map<Integer, SortedMap<String, Double>> instanceNumericWords =
            new HashMap<Integer, SortedMap<String, Double>>();

    /**
     * instance nominal attribute values: map of instance id to map of
     * attribute name - nominal value pairs
     */
    Map<Integer, SortedMap<String, String>> instanceNominalWords =
            new HashMap<Integer, SortedMap<String, String>>();

    /**
     * nominal attribute names and values: map of attribute name to the set of
     * values recorded for that attribute
     */
    SortedMap<String, SortedSet<String>> nominalWordValueMap =
            new TreeMap<String, SortedSet<String>>();

    /**
     * for tf-idf, length of each instance
     */
    Map<Integer, Integer> docLengthMap = new HashMap<Integer, Integer>();

    /**
     * for tf-idf, term-document count map
     */
    Map<String, Integer> idfMap = new HashMap<String, Integer>();

    /**
     * @return the stored map of instance id to class label (not a copy)
     */
    public Map<Integer, String> getDocumentClasses() {
        return documentClasses;
    }

    /**
     * @param documentClasses
     *            map of instance id to class label; stored by reference
     */
    public void setDocumentClasses(Map<Integer, String> documentClasses) {
        this.documentClasses = documentClasses;
    }

    /**
     * @return the stored sorted set of class labels (not a copy)
     */
    public SortedSet<String> getClasses() {
        return classes;
    }

    /**
     * @param classes
     *            sorted set of class labels; stored by reference
     */
    public void setClasses(SortedSet<String> classes) {
        this.classes = classes;
    }

    /**
     * @return the stored sorted set of numeric attribute labels (not a copy)
     */
    public SortedSet<String> getNumericWords() {
        return numericWords;
    }

    /**
     * @param numericWords
     *            sorted set of numeric attribute labels; stored by reference
     */
    public void setNumericWords(SortedSet<String> numericWords) {
        this.numericWords = numericWords;
    }

    /**
     * @return the stored map of instance id to that instance's numeric
     *         attribute name - value pairs (not a copy)
     */
    public Map<Integer, SortedMap<String, Double>> getInstanceNumericWords() {
        return instanceNumericWords;
    }

    /**
     * @param instanceNumericWords
     *            map of instance id to numeric attribute name - value pairs;
     *            stored by reference
     */
    public void setInstanceNumericWords(
            Map<Integer, SortedMap<String, Double>> instanceNumericWords) {
        this.instanceNumericWords = instanceNumericWords;
    }

    /**
     * @return the stored map of instance id to that instance's nominal
     *         attribute values (not a copy)
     */
    public Map<Integer, SortedMap<String, String>> getInstanceNominalWords() {
        return instanceNominalWords;
    }

    /**
     * @param instanceNominalWords
     *            map of instance id to nominal attribute values; stored by
     *            reference
     */
    public void setInstanceNominalWords(
            Map<Integer, SortedMap<String, String>> instanceNominalWords) {
        this.instanceNominalWords = instanceNominalWords;
    }

    /**
     * @return the stored sorted map of nominal attribute names and values
     *         (not a copy)
     */
    public SortedMap<String, SortedSet<String>> getNominalWordValueMap() {
        return nominalWordValueMap;
    }

    /**
     * @param nominalWordValueMap
     *            sorted map of nominal attribute names and values; stored by
     *            reference
     */
    public void setNominalWordValueMap(
            SortedMap<String, SortedSet<String>> nominalWordValueMap) {
        this.nominalWordValueMap = nominalWordValueMap;
    }

    /**
     * @return the stored tf-idf map holding the length of each instance (not
     *         a copy)
     */
    public Map<Integer, Integer> getDocLengthMap() {
        return docLengthMap;
    }

    /**
     * @param docLengthMap
     *            for tf-idf, length of each instance; stored by reference
     */
    public void setDocLengthMap(Map<Integer, Integer> docLengthMap) {
        this.docLengthMap = docLengthMap;
    }

    /**
     * @return the stored tf-idf term-document count map (not a copy)
     */
    public Map<String, Integer> getIdfMap() {
        return idfMap;
    }

    /**
     * @param idfMap
     *            for tf-idf, term-document count map; stored by reference
     */
    public void setIdfMap(Map<String, Integer> idfMap) {
        this.idfMap = idfMap;
    }

    /**
     * @return true if tf-idf normalization should be performed
     */
    public boolean isTfIdf() {
        return tfIdf;
    }

    /**
     * @param tfIdf
     *            whether tf-idf normalization should be performed
     */
    public void setTfIdf(boolean tfIdf) {
        this.tfIdf = tfIdf;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy