All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.ctakes.ytex.kernel.BagOfWordsData Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.ytex.kernel;

import java.util.*;

/**
 * Data structure populated by AbstractBagOfWordsExporter that has all the
 * instance attributes needed for exporting to various formats.
 * 
 * @author vijay
 * 
 */
/**
 * Mutable holder for the per-instance attributes collected for a bag-of-words
 * export: class labels, numeric and nominal attribute values, and the counts
 * used for tf-idf normalization.
 * <p>
 * Every getter returns the live internal collection and every setter stores
 * the supplied reference as-is; no defensive copies are made, so changes made
 * through a returned collection are visible to all holders of this object.
 * <p>
 * NOTE(review): the generic type arguments below were restored from the field
 * documentation (instance id -&gt; label, attribute name -&gt; value, ...).
 * Instance ids are assumed to be {@code Integer} and the nested collections
 * sorted ({@code SortedMap}/{@code SortedSet}) - confirm against the code that
 * populates this class.
 */
public class BagOfWordsData {
	/**
	 * should we perform tf-idf normalization?
	 */
	boolean tfIdf;
	/**
	 * Map of instance id to class label
	 */
	Map<Integer, String> documentClasses = new HashMap<Integer, String>();
	/**
	 * class labels
	 */
	SortedSet<String> classes = new TreeSet<String>();
	/**
	 * numeric attribute labels
	 */
	SortedSet<String> numericWords = new TreeSet<String>();
	/**
	 * map of instance id to map of attribute name - value pairs
	 */
	Map<Integer, SortedMap<String, Double>> instanceNumericWords = new HashMap<Integer, SortedMap<String, Double>>();
	/**
	 * instance nominal attribute values: map of instance id to map of
	 * attribute name - value pairs
	 */
	Map<Integer, SortedMap<String, String>> instanceNominalWords = new HashMap<Integer, SortedMap<String, String>>();
	/**
	 * nominal attribute names and values: map of attribute name to the set of
	 * values for that attribute
	 */
	SortedMap<String, SortedSet<String>> nominalWordValueMap = new TreeMap<String, SortedSet<String>>();
	/**
	 * for tf-idf, length of each instance
	 */
	Map<Integer, Integer> docLengthMap = new HashMap<Integer, Integer>();
	/**
	 * for tf-idf, term-document count map
	 */
	Map<String, Integer> idfMap = new HashMap<String, Integer>();

	/**
	 * @return the live map of instance id to class label
	 */
	public Map<Integer, String> getDocumentClasses() {
		return documentClasses;
	}

	/**
	 * @param documentClasses
	 *            map of instance id to class label; stored by reference
	 */
	public void setDocumentClasses(Map<Integer, String> documentClasses) {
		this.documentClasses = documentClasses;
	}

	/**
	 * @return the live sorted set of class labels
	 */
	public SortedSet<String> getClasses() {
		return classes;
	}

	/**
	 * @param classes
	 *            sorted set of class labels; stored by reference
	 */
	public void setClasses(SortedSet<String> classes) {
		this.classes = classes;
	}

	/**
	 * @return the live sorted set of numeric attribute labels
	 */
	public SortedSet<String> getNumericWords() {
		return numericWords;
	}

	/**
	 * @param numericWords
	 *            sorted set of numeric attribute labels; stored by reference
	 */
	public void setNumericWords(SortedSet<String> numericWords) {
		this.numericWords = numericWords;
	}

	/**
	 * @return the live map of instance id to that instance's numeric
	 *         attribute name - value pairs
	 */
	public Map<Integer, SortedMap<String, Double>> getInstanceNumericWords() {
		return instanceNumericWords;
	}

	/**
	 * @param instanceNumericWords
	 *            map of instance id to numeric attribute name - value pairs;
	 *            stored by reference
	 */
	public void setInstanceNumericWords(
			Map<Integer, SortedMap<String, Double>> instanceNumericWords) {
		this.instanceNumericWords = instanceNumericWords;
	}

	/**
	 * @return the live map of instance id to that instance's nominal
	 *         attribute name - value pairs
	 */
	public Map<Integer, SortedMap<String, String>> getInstanceNominalWords() {
		return instanceNominalWords;
	}

	/**
	 * @param instanceNominalWords
	 *            map of instance id to nominal attribute name - value pairs;
	 *            stored by reference
	 */
	public void setInstanceNominalWords(
			Map<Integer, SortedMap<String, String>> instanceNominalWords) {
		this.instanceNominalWords = instanceNominalWords;
	}

	/**
	 * @return the live sorted map of nominal attribute name to its values
	 */
	public SortedMap<String, SortedSet<String>> getNominalWordValueMap() {
		return nominalWordValueMap;
	}

	/**
	 * @param nominalWordValueMap
	 *            sorted map of nominal attribute name to its values; stored
	 *            by reference
	 */
	public void setNominalWordValueMap(
			SortedMap<String, SortedSet<String>> nominalWordValueMap) {
		this.nominalWordValueMap = nominalWordValueMap;
	}

	/**
	 * @return the live map of instance id to instance length (used for
	 *         tf-idf)
	 */
	public Map<Integer, Integer> getDocLengthMap() {
		return docLengthMap;
	}

	/**
	 * @param docLengthMap
	 *            map of instance id to instance length; stored by reference
	 */
	public void setDocLengthMap(Map<Integer, Integer> docLengthMap) {
		this.docLengthMap = docLengthMap;
	}

	/**
	 * @return the live term-document count map (used for tf-idf)
	 */
	public Map<String, Integer> getIdfMap() {
		return idfMap;
	}

	/**
	 * @param idfMap
	 *            term-document count map; stored by reference
	 */
	public void setIdfMap(Map<String, Integer> idfMap) {
		this.idfMap = idfMap;
	}

	/**
	 * @return {@code true} if tf-idf normalization should be performed
	 */
	public boolean isTfIdf() {
		return tfIdf;
	}

	/**
	 * @param tfIdf
	 *            whether tf-idf normalization should be performed
	 */
	public void setTfIdf(boolean tfIdf) {
		this.tfIdf = tfIdf;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy