All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.parser.ctakes.CTAKESConfig Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.ctakes;

import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Properties;

import org.apache.tika.io.NullOutputStream;

/**
 * Configuration for {@link CTAKESContentHandler}.
 * <p>
 * This class allows enabling cTAKES and setting its parameters. Every setting
 * has a built-in default (see the field initializers); a subset of them can be
 * overridden from a {@link Properties} stream using the keys
 * {@code aeDescriptorPath}, {@code UMLSUser}, {@code UMLSPass}, {@code text},
 * {@code metadata}, {@code annotationProps} and {@code separatorChar}.
 * A key that is absent from the stream leaves the corresponding default untouched.
 * <p>
 * The output stream used for CAS serialization is not part of the serialized
 * form of this class; after deserialization it is reset to the null output stream.
 */
public class CTAKESConfig implements Serializable {
	/**
	 * Serial version UID
	 */
	private static final long serialVersionUID = -1599741171775528923L;
	
	// Path to XML descriptor for AnalysisEngine
	private String aeDescriptorPath = "/ctakes-core/desc/analysis_engine/SentencesAndTokensAggregate.xml";
	
	// UMLS username
	private String UMLSUser = "";
	
	// UMLS password
	private String UMLSPass = "";
	
	// Enables formatted output
	private boolean prettyPrint = true; 
	
	// Type of cTAKES (UIMA) serializer
	private CTAKESSerializer serializerType = CTAKESSerializer.XMI;
	
	// OutputStream object used for CAS serialization.
	// Transient: java.io.OutputStream is not Serializable, so keeping it in the
	// serialized form would make writing this object fail. It is restored to the
	// default in readObject().
	private transient OutputStream stream = NullOutputStream.NULL_OUTPUT_STREAM;
	
	// Enables CAS serialization
	private boolean serialize = false;
	
	// Enables text analysis using cTAKES
	private boolean text = true;
	
	// List of metadata to analyze using cTAKES
	private String[] metadata = null;
	
	// List of annotation properties to add to metadata in addition to text covered by an annotation
	private CTAKESAnnotationProperty[] annotationProps = null;
	
	// Character used to separate the annotation properties into metadata
	private char separatorChar = ':';

	/**
	 * Default constructor.
	 * <p>
	 * Loads the {@code CTAKESConfig.properties} resource, resolved relative to the
	 * runtime class of this object. If the resource cannot be found the built-in
	 * defaults are kept.
	 */
	public CTAKESConfig() {
		init(this.getClass().getResourceAsStream("CTAKESConfig.properties"));
	}
	
	/**
	 * Loads properties from InputStream and then tries to close InputStream.
	 * @param stream {@link InputStream} object used to read properties; if {@code null},
	 *               the built-in defaults are kept.
	 * @throws IllegalArgumentException if the {@code annotationProps} property contains a
	 *         name rejected by {@code CTAKESAnnotationProperty.valueOf(String)}.
	 */
	public CTAKESConfig(InputStream stream) {
		init(stream);
	}
	
	/**
	 * Reads the configuration from the given stream, closes the stream and applies
	 * every property that is present. Properties that are absent keep their
	 * current value.
	 * @param stream the stream to read properties from; {@code null} is a no-op.
	 */
	private void init(InputStream stream) {
		if (stream == null) {
			return;
		}
		Properties props = new Properties();
		
		try {
			props.load(stream);
		} catch (IOException e) {
			// TODO warning
			// Best effort: whatever was read before the failure is still applied below.
		} finally {
			try {
				stream.close();
			} catch (IOException ioe) {
				// TODO warning
			}
		}
		
		setAeDescriptorPath(props.getProperty("aeDescriptorPath", getAeDescriptorPath()));
		setUMLSUser(props.getProperty("UMLSUser", getUMLSUser()));
		setUMLSPass(props.getProperty("UMLSPass", getUMLSPass()));
		
		String textValue = props.getProperty("text");
		if (textValue != null) {
			// Trailing whitespace survives Properties.load and would otherwise parse as false.
			setText(Boolean.parseBoolean(textValue.trim()));
		}
		
		// Only override the list-valued settings when the key is really present.
		// Falling back to the *AsString() accessors is not safe: "".split(",") yields
		// a one-element array holding the empty string, and getAnnotationPropsAsString()
		// starts with "coveredText" and is joined with separatorChar, so it cannot be
		// parsed back into annotation property names.
		String metadataValue = props.getProperty("metadata");
		if (metadataValue != null) {
			setMetadata(splitList(metadataValue));
		}
		
		String annotationPropsValue = props.getProperty("annotationProps");
		if (annotationPropsValue != null) {
			setAnnotationProps(splitList(annotationPropsValue));
		}
		
		String separatorValue = props.getProperty("separatorChar");
		if (separatorValue != null && separatorValue.length() > 0) {
			// An empty value keeps the current separator instead of failing on charAt(0).
			setSeparatorChar(separatorValue.charAt(0));
		}
	}
	
	/**
	 * Splits a comma-separated list, trimming each entry and dropping empty ones.
	 * @param value the raw property value, never {@code null}.
	 * @return the non-empty, trimmed entries in their original order; possibly empty.
	 */
	private static String[] splitList(String value) {
		String[] tokens = value.split(",");
		String[] kept = new String[tokens.length];
		int count = 0;
		for (String token : tokens) {
			String trimmed = token.trim();
			if (trimmed.length() > 0) {
				kept[count++] = trimmed;
			}
		}
		String[] result = new String[count];
		System.arraycopy(kept, 0, result, 0, count);
		return result;
	}
	
	/**
	 * Restores the transient output stream to the null output stream after the
	 * non-transient fields have been read.
	 * @param in the stream this object is being deserialized from.
	 * @throws IOException if reading the serialized form fails.
	 * @throws ClassNotFoundException if a class of the serialized form cannot be found.
	 */
	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
		in.defaultReadObject();
		this.stream = NullOutputStream.NULL_OUTPUT_STREAM;
	}
	
	/**
	 * Returns the path to XML descriptor for AnalysisEngine.
	 * @return the path to XML descriptor for AnalysisEngine.
	 */
	public String getAeDescriptorPath() {
		return aeDescriptorPath;
	}
	
	/**
	 * Returns the UMLS username.
	 * @return the UMLS username.
	 */
	public String getUMLSUser() {
		return UMLSUser;
	}
	
	/**
	 * Returns the UMLS password.
	 * @return the UMLS password.
	 */
	public String getUMLSPass() {
		return UMLSPass;
	}
	
	/**
	 * Returns {@code true} if formatted output is enabled, {@code false} otherwise.
	 * @return {@code true} if formatted output is enabled, {@code false} otherwise.
	 */
	public boolean isPrettyPrint() {
		return prettyPrint;
	}
	
	/**
	 * Returns the type of cTAKES (UIMA) serializer used to write the CAS.
	 * @return the type of cTAKES serializer.
	 */
	public CTAKESSerializer getSerializerType() {
		return serializerType;
	}
	
	/**
	 * Returns the {@link OutputStream} object used to write the CAS.
	 * @return the {@link OutputStream} object used to write the CAS.
	 */
	public OutputStream getOutputStream() {
		return stream;
	}
	
	/**
	 * Returns {@code true} if CAS serialization is enabled, {@code false} otherwise.
	 * @return {@code true} if CAS serialization is enabled, {@code false} otherwise.
	 */
	public boolean isSerialize() {
		return serialize;
	}
	
	/**
	 * Returns {@code true} if content text analysis is enabled, {@code false} otherwise.
	 * @return {@code true} if content text analysis is enabled, {@code false} otherwise.
	 */
	public boolean isText() {
		return text;
	}
	
	/**
	 * Returns an array of metadata whose values will be analyzed using cTAKES.
	 * @return the internal array (not a copy) of metadata names, or {@code null} if none has been set.
	 */
	public String[] getMetadata() {
		return metadata;
	}
	
	/**
	 * Returns a string containing a comma-separated list of metadata whose values will be analyzed using cTAKES.
	 * @return the metadata names joined with {@code ","}, or an empty string if none has been set.
	 */
	public String getMetadataAsString() {
		if (metadata == null) {
			return "";
		}
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < metadata.length; i++) {
			if (i > 0) {
				sb.append(",");
			}
			sb.append(metadata[i]);
		}
		return sb.toString();
	}
	
	/**
	 * Returns an array of {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
	 * @return the internal array (not a copy) of annotation properties, or {@code null} if none has been set.
	 */
	public CTAKESAnnotationProperty[] getAnnotationProps() {
		return annotationProps;
	}
	
	/**
	 * Returns a string that starts with {@code coveredText} and is followed by the name
	 * (as given by {@code getName()}) of every configured {@link CTAKESAnnotationProperty},
	 * each one preceded by the separator character.
	 * <p>
	 * Note that this is not the comma-separated format accepted by the
	 * {@code annotationProps} configuration property.
	 * @return {@code "coveredText"} followed by the separator-prefixed property names.
	 */
	public String getAnnotationPropsAsString() {
		StringBuilder sb = new StringBuilder();
		sb.append("coveredText");
		if (annotationProps != null) {
			for (CTAKESAnnotationProperty property : annotationProps) {
				sb.append(separatorChar);
				sb.append(property.getName());
			}
		}
		return sb.toString();
	}
	
	/**
	 * Returns the separator character used for annotation properties.
	 * @return the separator character used for annotation properties.
	 */
	public char getSeparatorChar() {
		return separatorChar;
	}

	/**
	 * Sets the path to XML descriptor for AnalysisEngine.
	 * @param aeDescriptorPath the path to XML descriptor for AnalysisEngine.
	 */
	public void setAeDescriptorPath(String aeDescriptorPath) {
		this.aeDescriptorPath = aeDescriptorPath;
	}

	/**
	 * Sets the UMLS username.
	 * @param uMLSUser the UMLS username.
	 */
	public void setUMLSUser(String uMLSUser) {
		this.UMLSUser = uMLSUser;
	}

	/**
	 * Sets the UMLS password.
	 * @param uMLSPass the UMLS password.
	 */
	public void setUMLSPass(String uMLSPass) {
		this.UMLSPass = uMLSPass;
	}

	/**
	 * Enables the formatted output for serializer.
	 * @param prettyPrint {@code true} to enable formatted output, {@code false} otherwise.
	 */
	public void setPrettyPrint(boolean prettyPrint) {
		this.prettyPrint = prettyPrint;
	}

	/**
	 * Sets the type of cTAKES (UIMA) serializer used to write CAS. 
	 * @param serializerType the type of cTAKES serializer.
	 */
	public void setSerializerType(CTAKESSerializer serializerType) {
		this.serializerType = serializerType;
	}
	
	/**
	 * Sets the {@link OutputStream} object used to write the CAS.
	 * The stream is not retained when this object is serialized.
	 * @param stream the {@link OutputStream} object used to write the CAS.
	 */
	public void setOutputStream(OutputStream stream) {
		this.stream = stream;
	}
	
	/**
	 * Enables CAS serialization.
	 * @param serialize {@code true} to enable CAS serialization, {@code false} otherwise.
	 */
	public void setSerialize(boolean serialize) {
		this.serialize = serialize;
	}
	
	/**
	 * Enables content text analysis using cTAKES.
	 * @param text {@code true} to enable content text analysis, {@code false} otherwise.
	 */
	public void setText(boolean text) {
		this.text = text;
	}
	
	/**
	 * Sets the metadata whose values will be analyzed using cTAKES.
	 * @param metadata the metadata whose values will be analyzed using cTAKES; stored as is (not copied).
	 */
	public void setMetadata(String[] metadata) {
		this.metadata = metadata;
	}
	
	/**
	 * Sets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
	 * @param annotationProps the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata;
	 *                        stored as is (not copied).
	 */
	public void setAnnotationProps(CTAKESAnnotationProperty[] annotationProps) {
		this.annotationProps = annotationProps;
	}
	
	/**
	 * Sets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata,
	 * resolving each one from its constant name.
	 * @param annotationProps the constant names to resolve through
	 *                        {@code CTAKESAnnotationProperty.valueOf(String)}; surrounding
	 *                        whitespace is ignored. {@code null} clears the current setting.
	 * @throws IllegalArgumentException if a name is rejected by {@code valueOf}
	 *         (presumably the standard enum lookup - the type is declared elsewhere).
	 */
	public void setAnnotationProps(String[] annotationProps) {
		if (annotationProps == null) {
			setAnnotationProps((CTAKESAnnotationProperty[]) null);
			return;
		}
		CTAKESAnnotationProperty[] properties = new CTAKESAnnotationProperty[annotationProps.length];
		for (int i = 0; i < annotationProps.length; i++) {
			properties[i] = CTAKESAnnotationProperty.valueOf(annotationProps[i].trim());
		}
		setAnnotationProps(properties);
	}
	
	/**
	 * Sets the separator character used for annotation properties.
	 * @param separatorChar the separator character used for annotation properties.
	 */
	public void setSeparatorChar(char separatorChar) {
		this.separatorChar = separatorChar;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy