All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.parser.ctakes.CTAKESConfig Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.ctakes;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import static org.apache.commons.io.output.NullOutputStream.NULL_OUTPUT_STREAM;

/**
 * Configuration for {@link CTAKESContentHandler}.
 *
 * This class allows to enable cTAKES and set its parameters. Values can be
 * loaded from a properties stream at construction time and changed afterwards
 * through the setters. Only {@code aeDescriptorPath}, {@code UMLSUser},
 * {@code UMLSPass}, {@code text}, {@code metadata}, {@code annotationProps} and
 * {@code separatorChar} are read from the properties; the remaining settings
 * keep their built-in defaults until a setter is called.
 */
public class CTAKESConfig implements Serializable {
    /**
     * Serial version UID
     */
    private static final long serialVersionUID = -1599741171775528923L;

    // Path to XML descriptor for AnalysisEngine
    private String aeDescriptorPath = "/ctakes-core/desc/analysis_engine/SentencesAndTokensAggregate.xml";

    // UMLS username
    private String UMLSUser = "";

    // UMLS password
    private String UMLSPass = "";

    // Enables formatted output
    private boolean prettyPrint = true;

    // Type of cTAKES (UIMA) serializer
    private CTAKESSerializer serializerType = CTAKESSerializer.XMI;

    // OutputStream object used for CAS serialization
    // NOTE(review): java.io.OutputStream is not Serializable, so default Java
    // serialization of this class presumably fails while this field holds a
    // non-serializable stream - confirm whether it should be transient.
    private OutputStream stream = NULL_OUTPUT_STREAM;

    // Enables CAS serialization
    private boolean serialize = false;

    // Enables text analysis using cTAKES
    private boolean text = true;

    // List of metadata to analyze using cTAKES
    private String[] metadata = null;

    // List of annotation properties to add to metadata in addition to text covered by an annotation
    private CTAKESAnnotationProperty[] annotationProps = null;

    // Character used to separate the annotation properties into metadata
    private char separatorChar = ':';

    /**
     * Default constructor. Loads the settings from the
     * {@code CTAKESConfig.properties} class-path resource, resolved relative to
     * the runtime class; if the resource is not found the built-in defaults
     * are kept.
     */
    public CTAKESConfig() {
        init(this.getClass().getResourceAsStream("CTAKESConfig.properties"));
    }

    /**
     * Loads properties from InputStream and then tries to close InputStream.
     * @param stream {@link InputStream} object used to read properties;
     *        if {@code null} the built-in defaults are kept.
     */
    public CTAKESConfig(InputStream stream) {
        init(stream);
    }

    /**
     * Reads the supported properties from the given stream, applies them
     * through the setters and closes the stream. A property that is missing
     * leaves the corresponding setting at its current value.
     * @param stream the stream to read from; may be {@code null}.
     */
    private void init(InputStream stream) {
        if (stream == null) {
            return;
        }
        Properties props = new Properties();

        try {
            props.load(stream);
        } catch (IOException e) {
            // Best effort: whatever was read before the failure is still applied
            // below and the current values cover the rest.
            // TODO warning
        } finally {
            try {
                stream.close();
            } catch (IOException ioe) {
                // TODO warning
            }
        }

        setAeDescriptorPath(props.getProperty("aeDescriptorPath", getAeDescriptorPath()));
        setUMLSUser(props.getProperty("UMLSUser", getUMLSUser()));
        setUMLSPass(props.getProperty("UMLSPass", getUMLSPass()));
        setText(Boolean.parseBoolean(props.getProperty("text", Boolean.toString(isText()))));

        // splitList() maps a missing/blank value to an empty array; a plain
        // "".split(",") would produce one bogus empty metadata name.
        setMetadata(splitList(props.getProperty("metadata", getMetadataAsString())));

        // Only parse annotation properties when the key is present.
        // getAnnotationPropsAsString() is an output format ("coveredText" plus
        // separator-joined names), not a list of names that can be parsed back.
        String annotationNames = props.getProperty("annotationProps");
        if (annotationNames != null) {
            setAnnotationProps(splitList(annotationNames));
        }

        // An empty value has no first character; keep the current separator then.
        String separator = props.getProperty("separatorChar");
        if (separator != null && !separator.isEmpty()) {
            setSeparatorChar(separator.charAt(0));
        }
    }

    /**
     * Splits a comma-separated list into its trimmed, non-empty items.
     * @param csv the comma-separated list; may be {@code null}.
     * @return the items in order; an empty array if there are none.
     */
    private static String[] splitList(String csv) {
        List<String> items = new ArrayList<String>();
        if (csv != null) {
            for (String item : csv.split(",")) {
                String trimmed = item.trim();
                if (!trimmed.isEmpty()) {
                    items.add(trimmed);
                }
            }
        }
        return items.toArray(new String[items.size()]);
    }

    /**
     * Returns the path to XML descriptor for AnalysisEngine.
     * @return the path to XML descriptor for AnalysisEngine.
     */
    public String getAeDescriptorPath() {
        return aeDescriptorPath;
    }

    /**
     * Returns the UMLS username.
     * @return the UMLS username.
     */
    public String getUMLSUser() {
        return UMLSUser;
    }

    /**
     * Returns the UMLS password.
     * @return the UMLS password.
     */
    public String getUMLSPass() {
        return UMLSPass;
    }

    /**
     * Returns {@code true} if formatted output is enabled, {@code false} otherwise.
     * @return {@code true} if formatted output is enabled, {@code false} otherwise.
     */
    public boolean isPrettyPrint() {
        return prettyPrint;
    }

    /**
     * Returns the type of cTAKES (UIMA) serializer used to write the CAS.
     * @return the type of cTAKES serializer.
     */
    public CTAKESSerializer getSerializerType() {
        return serializerType;
    }

    /**
     * Returns the {@link OutputStream} object used to write the CAS.
     * @return the {@link OutputStream} object used to write the CAS.
     */
    public OutputStream getOutputStream() {
        return stream;
    }

    /**
     * Returns {@code true} if CAS serialization is enabled, {@code false} otherwise.
     * @return {@code true} if CAS serialization output is enabled, {@code false} otherwise.
     */
    public boolean isSerialize() {
        return serialize;
    }

    /**
     * Returns {@code true} if content text analysis is enabled, {@code false} otherwise.
     * @return {@code true} if content text analysis is enabled, {@code false} otherwise.
     */
    public boolean isText() {
        return text;
    }

    /**
     * Returns an array of metadata whose values will be analyzed using cTAKES.
     * @return the stored array itself (not a copy); may be {@code null} if no metadata was set.
     */
    public String[] getMetadata() {
        return metadata;
    }

    /**
     * Returns a string containing a comma-separated list of metadata whose values will be analyzed using cTAKES.
     * @return the metadata names joined with {@code ","}, or an empty string if no metadata is set.
     */
    public String getMetadataAsString() {
        if (metadata == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < metadata.length; i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(metadata[i]);
        }
        return sb.toString();
    }

    /**
     * Returns an array of {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
     * @return the stored array itself (not a copy); may be {@code null} if no properties were set.
     */
    public CTAKESAnnotationProperty[] getAnnotationProps() {
        return annotationProps;
    }

    /**
     * Returns a string that starts with {@code coveredText} followed by the name of
     * every configured {@link CTAKESAnnotationProperty}, each preceded by the
     * separator character (see {@link #getSeparatorChar()}).
     * @return {@code "coveredText"} alone if no annotation properties are set,
     *         otherwise {@code "coveredText"} plus the separator-prefixed property names.
     */
    public String getAnnotationPropsAsString() {
        StringBuilder sb = new StringBuilder();
        sb.append("coveredText");
        if (annotationProps != null) {
            for (CTAKESAnnotationProperty property : annotationProps) {
                sb.append(separatorChar);
                sb.append(property.getName());
            }
        }
        return sb.toString();
    }

    /**
     * Returns the separator character used for annotation properties.
     * @return the separator character used for annotation properties.
     */
    public char getSeparatorChar() {
        return separatorChar;
    }

    /**
     * Sets the path to XML descriptor for AnalysisEngine.
     * @param aeDescriptorPath the path to XML descriptor for AnalysisEngine.
     */
    public void setAeDescriptorPath(String aeDescriptorPath) {
        this.aeDescriptorPath = aeDescriptorPath;
    }

    /**
     * Sets the UMLS username.
     * @param uMLSUser the UMLS username.
     */
    public void setUMLSUser(String uMLSUser) {
        this.UMLSUser = uMLSUser;
    }

    /**
     * Sets the UMLS password.
     * @param uMLSPass the UMLS password.
     */
    public void setUMLSPass(String uMLSPass) {
        this.UMLSPass = uMLSPass;
    }

    /**
     * Enables the formatted output for serializer.
     * @param prettyPrint {@code true} to enable formatted output, {@code false} otherwise.
     */
    public void setPrettyPrint(boolean prettyPrint) {
        this.prettyPrint = prettyPrint;
    }

    /**
     * Sets the type of cTAKES (UIMA) serializer used to write CAS.
     * @param serializerType the type of cTAKES serializer.
     */
    public void setSerializerType(CTAKESSerializer serializerType) {
        this.serializerType = serializerType;
    }

    /**
     * Sets the {@link OutputStream} object used to write the CAS.
     * @param stream the {@link OutputStream} object used to write the CAS.
     */
    public void setOutputStream(OutputStream stream) {
        this.stream = stream;
    }

    /**
     * Enables CAS serialization.
     * @param serialize {@code true} to enable CAS serialization, {@code false} otherwise.
     */
    public void setSerialize(boolean serialize) {
        this.serialize = serialize;
    }

    /**
     * Enables content text analysis using cTAKES.
     * @param text {@code true} to enable content text analysis, {@code false} otherwise.
     */
    public void setText(boolean text) {
        this.text = text;
    }

    /**
     * Sets the metadata whose values will be analyzed using cTAKES.
     * @param metadata the metadata whose values will be analyzed using cTAKES;
     *        the array is stored as given, not copied.
     */
    public void setMetadata(String[] metadata) {
        this.metadata = metadata;
    }

    /**
     * Sets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
     * @param annotationProps the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata;
     *        the array is stored as given, not copied.
     */
    public void setAnnotationProps(CTAKESAnnotationProperty[] annotationProps) {
        this.annotationProps = annotationProps;
    }

    /**
     * Sets the {@link CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
     * Each name is trimmed and then resolved with {@code CTAKESAnnotationProperty.valueOf}.
     * @param annotationProps the names of the {@link CTAKESAnnotationProperty}'s that will be
     *        included into cTAKES metadata; {@code null} clears the current properties.
     */
    public void setAnnotationProps(String[] annotationProps) {
        if (annotationProps == null) {
            setAnnotationProps((CTAKESAnnotationProperty[]) null);
            return;
        }
        CTAKESAnnotationProperty[] properties = new CTAKESAnnotationProperty[annotationProps.length];
        for (int i = 0; i < annotationProps.length; i++) {
            // Trim so that lists written as "A, B" resolve as well.
            properties[i] = CTAKESAnnotationProperty.valueOf(annotationProps[i].trim());
        }
        setAnnotationProps(properties);
    }

    /**
     * Sets the separator character used for annotation properties.
     * @param separatorChar the separator character used for annotation properties.
     */
    public void setSeparatorChar(char separatorChar) {
        this.separatorChar = separatorChar;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy