All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.text.convert.DocumentFieldConcatenator Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                DocumentFieldConcatenator.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry Text Core
 * 
 * Copyright February 01, 2011, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 */

package gov.sandia.cognition.text.convert;

import gov.sandia.cognition.text.DefaultTextual;
import gov.sandia.cognition.text.Textual;
import gov.sandia.cognition.text.document.Document;
import gov.sandia.cognition.text.document.Field;
import java.util.LinkedList;
import java.util.List;

/**
 * A document-text converter that concatenates multiple text fields from a
 * document together for further processing.
 * 
 * @author  Justin Basilico
 * @since   3.1
 */
public class DocumentFieldConcatenator
    extends AbstractSingleTextualConverter
{
    /** The default field separator is a newline. */
    public static final String DEFAULT_FIELD_SEPARATOR = "\n";

    /** The list of fields to concatenate together from a document. */
    protected List fieldNames;

    /** The field separator. */
    protected String fieldSeparator;

    /**
     * Creates a new {@code DocumentFieldConcatenator} with an empty list of
     * fields and a newline separator.
     */
    public DocumentFieldConcatenator()
    {
        this(new LinkedList(), DEFAULT_FIELD_SEPARATOR);
    }

    /**
     * Creates a new {@code DocumentFieldConcatenator} with the given field
     * names and field separator.
     *
     * @param   fieldNames
     *      The names of the fields to include.
     * @param   fieldSeparator
     *      The field separator.
     */
    public DocumentFieldConcatenator(
        final List fieldNames,
        final String fieldSeparator)
    {
        super();

        this.setFieldNames(fieldNames);
        this.setFieldSeparator(fieldSeparator);
    }

    @Override
    public Textual evaluate(
        final Document document)
    {
        if (document == null)
        {
            return null;
        }
        
        // Concatenate together all the fields.
        final StringBuilder result = new StringBuilder();
        int i = 0;
        for (String fieldName : this.getFieldNames())
        {
            if (i > 0)
            {
                // Add a separator between fields.
                result.append(this.getFieldSeparator());
            }

            // Get the field.
            final Field field = document.getField(fieldName);
            if (field != null)
            {
                // Not null, so append the text.
                result.append(field.getText());
            }
            
            i++;
        }

        // Convert the string builder to a textual.
        return new DefaultTextual(result.toString());
    }

    /**
     * Gets the list of field names whose text are to be concatenated together.
     *
     * @return
     *      The list of field names.
     */
    public List getFieldNames()
    {
        return this.fieldNames;
    }

    /**
     * Sets the list of field names whose text are to be concatenated together.
     *
     * @param   fieldNames
     *      The list of field names.
     */
    public void setFieldNames(
        final List fieldNames)
    {
        this.fieldNames = fieldNames;
    }

    /**
     * Gets the string used as a separator between field text values.
     *
     * @return
     *      The separator between fields used in the result.
     */
    public String getFieldSeparator()
    {
        return fieldSeparator;
    }

    /**
     * Sets the string used as a separator between field text values.
     *
     * @param   fieldSeparator
     *      The separator between fields used in the result.
     */
    public void setFieldSeparator(
        final String fieldSeparator)
    {
        this.fieldSeparator = fieldSeparator;
    }
    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy