All downloads are free. Search and download functionality uses the official Maven repository.

nlp.pipeline.SentenceBuilder Maven / Gradle / Ivy

Go to download

Natural language processing toolbox using Sigma knowledge engineering system.

There is a newer version: 1.1
Show newest version
/*
 * Copyright 2014-2015 IPsoft
 *
 * Author: Andrei Holub [email protected]
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program ; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA  02111-1307 USA
 */

package nlp.pipeline;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;
import semRewrite.substitutor.ClauseSubstitutor;
import semRewrite.substitutor.CoreLabelSequence;

import java.util.List;
import java.util.Optional;
import java.util.function.Function;

/**
 * Rebuilds plain-text sentences from the tokens of annotated sentences,
 * optionally replacing the text of individual tokens on the way.
 */
public class SentenceBuilder {

    /** Token mapping that keeps every token's original text unchanged. */
    public static Function<CoreLabel, String> NO_MUTATION = CoreLabel::originalText;

    /** Single-character tokens that are glued to the preceding token without a space. */
    private static final String NO_SPACE_BEFORE = ",.)!?";

    /** Sentences whose tokens are rendered by {@link #asStrings(Function)}. */
    private final List<CoreMap> sentences;

    /** **************************************************************
     * Creates a builder over all sentences stored in the document under
     * {@code CoreAnnotations.SentencesAnnotation}.
     *
     * NOTE(review): presumably the annotation is absent (null) when the
     * document has not been sentence-split; that case is not handled
     * here - confirm callers always pass an annotated document.
     *
     * @param document the annotated document to read the sentences from
     */
    public SentenceBuilder(Annotation document) {

        this.sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    }

    /** **************************************************************
     * Creates a builder over exactly one sentence.
     *
     * @param sentence the only sentence to render
     */
    public SentenceBuilder(CoreMap sentence) {

        this.sentences = ImmutableList.of(sentence);
    }

    /** ************************************************************
     * Decides on the space by the original text of the label.
     *
     * @param label the token about to be appended
     * @return true if a space has to separate the token from the previous one
     */
    private static boolean needSpaceBefore(CoreLabel label) {

        return needSpaceBefore(label.originalText());
    }

    /** **************************************************************
     * No space is put before an empty text or before one of the
     * single-character tokens {@code , . ) ! ?}.
     *
     * @param text the text about to be appended
     * @return true if a space has to separate the text from the previous one
     */
    private static boolean needSpaceBefore(String text) {

        if (text.isEmpty()) {
            return false;
        }
        boolean closingPunctuation = text.length() == 1
                && NO_SPACE_BEFORE.indexOf(text.charAt(0)) >= 0;
        return !closingPunctuation;
    }

    /** **************************************************************
     * Renders every sentence using the original text of each token.
     *
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings() {

        return asStrings(NO_MUTATION);
    }

    /** **************************************************************
     * Builds String representation of current sentence mutating labels with provided substitutor.
     * A label known to the substitutor is rendered as the text of the group
     * returned by {@code getGroupedByFirstLabel}, or as an empty string when
     * no such group is present; any other label keeps its original text.
     *
     * @param substitutor the source of replacement texts
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings(ClauseSubstitutor substitutor) {

        return asStrings(label -> {
            if (!substitutor.containsKey(label)) {
                return label.originalText();
            }
            // Replace only first element for complex keys
            Optional<CoreLabelSequence> grouped = substitutor.getGroupedByFirstLabel(label);
            return grouped.map(sequence -> sequence.toText()).orElse("");
        });
    }

    /** **************************************************************
     * Transform the labels to String allowing make additional manual mutation on each label.
     * Texts are joined with single spaces, except where
     * {@link #needSpaceBefore(String)} says no space is needed.
     *
     * @param onLabel maps each token to the text that represents it in the output
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings(Function<CoreLabel, String> onLabel) {

        List<String> result = Lists.newArrayList();
        for (CoreMap sentence : this.sentences) {
            StringBuilder builder = new StringBuilder();
            for (CoreLabel label : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String text = onLabel.apply(label);
                if (builder.length() != 0 && needSpaceBefore(text)) {
                    builder.append(" ");
                }
                builder.append(text);
                // Tokens tagged PRP$ get a possessive suffix after their text.
                // NOTE(review): the suffix is added even when the text was not
                // substituted (e.g. with NO_MUTATION) or is empty - confirm
                // this is intended.
                if ("PRP$".equals(label.tag())) {
                    builder.append(text.endsWith("s") ? "'" : "'s");
                }
            }
            result.add(builder.toString());
        }
        return result;

    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy