nlp.pipeline.SentenceBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sigma-nlp Show documentation
Show all versions of sigma-nlp Show documentation
Natural language processing toolbox using Sigma knowledge engineering system.
/*
* Copyright 2014-2015 IPsoft
*
* Author: Andrei Holub [email protected]
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package nlp.pipeline;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;
import semRewrite.substitutor.ClauseSubstitutor;
import semRewrite.substitutor.CoreLabelSequence;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
/**
 * Rebuilds plain-text sentences from tokenized CoreNLP sentences, optionally
 * rewriting each token on the way (for example to substitute a clause or a
 * coreference mention).
 *
 * <p>Tokens are joined with a single space, except that no space is put in
 * front of an empty token or in front of one of the punctuation marks
 * {@code , . ) ! ?}.
 */
public class SentenceBuilder {

    /** Token mapper that leaves every token as it appeared in the original text. */
    public static final Function<CoreLabel, String> NO_MUTATION = label -> label.originalText();

    /** Sentences to render, one output string per element. */
    private final List<CoreMap> sentences;

    /** **************************************************************
     * Creates a builder over every sentence stored under
     * {@link CoreAnnotations.SentencesAnnotation} in the given document.
     *
     * @param document annotated document whose sentences are rendered
     */
    public SentenceBuilder(Annotation document) {
        this.sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    }

    /** **************************************************************
     * Creates a builder over a single sentence.
     *
     * @param sentence the only sentence to render
     */
    public SentenceBuilder(CoreMap sentence) {
        this.sentences = ImmutableList.of(sentence);
    }

    /** ************************************************************
     * Tells whether a space belongs in front of the label, judged by its
     * original text.
     *
     * @param label token to inspect
     * @return {@code true} when a separating space should precede the token
     */
    private boolean needSpaceBefore(CoreLabel label) {
        return needSpaceBefore(label.originalText());
    }

    /** **************************************************************
     * Tells whether a space belongs in front of the given token text.
     *
     * @param text token text, possibly already mutated
     * @return {@code false} for an empty string and for the single-character
     *         tokens {@code , . ) ! ?}; {@code true} otherwise
     */
    private boolean needSpaceBefore(String text) {
        if (text.isEmpty()) {
            // Nothing will be appended, so a separator would only create a double space.
            return false;
        }
        boolean attachesToPrevious =
                ",".equals(text)
                || ".".equals(text)
                || ")".equals(text)
                || "!".equals(text)
                || "?".equals(text);
        return !attachesToPrevious;
    }

    /** **************************************************************
     * Renders every sentence using the original text of each token.
     *
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings() {
        return asStrings(NO_MUTATION);
    }

    /** **************************************************************
     * Builds String representation of current sentence mutating labels with
     * provided substitutor.
     *
     * <p>A label known to the substitutor is replaced by the text of the
     * group returned by {@code getGroupedByFirstLabel}; when no group is
     * returned for it the label renders as an empty string. Labels unknown to
     * the substitutor keep their original text.
     *
     * @param substitutor source of replacement token groups
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings(ClauseSubstitutor substitutor) {
        return asStrings(label -> {
            if (substitutor.containsKey(label)) {
                // Replace only first element for complex keys
                Optional<CoreLabelSequence> grouped = substitutor.getGroupedByFirstLabel(label);
                return grouped.isPresent() ? grouped.get().toText() : "";
            }
            return label.originalText();
        });
    }

    /** **************************************************************
     * Transform the labels to String allowing make additional manual
     * mutation on each label.
     *
     * @param onLabel maps each token to the text that should appear in the
     *                output; an empty result contributes no text and no
     *                separating space
     * @return one string per sentence, in sentence order
     */
    public List<String> asStrings(Function<CoreLabel, String> onLabel) {
        List<String> result = Lists.newArrayList();
        for (CoreMap sentence : this.sentences) {
            StringBuilder builder = new StringBuilder();
            for (CoreLabel label : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String text = onLabel.apply(label);
                if (builder.length() != 0 && needSpaceBefore(text)) {
                    builder.append(" ");
                }
                builder.append(text);
                // Tokens tagged PRP$ get a possessive suffix after the rendered text.
                // NOTE(review): the suffix is added even when the text was not
                // substituted or is empty -- presumably meant for replaced
                // possessive pronouns only; confirm against callers.
                if ("PRP$".equals(label.tag())) {
                    builder.append(text.endsWith("s") ? "'" : "'s");
                }
            }
            result.add(builder.toString());
        }
        return result;
    }
}