// semRewrite.substitutor.NounSubstitutor -- part of the sigma-nlp artifact (Maven / Gradle / Ivy).
// sigma-nlp: Natural language processing toolbox using Sigma knowledge engineering system.
/*
Copyright 2014-2015 IPsoft
Author: Andrei Holub [email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
*/
package semRewrite.substitutor;
import semRewrite.substitutor.CoreLabelSequence;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/** **************************************************************
 * Substitutor that finds runs of consecutive labels tagged "NNP"
 * (the Penn Treebank tag for a singular proper noun), such as
 * "Garry Bloom" or "Tim Buk Tu", and registers every run of two or
 * more labels with the inherited storage through addGroups().
 */
public class NounSubstitutor extends SimpleSubstitutorStorage {

    /** **************************************************************
     * Builds the substitutor from a sequence of labels.
     * @param labels the labels to scan, in sentence order; must not be
     *               null and must not contain null elements, because
     *               tag() is called on each one
     */
    public NounSubstitutor(List<CoreLabel> labels) {

        initialize(labels);
    }

    /** **************************************************************
     * Collects information about continuous noun sequences like
     * "Garry Bloom", "Tim Buk Tu" and hands the result to addGroups().
     * @param labels the labels to scan, in sentence order
     */
    private void initialize(List<CoreLabel> labels) {

        Map<CoreLabelSequence, CoreLabelSequence> groupsFull = parseGroupsAndCollectRoots(labels);
        addGroups(groupsFull);
    }

    /** **************************************************************
     * Splits the labels into maximal runs of adjacent "NNP" labels and
     * returns every run that contains more than one label.
     * @param labels the labels to scan, in sentence order
     * @return a map in which each multi-label run is stored as both key
     *         and value; empty when no such run exists
     */
    private Map<CoreLabelSequence, CoreLabelSequence> parseGroupsAndCollectRoots(List<CoreLabel> labels) {

        System.out.println("Info in NounSubstitutor.parseGroupsAndCollectRoots(): " + labels);
        Map<CoreLabelSequence, CoreLabelSequence> sequences = Maps.newHashMap();
        // firstLabel is the label that opened the run currently being built.
        CoreLabel firstLabel = null;
        List<CoreLabel> sequence = Lists.newArrayList();
        for (CoreLabel label : labels) {
            // Extend the run only when both this label and the label that
            // opened the run are tagged "NNP".
            if (firstLabel != null
                    && "NNP".equals(label.tag())
                    && Objects.equals(label.tag(), firstLabel.tag())) {
                sequence.add(label);
            }
            else {
                // The run is broken: keep it if it is a real group, then
                // start a new run with the current label.
                storeIfGroup(sequences, sequence);
                firstLabel = label;
                sequence = Lists.newArrayList(firstLabel);
            }
        }
        // The final run is never closed inside the loop, so flush it here.
        storeIfGroup(sequences, sequence);
        return sequences;
    }

    /** **************************************************************
     * Records a run in the result map when it holds more than one label.
     * The run is stored as both key and value.
     * NOTE(review): presumably addGroups() treats the value as the group
     * that replaces its key -- confirm against SimpleSubstitutorStorage.
     * @param sequences the result map to add to
     * @param sequence  the run of labels that has just ended
     */
    private static void storeIfGroup(Map<CoreLabelSequence, CoreLabelSequence> sequences,
                                     List<CoreLabel> sequence) {

        if (sequence.size() > 1) {
            CoreLabelSequence s = new CoreLabelSequence(sequence);
            sequences.put(s, s);
        }
    }
}