semRewrite.substitutor.IdiomSubstitutor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sigma-nlp Show documentation
Show all versions of sigma-nlp Show documentation
Natural language processing toolbox using Sigma knowledge engineering system.
/*
* Copyright 2014-2015 IPsoft
*
* Author: Andrei Holub [email protected]
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package semRewrite.substitutor;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class IdiomSubstitutor extends SimpleSubstitutorStorage {
public IdiomSubstitutor(List labels) {
initialize(labels);
}
/** ***************************************************************
* Looking for idioms in the labels sequence
* E.g. "raining cats and dogs"
*/
private void initialize(List labels) {
System.out.println("Info in IdiomSubstitutor.initialize(): " + labels);
int from = 0;
Map collectedIdioms = Maps.newHashMap();
ArrayList labelsText = Lists.newArrayList(labels.stream()
.map(label -> label.originalText()).toArray(String[]::new)
);
ArrayList synset = Lists.newArrayList();
while (from < labels.size()) {
List tail = labelsText.subList(from + 1, labelsText.size());
// int to = WordNet.wn.getMultiWords().findMultiWord(labels.get(from).lemma(), labels.get(from).originalText(), tail, synset);
// if (to > 0) {
// semRewrite.substitutor.CoreLabelSequence idiom = new CoreLabelSequence(labels.subList(from, from + to));
// collectedIdioms.put(idiom, idiom);
// }
from++;
}
addGroups(collectedIdioms);
System.out.println("Info in IdiomSubstitutor.groups(): " + groups);
}
}