resources.pipelines.groovy.filterTokens4LDA.groovy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of learningframework Show documentation
A GATE plugin that provides many different machine learning
algorithms for a wide range of NLP-related machine learning tasks like
text classification, tagging, or chunking.
// Pre-filter Token annotations: the selected subset is added as TokenWord
// annotations to the output annotation set (the set used for LDA)
// Johann Petrak, 2018-09-27
import gate.Utils;
// Copies every suitable Token from inputAS into outputAS as a "TokenWord"
// annotation that carries an extra "lc_string" feature (the lower-cased
// token string). A Token is selected when
//   - its "kind" feature equals "word",
//   - its "category" feature (presumably the part-of-speech tag) does not
//     start with "V", and
//   - its "string" feature is longer than one character.
// inputAS and outputAS are not defined in this script; they are expected to
// be provided by the environment that runs it.

// remove what we added last time, if any
def oldAnns = outputAS.get("TokenWord")
outputAS.removeAll(oldAnns)

for (Annotation ann : inputAS.get("Token")) {
    def fm = ann.getFeatures()

    // only word tokens are candidates; a missing "kind" never matches
    if (!"word".equals(fm.get("kind"))) {
        continue
    }

    // skip tokens whose category starts with "V"; a missing category is
    // treated as "does not start with V" instead of aborting the script
    def pos = (String) fm.get("category")
    if (pos != null && pos.startsWith("V")) {
        continue
    }

    // skip tokens that have no string or only a single character
    def str = (String) fm.get("string")
    if (str == null || str.length() <= 1) {
        continue
    }

    // as before, the source Token itself also receives the lc_string feature
    fm.put("lc_string", str.toLowerCase())
    // hand the new annotation its own copy of the features so that the Token
    // and the TokenWord do not share one mutable FeatureMap instance
    gate.Utils.addAnn(outputAS, ann, "TokenWord", gate.Utils.toFeatureMap(fm))
}