
org.t3as.ner.classifier.feature.ExistingCleanPhraseFeature Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nicta-ner Show documentation
Show all versions of nicta-ner Show documentation
NICTA t3as Named-Entity Recognition is a Java-based library that extracts named entities, such as
Organisation, Location, Date and Person names, from text. This is the main library that does the
actual NER work.
The newest version!
/*
* #%L
* NICTA t3as Named-Entity Recognition library
* %%
* Copyright (C) 2010 - 2014 NICTA
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.t3as.ner.classifier.feature;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableSet;
import org.t3as.ner.Phrase;
import org.t3as.ner.util.IO;
import org.t3as.ner.util.Strings;
import javax.annotation.concurrent.Immutable;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.t3as.ner.util.Strings.clean;
import static org.t3as.ner.util.Strings.toEngLowerCase;
/**
 * Feature that checks whether a phrase, once simplified, cleaned and lower-cased, is present in a set of
 * known phrases loaded from this feature's resources.
 */
@Immutable
public class ExistingCleanPhraseFeature extends Feature {

    // Populated by loadResources(). Intentionally declared without an initializer.
    // NOTE(review): if the Feature constructor is what invokes loadResources(), an initializer here would run
    // after super(...) returns and overwrite the loaded phrases - confirm against Feature before adding one.
    private ImmutableCollection<String> phrases;

    /**
     * Creates the feature by delegating to the {@link Feature} constructor.
     *
     * @param resources names of the resources holding the known phrases
     * @param weight    value returned by {@link #score(Phrase)} when the phrase is known
     * @throws IOException propagated from the superclass constructor
     */
    public ExistingCleanPhraseFeature(final List<String> resources, final int weight) throws IOException {
        super(resources, weight);
    }

    /**
     * Scores a phrase against the loaded phrase set.
     *
     * @param p the phrase to look up
     * @return the feature weight if the normalised phrase text is in the loaded set, otherwise 0
     */
    @Override
    public double score(final Phrase p) {
        final int w = getWeight();
        // A zero weight can never contribute to the score, so skip the string normalisation and lookup.
        if (w == 0) {
            return 0;
        }
        final String phrase = Strings.simplify(p.phraseString());
        return phrases.contains(toEngLowerCase(clean(phrase))) ? w : 0;
    }

    /**
     * @return the number of distinct phrases currently loaded
     */
    @Override
    public int getSize() {
        return phrases.size();
    }

    /**
     * Reads every configured resource through {@link IO#cleanLowercaseLines}, merges the resulting lines into
     * one de-duplicated set, and stores an immutable copy of it.
     *
     * @throws IOException if reading a resource fails
     */
    @Override
    public void loadResources() throws IOException {
        final Set<String> s = new HashSet<>();
        for (final String resource : getResources()) {
            s.addAll(IO.cleanLowercaseLines(getClass(), resource));
        }
        phrases = ImmutableSet.copyOf(s);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy