All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.t3as.ner.classifier.feature.ExistingCleanPhraseFeature Maven / Gradle / Ivy

Go to download

The NICTA t3as Named-Entity Recognition is a Java based Named-Entity Recognition library which extracts named entities from text such as Organisation, Location, Date and Person names. This is the main library that does the actual NER work.

The newest version!
/*
 * #%L
 * NICTA t3as Named-Entity Recognition library
 * %%
 * Copyright (C) 2010 - 2014 NICTA
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package org.t3as.ner.classifier.feature;

import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableSet;
import org.t3as.ner.Phrase;
import org.t3as.ner.util.IO;
import org.t3as.ner.util.Strings;

import javax.annotation.concurrent.Immutable;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.t3as.ner.util.Strings.clean;
import static org.t3as.ner.util.Strings.toEngLowerCase;

/**
 * Feature that scores a phrase by checking whether its cleaned, lower-cased form
 * is present in a set of known phrases loaded from the configured resources.
 */
@Immutable
public class ExistingCleanPhraseFeature extends Feature {

    // Deliberately neither final nor given an initialiser: the only assignment is in
    // loadResources(). NOTE(review): presumably the Feature constructor invokes
    // loadResources(); if so, an inline initialiser here would run after super(...)
    // and overwrite the loaded set - confirm against Feature before changing this.
    private ImmutableCollection<String> phrases;

    /**
     * Creates the feature; both arguments are handed straight to the {@link Feature} constructor.
     *
     * @param resources names of the resources holding the known phrases
     * @param weight    value returned by {@link #score(Phrase)} when a phrase matches
     * @throws IOException propagated from the superclass constructor
     */
    public ExistingCleanPhraseFeature(final List<String> resources, final int weight) throws IOException {
        super(resources, weight);
    }

    /**
     * Scores a phrase against the loaded phrase set.
     *
     * @param p the phrase to score
     * @return the feature weight if the simplified, cleaned and lower-cased phrase string
     *         is one of the loaded phrases, otherwise 0
     */
    @Override
    public double score(final Phrase p) {
        final int w = getWeight();
        // A zero weight can never contribute to the score, so skip the string work and the lookup.
        if (w == 0) return 0;

        final String phrase = Strings.simplify(p.phraseString());
        return phrases.contains(toEngLowerCase(clean(phrase))) ? w : 0;
    }

    /**
     * @return the number of distinct phrases currently loaded
     */
    @Override
    public int getSize() { return phrases.size(); }

    /**
     * Reads every configured resource via {@link IO#cleanLowercaseLines}, merges the lines
     * into one set (dropping duplicates across resources) and stores an immutable copy.
     *
     * @throws IOException if reading a resource fails
     */
    @Override
    public void loadResources() throws IOException {
        final Set<String> s = new HashSet<>();
        for (final String resource : getResources()) {
            s.addAll(IO.cleanLowercaseLines(getClass(), resource));
        }
        phrases = ImmutableSet.copyOf(s);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy