All downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser Maven / Gradle / Ivy

There is a newer version: 1.0.18
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tika.parser.ner.opennlp;

import org.apache.tika.parser.ner.NERecogniser;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 *
 * This implementation of {@link NERecogniser} chains an array of
 * {@link OpenNLPNameFinder}s for which NER models are
 * available in classpath.
 *
 * The following models are scanned during initialization via class loader.:
 *
 * 
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 *     
 *         
 *     
 * 
Entity TypePath
{@value PERSON} {@value PERSON_FILE}
{@value LOCATION}{@value LOCATION_FILE}
{@value ORGANIZATION}{@value ORGANIZATION_FILE}
{@value TIME}{@value TIME_FILE}
{@value DATE}{@value DATE_FILE}
{@value PERCENT}{@value PERCENT_FILE}
{@value MONEY}{@value MONEY_FILE}
* * @see org.apache.tika.parser.ner.NamedEntityParser#DEFAULT_NER_IMPL */ public class OpenNLPNERecogniser implements NERecogniser { public static final String MODELS_DIR = OpenNLPNERecogniser.class .getPackage().getName().replace(".", "/"); public static final String PERSON_FILE = "ner-person.bin"; public static final String LOCATION_FILE = "ner-location.bin"; public static final String ORGANIZATION_FILE = "ner-organization.bin"; public static final String TIME_FILE = "ner-time.bin"; public static final String DATE_FILE = "ner-date.bin"; public static final String PERCENT_FILE = "ner-percentage.bin"; public static final String MONEY_FILE = "ner-money.bin"; //Default (English) Models for the common 7 classes of named types public static final String NER_PERSON_MODEL = MODELS_DIR + "/" + PERSON_FILE; public static final String NER_LOCATION_MODEL = MODELS_DIR + "/" + LOCATION_FILE; public static final String NER_ORGANIZATION_MODEL = MODELS_DIR + "/" + ORGANIZATION_FILE; public static final String NER_TIME_MODEL = MODELS_DIR + "/" + TIME_FILE; public static final String NER_DATE_MODEL = MODELS_DIR + "/" + DATE_FILE; public static final String NER_PERCENT_MODEL = MODELS_DIR + "/" + PERCENT_FILE; public static final String NER_MONEY_MODEL = MODELS_DIR + "/" + MONEY_FILE; public static final Map DEFAULT_MODELS = new HashMap(){{ put(PERSON, NER_PERSON_MODEL); put(LOCATION, NER_LOCATION_MODEL); put(ORGANIZATION, NER_ORGANIZATION_MODEL); put(TIME, NER_TIME_MODEL); put(DATE, NER_DATE_MODEL); put(PERCENT, NER_PERCENT_MODEL); put(MONEY, NER_MONEY_MODEL); }}; private Set entityTypes; private List nameFinders; private boolean available; /** * Creates a default chain of Name finders using default OpenNLP recognizers */ public OpenNLPNERecogniser(){ this(DEFAULT_MODELS); } /** * Creates a chain of Named Entity recognisers * @param models map of entityType -> model path * NOTE: the model path should be known to class loader. 
*/ public OpenNLPNERecogniser(Map models){ this.nameFinders = new ArrayList<>(); this.entityTypes = new HashSet<>(); for (Map.Entry entry : models.entrySet()) { OpenNLPNameFinder finder = new OpenNLPNameFinder(entry.getKey(), entry.getValue()); if (finder.isAvailable()) { this.nameFinders.add(finder); this.entityTypes.add(entry.getKey()); } } this.entityTypes = Collections.unmodifiableSet(this.entityTypes); this.available = nameFinders.size() > 0; //at least one finder is present } @Override public boolean isAvailable() { return available; } @Override public Set getEntityTypes() { return entityTypes; } @Override public Map> recognise(String text) { String[] tokens = OpenNLPNameFinder.tokenize(text); Map> names = new HashMap<>(); for (OpenNLPNameFinder finder : nameFinders) { names.putAll(finder.findNames(tokens)); } return names; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy