// All downloads are free. Search and download functionality uses the official Maven repository.

// opennlp.tools.entitylinker.EntityLinker (Maven / Gradle / Ivy)

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.entitylinker;

import java.io.IOException;
import java.util.List;

import opennlp.tools.util.Span;

/**
 * EntityLinkers establish connections to external data to enrich extracted
 * entities. For instance, for Location entities a linker can be developed to
 * look up each found location in a geonames gazetteer. Another example may be
 * to find people's names and look them up in a database or active directory.
 * Intended to return the n best matches for any given search, but can also be
 * implemented as deterministic.
 *
 * @param <T> a type that extends Span. LinkedSpan and BaseLink are provided so
 *            that {@code EntityLinker<LinkedSpan<BaseLink>>} can be used as a
 *            default
 */
public interface EntityLinker<T extends Span> {

  /**
   * Allows for passing properties through the EntityLinkerFactory into all
   * impls dynamically. EntityLinker impls should initialize reusable objects
   * used by the impl in this method. If this is done, any errors will be
   * captured and thrown by the EntityLinkerFactory.
   *
   * @param initializationData the EntityLinkerProperties object that contains
   *                           properties needed by the impl, as well as any
   *                           other objects required for the impl
   * @throws IOException if the impl fails while initializing from the given
   *                     properties
   */
  void init(EntityLinkerProperties initializationData) throws IOException;

  /**
   * Links an entire document of named entities to an external source.
   *
   * @param doctext          the full text of the document
   * @param sentences        the sentence spans of the document; the outer
   *                         index of {@code tokensBySentence} and
   *                         {@code namesBySentence} refers to these sentences
   * @param tokensBySentence the token spans that correspond to each sentence.
   *                         The outer array refers to the sentence, the inner
   *                         array is the tokens for the outer sentence. Similar
   *                         in nature to a Map of SentenceIndex keys to List of
   *                         tokens as values
   * @param namesBySentence  the name spans that correspond to each sentence.
   *                         The outer array refers to the sentence, the inner
   *                         array holds the name spans for that same sentence's
   *                         tokens. Similar in nature to
   *                         {@code Map<SentenceIndex, List<Name Spans For This
   *                         Sentence's Tokens>>}
   * @return the linked results, as a list of {@code T}, produced by the impl
   *         for the document
   */
  List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence,
      Span[][] namesBySentence);

  /**
   * Links the names that correspond to the tokens[] spans. The sentenceIndex
   * can be used to get the sentence text and tokens from the text based on the
   * sentence and token spans. The text is available for additional context.
   *
   * @param doctext          the full text of the document
   * @param sentences        the sentence spans of the document; the outer
   *                         index of {@code tokensBySentence} and
   *                         {@code namesBySentence} refers to these sentences
   * @param tokensBySentence the token spans that correspond to each sentence.
   *                         The outer array refers to the sentence, the inner
   *                         array is the tokens for the outer sentence. Similar
   *                         in nature to a Map of SentenceIndex keys to List of
   *                         tokens as values
   * @param namesBySentence  the name spans that correspond to each sentence.
   *                         The outer array refers to the sentence, the inner
   *                         array holds the name spans for that same sentence's
   *                         tokens. Similar in nature to
   *                         {@code Map<SentenceIndex, List<Name Spans For This
   *                         Sentence's Tokens>>}
   * @param sentenceIndex    the index to the sentence span that the tokens[]
   *                         Span[] corresponds to
   * @return the linked results, as a list of {@code T}, produced by the impl
   *         for the indicated sentence
   */
  List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence,
      Span[][] namesBySentence, int sentenceIndex);
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy