All downloads are free. Search and download functionality uses the official Maven repository.

opennlp.tools.cmdline.namefind.NameSampleCountersStream Maven / Gradle / Ivy

There is a newer version: 2.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.cmdline.namefind;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;

/**
 * A pass-through stream filter that tallies statistics about the
 * {@link NameSample} objects read through it: the number of samples
 * (reported as sentences), the number of tokens, and the number of name
 * spans per span type.
 *
 * <p>Counters are only updated by {@link #read()}; samples that are never
 * read through this stream are not counted.
 */
public class NameSampleCountersStream
    extends FilterObjectStream<NameSample, NameSample> {

  private static final Logger logger = LoggerFactory.getLogger(NameSampleCountersStream.class);

  /** Number of non-null samples returned by {@link #read()} since the last reset. */
  private int sentenceCount;

  /** Sum of the sentence lengths of all samples read since the last reset. */
  private int tokenCount;

  /** Number of name spans seen so far, keyed by the span's type. */
  private Map<String, Integer> nameCounters = new HashMap<>();

  /**
   * Wraps the given sample stream.
   *
   * @param samples the underlying stream whose samples are counted as they are read
   */
  protected NameSampleCountersStream(ObjectStream<NameSample> samples) {
    super(samples);
  }

  /**
   * Reads the next sample from the underlying stream and updates the counters.
   *
   * @return the sample returned by the underlying stream, unchanged; {@code null}
   *         if the underlying stream returned {@code null}, in which case no
   *         counter is touched
   * @throws IOException if the underlying stream fails to read
   */
  @Override
  public NameSample read() throws IOException {

    NameSample sample = samples.read();

    if (sample != null) {
      sentenceCount++;
      tokenCount += sample.getSentence().length;

      for (Span nameSpan : sample.getNames()) {
        // Starts the counter at 1 for a type seen for the first time,
        // otherwise adds 1 to the existing count.
        nameCounters.merge(nameSpan.getType(), 1, Integer::sum);
      }
    }

    return sample;
  }

  /**
   * Resets the underlying stream and then clears all counters.
   *
   * <p>The counters are only cleared if the delegated reset completes
   * without throwing.
   *
   * @throws IOException if the delegated reset fails
   * @throws UnsupportedOperationException if the delegated reset is not supported
   */
  @Override
  public void reset() throws IOException, UnsupportedOperationException {
    super.reset();

    sentenceCount = 0;
    tokenCount = 0;
    // A fresh map (rather than clear()) leaves maps handed out earlier by
    // getNameCounters() untouched.
    nameCounters = new HashMap<>();
  }

  /**
   * @return the number of samples read since construction or the last {@link #reset()}
   */
  public int getSentenceCount() {
    return sentenceCount;
  }

  /**
   * @return the total number of tokens in the samples read since construction
   *         or the last {@link #reset()}
   */
  public int getTokenCount() {
    return tokenCount;
  }

  /**
   * @return an unmodifiable view of the current per-type name counts, keyed by
   *         span type
   */
  public Map<String, Integer> getNameCounters() {
    return Collections.unmodifiableMap(nameCounters);
  }

  /**
   * Logs the sentence count, the token count, one line per name type and the
   * total number of names at {@code INFO} level.
   */
  public void printSummary() {
    logger.info("Training data summary:");
    logger.info("#Sentences: {}", getSentenceCount());
    logger.info("#Tokens: {}", getTokenCount());

    int totalNames = 0;
    for (Map.Entry<String, Integer> counter : getNameCounters().entrySet()) {
      logger.info("# {} entities: {}", counter.getKey(), counter.getValue());
      totalNames += counter.getValue();
    }
    logger.info("# total: {}", totalNames);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy