All downloads are FREE. The search and download functionality uses the official Maven repository.

opennlp.tools.lemmatizer.LemmaSample Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.lemmatizer;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * Represents a lemmatized sentence: three parallel sequences holding the tokens,
 * their part-of-speech tags and their lemmas. All three sequences always have the
 * same length, and position {@code i} in each refers to the same word.
 *
 * <p>The constructors copy their arguments and the getters return fresh arrays,
 * so the stored content cannot be altered through references held by callers.
 */
public class LemmaSample implements Serializable {

  // NOTE: tokens and tags are deliberately left non-final. This class declares no
  // serialVersionUID, and field modifiers are part of the default one, so adding
  // 'final' here would change the serialized form. They are only ever assigned in
  // the constructors.
  private List<String> tokens;

  private List<String> tags;

  private final List<String> lemmas;

  /**
   * Creates a sample from parallel arrays.
   *
   * @param tokens the tokens of the sentence
   * @param tags the part-of-speech tags, one per token
   * @param lemmas the lemmas, one per token
   * @throws NullPointerException if any argument is {@code null}
   * @throws IllegalArgumentException if the arrays differ in length
   */
  public LemmaSample(String[] tokens, String[] tags, String[] lemmas) {
    // Arrays.asList is only a view; the delegate constructor takes the actual copy.
    this(Arrays.asList(Objects.requireNonNull(tokens, "tokens must not be null")),
        Arrays.asList(Objects.requireNonNull(tags, "tags must not be null")),
        Arrays.asList(Objects.requireNonNull(lemmas, "lemmas must not be null")));
  }

  /**
   * Creates a sample from parallel lists.
   *
   * @param tokens the tokens of the sentence
   * @param tags the part-of-speech tags, one per token
   * @param lemmas the lemmas, one per token
   * @throws NullPointerException if any argument is {@code null}
   * @throws IllegalArgumentException if the lists differ in size
   */
  public LemmaSample(List<String> tokens, List<String> tags, List<String> lemmas) {
    Objects.requireNonNull(tokens, "tokens must not be null");
    Objects.requireNonNull(tags, "tags must not be null");
    Objects.requireNonNull(lemmas, "lemmas must not be null");

    validateArguments(tokens.size(), tags.size(), lemmas.size());

    // Copy first, then wrap: later changes to the caller's lists must not leak in.
    this.tokens = Collections.unmodifiableList(new ArrayList<>(tokens));
    this.tags = Collections.unmodifiableList(new ArrayList<>(tags));
    this.lemmas = Collections.unmodifiableList(new ArrayList<>(lemmas));
  }

  /**
   * @return a new array containing the tokens of this sample
   */
  public String[] getTokens() {
    return tokens.toArray(new String[0]);
  }

  /**
   * @return a new array containing the part-of-speech tags of this sample
   */
  public String[] getTags() {
    return tags.toArray(new String[0]);
  }

  /**
   * @return a new array containing the lemmas of this sample
   */
  public String[] getLemmas() {
    return lemmas.toArray(new String[0]);
  }

  /**
   * Checks that the three sequences have the same number of elements.
   *
   * <p>Static because it is called from the constructor and uses no instance state.
   *
   * @param tokensSize number of tokens
   * @param tagsSize number of tags
   * @param lemmasSize number of lemmas
   * @throws IllegalArgumentException if the sizes are not all equal
   */
  private static void validateArguments(int tokensSize, int tagsSize, int lemmasSize) {
    if (tokensSize != tagsSize || tagsSize != lemmasSize) {
      throw new IllegalArgumentException(
          "All arrays must have the same length: " +
              "sentenceSize: " + tokensSize +
              ", tagsSize: " + tagsSize +
              ", predsSize: " + lemmasSize + "!");
    }
  }

  /**
   * Renders the sample with one line per word, in the form
   * {@code token<TAB>tag<TAB>lemma}, each line terminated by a newline.
   */
  @Override
  public String toString() {
    StringBuilder lemmaString = new StringBuilder();

    for (int ci = 0; ci < lemmas.size(); ci++) {
      lemmaString.append(tokens.get(ci)).append("\t").append(tags.get(ci))
           .append("\t").append(lemmas.get(ci)).append("\n");
    }
    return lemmaString.toString();
  }

  @Override
  public int hashCode() {
    // List.hashCode() is specified to match Arrays.hashCode() over the same
    // elements, so hashing the lists directly yields the same value as hashing
    // array copies of them, without the copying.
    return Objects.hash(tokens, tags, lemmas);
  }

  /**
   * Two samples are equal when their tokens, tags and lemmas are element-wise
   * equal and in the same order.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }

    if (!(obj instanceof LemmaSample)) {
      return false;
    }

    LemmaSample other = (LemmaSample) obj;
    return tokens.equals(other.tokens)
        && tags.equals(other.tags)
        && lemmas.equals(other.lemmas);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy