org.apache.lucene.analysis.ja.Token Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-analysis-kuromoji Show documentation
Apache Lucene (module: kuromoji)
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ja;

import org.apache.lucene.analysis.ja.dict.JaMorphData;
import org.apache.lucene.analysis.morph.TokenType;

/** Analyzed token with morphological data from its dictionary. */
public class Token extends org.apache.lucene.analysis.morph.Token {
  private final JaMorphData morphData;

  private final int morphId;

  public Token(
      char[] surfaceForm,
      int offset,
      int length,
      int startOffset,
      int endOffset,
      int morphId,
      TokenType type,
      JaMorphData morphData) {
    super(surfaceForm, offset, length, startOffset, endOffset, type);
    this.morphId = morphId;
    this.morphData = morphData;
  }

  @Override
  public String toString() {
    return "Token(\""
        + new String(surfaceForm, offset, length)
        + "\" offset="
        + startOffset
        + " length="
        + length
        + " posLen="
        + posLen
        + " type="
        + type
        + " morphId="
        + morphId
        + " leftID="
        + morphData.getLeftId(morphId)
        + ")";
  }

  /**
   * @return reading. null if token doesn't have reading.
   */
  public String getReading() {
    return morphData.getReading(morphId, surfaceForm, offset, length);
  }

  /**
   * @return pronunciation. null if token doesn't have pronunciation.
   */
  public String getPronunciation() {
    return morphData.getPronunciation(morphId, surfaceForm, offset, length);
  }

  /**
   * @return part of speech.
   */
  public String getPartOfSpeech() {
    return morphData.getPartOfSpeech(morphId);
  }

  /**
   * @return inflection type or null
   */
  public String getInflectionType() {
    return morphData.getInflectionType(morphId);
  }

  /**
   * @return inflection form or null
   */
  public String getInflectionForm() {
    return morphData.getInflectionForm(morphId);
  }

  /**
   * @return base form or null if token is not inflected
   */
  public String getBaseForm() {
    return morphData.getBaseForm(morphId, surfaceForm, offset, length);
  }

  /**
   * Returns true if this token is known word
   *
   * @return true if this token is in standard dictionary. false if not.
   */
  public boolean isKnown() {
    return type == TokenType.KNOWN;
  }

  /**
   * Returns true if this token is unknown word
   *
   * @return true if this token is unknown word. false if not.
   */
  public boolean isUnknown() {
    return type == TokenType.UNKNOWN;
  }

  /**
   * Returns true if this token is defined in user dictionary
   *
   * @return true if this token is in user dictionary. false if not.
   */
  public boolean isUser() {
    return type == TokenType.USER;
  }
}