All downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.lucene.analysis.ko.POS Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ko;

import java.util.Locale;

/**
 * Part of speech classification for Korean based on Sejong corpus classification.
 * The list of tags and their meanings is available here:
 * https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY
 *
 * <p>The {@code byte}-based resolvers ({@link #resolveTag(byte)} and
 * {@link #resolveType(byte)}) look constants up by their declaration order, so the
 * constants of {@link Tag} and {@link Type} must not be reordered, and new constants
 * should only be appended.
 */
public class POS {

  /**
   * The type of the token.
   */
  public enum Type {
    /**
     * A simple morpheme.
     */
    MORPHEME,

    /**
     * Compound noun.
     */
    COMPOUND,

    /**
     * Inflected token.
     */
    INFLECT,

    /**
     * Pre-analysis token.
     */
    PREANALYSIS,
  }

  /**
   * Part of speech tag for Korean based on Sejong corpus classification.
   */
  public enum Tag {
    /**
     * Verbal endings
     */
    E(100, "Verbal endings"),

    /**
     * Interjection
     */
    IC(110, "Interjection"),

    /**
     * Ending Particle
     */
    J(120, "Ending Particle"),

    /**
     * General Adverb
     */
    MAG(130, "General Adverb"),

    /**
     * Conjunctive adverb
     */
    MAJ(131, "Conjunctive adverb"),

    /**
     * Determiner (the runtime description of this tag is "Modifier")
     */
    MM(140, "Modifier"),

    /**
     * General Noun
     */
    NNG(150, "General Noun"),

    /**
     * Proper Noun
     */
    NNP(151, "Proper Noun"),

    /**
     * Dependent noun (following nouns)
     */
    NNB(152, "Dependent noun"),

    /**
     * Dependent noun
     */
    NNBC(153, "Dependent noun"),

    /**
     * Pronoun
     */
    NP(154, "Pronoun"),

    /**
     * Numeral
     */
    NR(155, "Numeral"),

    /**
     * Terminal punctuation (? ! .)
     */
    SF(160, "Terminal punctuation"),

    /**
     * Chinese character
     */
    SH(161, "Chinese Character"),

    /**
     * Foreign language
     */
    SL(162, "Foreign language"),

    /**
     * Number
     */
    SN(163, "Number"),

    /**
     * Space
     */
    SP(164, "Space"),

    /**
     * Closing brackets
     */
    SSC(165, "Closing brackets"),

    /**
     * Opening brackets
     */
    SSO(166, "Opening brackets"),

    /**
     * Separator (· / :)
     */
    SC(167, "Separator"),

    /**
     * Other symbol
     */
    SY(168, "Other symbol"),

    /**
     * Ellipsis
     */
    SE(169, "Ellipsis"),

    /**
     * Adjective
     */
    VA(170, "Adjective"),

    /**
     * Negative designator
     */
    VCN(171, "Negative designator"),

    /**
     * Positive designator
     */
    VCP(172, "Positive designator"),

    /**
     * Verb
     */
    VV(173, "Verb"),

    /**
     * Auxiliary Verb or Adjective
     */
    VX(174, "Auxiliary Verb or Adjective"),

    /**
     * Prefix
     */
    XPN(181, "Prefix"),

    /**
     * Root
     */
    XR(182, "Root"),

    /**
     * Adjective Suffix
     */
    XSA(183, "Adjective Suffix"),

    /**
     * Noun Suffix
     */
    XSN(184, "Noun Suffix"),

    /**
     * Verb Suffix
     */
    XSV(185, "Verb Suffix"),

    /**
     * Unknown
     */
    UNKNOWN(999, "Unknown"),

    /**
     * Unknown
     */
    UNA(-1, "Unknown"),

    /**
     * Unknown
     */
    NA(-1, "Unknown"),

    /**
     * Unknown
     */
    VSV(-1, "Unknown");

    private final int code;
    private final String desc;

    /**
     * Returns the code associated with the tag (as defined in pos-id.def).
     * Codes are not unique: several "unknown" tags share the code {@code -1}.
     */
    public int code() {
      return code;
    }

    /**
     * Returns the description associated with the tag.
     */
    public String description() {
      return desc;
    }

    /**
     * Returns a new part of speech tag.
     * @param code The code for the tag.
     * @param desc The description of the tag.
     */
    Tag(int code, String desc) {
      this.code = code;
      this.desc = desc;
    }
  }

  // values() allocates a fresh array copy on every call; cache one copy of each so
  // the byte-based resolvers do not allocate per lookup. Never exposed or mutated.
  private static final Tag[] TAGS = Tag.values();
  private static final Type[] TYPES = Type.values();

  /**
   * Returns the {@link Tag} of the provided name.
   *
   * <p>The lookup is case-insensitive. Any name starting with {@code J} resolves to
   * {@link Tag#J} and any name starting with {@code E} resolves to {@link Tag#E};
   * every other name must match a {@link Tag} constant exactly.
   *
   * @param name the tag name to resolve
   * @return the matching tag
   * @throws NullPointerException if {@code name} is {@code null}
   * @throws IllegalArgumentException if no {@link Tag} constant has that name
   */
  public static Tag resolveTag(String name) {
    String tagUpper = name.toUpperCase(Locale.ENGLISH);
    if (tagUpper.startsWith("J")) {
      return Tag.J;
    } else if (tagUpper.startsWith("E")) {
      return Tag.E;
    } else {
      return Tag.valueOf(tagUpper);
    }
  }

  /**
   * Returns the {@link Tag} of the provided tag.
   *
   * @param tag the position of the tag in the declaration order of {@link Tag}
   * @return the tag declared at that position
   * @throws ArrayIndexOutOfBoundsException if {@code tag} is out of range and
   *     assertions are disabled (an {@link AssertionError} is raised otherwise)
   */
  public static Tag resolveTag(byte tag) {
    // byte is signed: check the lower bound as well as the upper one.
    assert tag >= 0 && tag < TAGS.length : "invalid tag ordinal: " + tag;
    return TAGS[tag];
  }

  /**
   * Returns the {@link Type} of the provided name.
   *
   * <p>The special name {@code "*"} resolves to {@link Type#MORPHEME}; any other name
   * is matched case-insensitively against the {@link Type} constants.
   *
   * @param name the type name to resolve
   * @return the matching type
   * @throws NullPointerException if {@code name} is {@code null}
   * @throws IllegalArgumentException if no {@link Type} constant has that name
   */
  public static Type resolveType(String name) {
    if ("*".equals(name)) {
      return Type.MORPHEME;
    }
    return Type.valueOf(name.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Returns the {@link Type} of the provided type.
   *
   * @param type the position of the type in the declaration order of {@link Type}
   * @return the type declared at that position
   * @throws ArrayIndexOutOfBoundsException if {@code type} is out of range and
   *     assertions are disabled (an {@link AssertionError} is raised otherwise)
   */
  public static Type resolveType(byte type) {
    // byte is signed: check the lower bound as well as the upper one.
    assert type >= 0 && type < TYPES.length : "invalid type ordinal: " + type;
    return TYPES[type];
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy