All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.analysis.ko.POS Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ko;

import java.util.Locale;

/**
 * Part of speech classification for Korean based on Sejong corpus classification. The list of tags
 * and their meanings is available here:
 * https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY
 */
public class POS {

  /**
   * Snapshot of {@link Tag#values()} taken once. {@code values()} returns a fresh copy of the
   * constant array on every call, so the lookups below read from this shared array instead. The
   * array is never modified or handed out.
   */
  private static final Tag[] TAGS = Tag.values();

  /** Snapshot of {@link Type#values()} taken once, for the same reason as {@link #TAGS}. */
  private static final Type[] TYPES = Type.values();

  /**
   * The type of the token.
   *
   * <p>The declaration order of the constants is significant: {@link #resolveType(byte)} looks a
   * type up by its position in this list.
   */
  public enum Type {
    /** A simple morpheme. */
    MORPHEME,

    /** Compound noun. */
    COMPOUND,

    /** Inflected token. */
    INFLECT,

    /** Pre-analysis token. */
    PREANALYSIS,
  }

  /**
   * Part of speech tag for Korean based on Sejong corpus classification.
   *
   * <p>The declaration order of the constants is significant: {@link #resolveTag(byte)} looks a
   * tag up by its position in this list, not by its {@link #code()}. Do not reorder or insert
   * constants in the middle.
   */
  public enum Tag {
    /** Pre-final ending */
    EP(100, "Pre-final ending"),

    /** Sentence-closing ending */
    EF(101, "Sentence-closing ending"),

    /** Connective ending */
    EC(102, "Connective ending"),

    /** Nominal transformative ending */
    ETN(103, "Nominal transformative ending"),

    /** Adnominal form transformative ending */
    ETM(104, "Adnominal form transformative ending"),

    /** Interjection */
    IC(110, "Interjection"),

    /** Subject case marker */
    JKS(120, "Subject case marker"),

    /** Complement case marker */
    JKC(121, "Complement case marker"),

    /** Adnominal case marker */
    JKG(122, "Adnominal case marker"),

    /** Object case marker */
    JKO(123, "Object case marker"),

    /** Adverbial case marker */
    JKB(124, "Adverbial case marker"),

    /** Vocative case marker */
    JKV(125, "Vocative case marker"),

    /** Quotative case marker */
    JKQ(126, "Quotative case marker"),

    /** Auxiliary postpositional particle */
    JX(127, "Auxiliary postpositional particle"),

    /** Conjunctive postpositional particle */
    JC(128, "Conjunctive postpositional particle"),

    /** General Adverb */
    MAG(130, "General Adverb"),

    /** Conjunctive adverb */
    MAJ(131, "Conjunctive adverb"),

    /** Determiner (note: {@link #description()} reports this tag as "Modifier") */
    MM(140, "Modifier"),

    /** General Noun */
    NNG(150, "General Noun"),

    /** Proper Noun */
    NNP(151, "Proper Noun"),

    /** Dependent noun (following nouns) */
    NNB(152, "Dependent noun"),

    /** Dependent noun */
    NNBC(153, "Dependent noun"),

    /** Pronoun */
    NP(154, "Pronoun"),

    /** Numeral */
    NR(155, "Numeral"),

    /** Terminal punctuation (? ! .) */
    SF(160, "Terminal punctuation"),

    /** Chinese character */
    SH(161, "Chinese Character"),

    /** Foreign language */
    SL(162, "Foreign language"),

    /** Number */
    SN(163, "Number"),

    /** Space */
    SP(164, "Space"),

    /** Closing brackets */
    SSC(165, "Closing brackets"),

    /** Opening brackets */
    SSO(166, "Opening brackets"),

    /** Separator (· / :) */
    SC(167, "Separator"),

    /** Other symbol */
    SY(168, "Other symbol"),

    /** Ellipsis */
    SE(169, "Ellipsis"),

    /** Adjective */
    VA(170, "Adjective"),

    /** Negative designator */
    VCN(171, "Negative designator"),

    /** Positive designator */
    VCP(172, "Positive designator"),

    /** Verb */
    VV(173, "Verb"),

    /** Auxiliary Verb or Adjective */
    VX(174, "Auxiliary Verb or Adjective"),

    /** Prefix */
    XPN(181, "Prefix"),

    /** Root */
    XR(182, "Root"),

    /** Adjective Suffix */
    XSA(183, "Adjective Suffix"),

    /** Noun Suffix */
    XSN(184, "Noun Suffix"),

    /** Verb Suffix */
    XSV(185, "Verb Suffix"),

    /** Unknown */
    UNKNOWN(999, "Unknown"),

    /** Unknown */
    UNA(-1, "Unknown"),

    /** Unknown */
    NA(-1, "Unknown"),

    /** Unknown */
    VSV(-1, "Unknown");

    private final int code;
    private final String desc;

    /** Returns the code associated with the tag (as defined in pos-id.def). */
    public int code() {
      return code;
    }

    /** Returns the description associated with the tag. */
    public String description() {
      return desc;
    }

    /**
     * Creates a part of speech tag.
     *
     * @param code The code for the tag.
     * @param desc The description of the tag.
     */
    Tag(int code, String desc) {
      this.code = code;
      this.desc = desc;
    }
  }

  /**
   * Returns the {@link Tag} of the provided name.
   *
   * @param name the tag name; it is upper-cased before the lookup, so matching is
   *     case-insensitive
   * @return the tag whose constant name equals the upper-cased {@code name}
   * @throws IllegalArgumentException if no tag has that name
   * @throws NullPointerException if {@code name} is null
   */
  public static Tag resolveTag(String name) {
    return Tag.valueOf(name.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Returns the {@link Tag} at the provided position in the declaration order of {@link Tag}.
   *
   * @param tag the zero-based position of the tag (its ordinal), not its {@link Tag#code()}
   * @return the tag declared at that position
   * @throws ArrayIndexOutOfBoundsException if {@code tag} is out of range and assertions are
   *     disabled (with assertions enabled an {@link AssertionError} is raised instead)
   */
  public static Tag resolveTag(byte tag) {
    // A byte is signed, so the lower bound has to be checked as well as the upper one.
    assert tag >= 0 && tag < TAGS.length : "tag index out of range: " + tag;
    return TAGS[tag];
  }

  /**
   * Returns the {@link Type} of the provided name.
   *
   * @param name the type name, or {@code "*"} which maps to {@link Type#MORPHEME}; any other
   *     value is upper-cased before the lookup, so matching is case-insensitive
   * @return the matching type
   * @throws IllegalArgumentException if no type has that name
   * @throws NullPointerException if {@code name} is null
   */
  public static Type resolveType(String name) {
    if ("*".equals(name)) {
      return Type.MORPHEME;
    }
    return Type.valueOf(name.toUpperCase(Locale.ENGLISH));
  }

  /**
   * Returns the {@link Type} at the provided position in the declaration order of {@link Type}.
   *
   * @param type the zero-based position of the type (its ordinal)
   * @return the type declared at that position
   * @throws ArrayIndexOutOfBoundsException if {@code type} is out of range and assertions are
   *     disabled (with assertions enabled an {@link AssertionError} is raised instead)
   */
  public static Type resolveType(byte type) {
    // A byte is signed, so the lower bound has to be checked as well as the upper one.
    assert type >= 0 && type < TYPES.length : "type index out of range: " + type;
    return TYPES[type];
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy