All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hankcs.hanlp.seg.NShort.Path.AtomNode Maven / Gradle / Ivy

There is a newer version: portable-1.8.5
Show newest version
/*
 * 
 * He Han
 * [email protected]
 * 2014/05/2014/5/17 14:58
 *
 * 
 * Copyright (c) 2003-2014, 上海林原信息科技有限公司. All Right Reserved, http://www.linrunsoft.com/
 * This source is subject to the LinrunSpace License. Please contact 上海林原信息科技有限公司 to get more information.
 * 
 */
package com.hankcs.hanlp.seg.NShort.Path;

import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.utility.Predefine;

/**
 * 原子分词节点
 * @author hankcs
 */
public class AtomNode
{
    public String sWord;
    public int nPOS;

    public AtomNode(String sWord, int nPOS)
    {
        this.sWord = sWord;
        this.nPOS = nPOS;
    }

    public AtomNode(char c, int nPOS)
    {
        this.sWord = String.valueOf(c);
        this.nPOS = nPOS;
    }

    /**
     * 原子的词性
     * @return
     */
    public Nature getNature()
    {
        Nature nature = Nature.nz;
        switch (nPOS)
        {
            case CharType.CT_CHINESE:
                break;
            case CharType.CT_NUM:
            case CharType.CT_INDEX:
            case CharType.CT_CNUM:
                nature = Nature.m;
                sWord = Predefine.TAG_NUMBER;
                break;
            case CharType.CT_DELIMITER:
                nature = Nature.w;
                break;
            case CharType.CT_LETTER:
                nature = Nature.nx;
                sWord = Predefine.TAG_CLUSTER;
                break;
            case CharType.CT_SINGLE://12021-2129-3121
                if (Predefine.PATTERN_FLOAT_NUMBER.matcher(sWord).matches())//匹配浮点数
                {
                    nature = Nature.m;
                    sWord = Predefine.TAG_NUMBER;
                } else
                {
                    nature = Nature.nx;
                    sWord = Predefine.TAG_CLUSTER;
                }
                break;
            default:
                break;
        }
        return nature;
    }

    @Override
    public String toString()
    {
        return "AtomNode{" +
                "word='" + sWord + '\'' +
                ", nature=" + nPOS +
                '}';
    }

    public static Vertex convert(String word, int type)
    {
        String name = word;
        Nature nature = Nature.n;
        int dValue = 1;
        switch (type)
        {
            case CharType.CT_CHINESE:
                break;
            case CharType.CT_NUM:
            case CharType.CT_INDEX:
            case CharType.CT_CNUM:
                nature = Nature.m;
                word = Predefine.TAG_NUMBER;
                break;
            case CharType.CT_DELIMITER:
                nature = Nature.w;
                break;
            case CharType.CT_LETTER:
                nature = Nature.nx;
                word = Predefine.TAG_CLUSTER;
                break;
            case CharType.CT_SINGLE://12021-2129-3121
//                if (Pattern.compile("^(-?\\d+)(\\.\\d+)?$").matcher(word).matches())//匹配浮点数
//                {
//                    nature = Nature.m;
//                    word = Predefine.TAG_NUMBER;
//                } else
//                {
                    nature = Nature.nx;
                    word = Predefine.TAG_CLUSTER;
//                }
                break;
            default:
                break;
        }

        return new Vertex(word, name, new CoreDictionary.Attribute(nature, dValue));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy