All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apdplat.word.util.Utils Maven / Gradle / Ivy

Go to download

word分词是一个Java实现的中文分词组件,提供了多种基于词典的分词算法,并利用ngram模型来消除歧义。 能准确识别英文、数字,以及日期、时间等数量词,能识别人名、地名、组织机构名等未登录词。 同时提供了Lucene、Solr、ElasticSearch插件。

There is a newer version: 1.3.1
Show newest version
/**
 * 
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, 杨尚川, [email protected]
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

package org.apdplat.word.util;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * 工具类
 * @author 杨尚川
 */
public class Utils {
    // One or more characters, every one of them in the range U+4E00..U+9FA5.
    private static final Pattern PATTERN_ONE = Pattern.compile("^[\\u4e00-\\u9fa5]+$");
    // Two or more characters, every one of them in the range U+4E00..U+9FA5.
    private static final Pattern PATTERN_TWO = Pattern.compile("^[\\u4e00-\\u9fa5]{2,}$");

    /**
     * Tests whether the word consists solely of Chinese characters
     * (U+4E00..U+9FA5) and contains at least one of them.
     *
     * @param word the text to test; may be {@code null}
     * @return {@code true} if every character of {@code word} is a Chinese
     *         character and there is at least one; {@code false} otherwise,
     *         including for {@code null} and the empty string
     */
    public static boolean isChineseCharAndLengthAtLeastOne(String word) {
        // matches() instead of find(): with find(), '$' also matches just before a
        // trailing line terminator, so "xx\n" would wrongly be accepted.
        return word != null && PATTERN_ONE.matcher(word).matches();
    }

    /**
     * Tests whether the word consists solely of Chinese characters
     * (U+4E00..U+9FA5) and contains at least two of them.
     *
     * @param word the text to test; may be {@code null}
     * @return {@code true} if every character of {@code word} is a Chinese
     *         character and there are at least two; {@code false} otherwise,
     *         including for {@code null} and the empty string
     */
    public static boolean isChineseCharAndLengthAtLeastTwo(String word) {
        // See isChineseCharAndLengthAtLeastOne for why matches() is used.
        return word != null && PATTERN_TWO.matcher(word).matches();
    }

    /**
     * Deletes a file, or a directory together with everything beneath it.
     * Deletion stops at the first entry that cannot be removed, so a
     * {@code false} result may leave the tree partially deleted.
     *
     * <p>NOTE(review): {@link File#isDirectory()} follows symbolic links, so a
     * link to a directory has its target's contents deleted as well - confirm
     * callers never pass such paths.
     *
     * @param dir the file or directory to delete
     * @return {@code true} if {@code dir} and all of its descendants were deleted
     */
    public static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            // listFiles() returns null on an I/O error (or if the directory vanished
            // in the meantime); fall through and let delete() report the outcome.
            File[] children = dir.listFiles();
            if (children != null) {
                for (File child : children) {
                    if (!deleteDir(child)) {
                        return false;
                    }
                }
            }
        }
        return dir.delete();
    }

    /**
     * Returns the entries of a map ordered by value, largest value first.
     * The map itself is not modified. Entries whose value is {@code null} are
     * placed after all other entries. Entries with equal values keep the
     * relative order in which the map's entry set returned them.
     *
     * @param <K> key type
     * @param <V> value type
     * @param map the map whose entries are to be sorted
     * @return a new list of the map's entries in descending order of value
     */
    public static <K, V extends Number> List<Map.Entry<K, V>> getSortedMapByValue(Map<K, V> map) {
        List<Map.Entry<K, V>> list = new ArrayList<>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                return compareDescending(o1.getValue(), o2.getValue());
            }
        });
        return list;
    }

    /**
     * Compares two numbers so that the larger one sorts first and
     * {@code null} sorts last.
     */
    private static int compareDescending(Number left, Number right) {
        if (left == null || right == null) {
            if (left == right) {
                return 0;
            }
            return left == null ? 1 : -1;
        }
        if (isIntegral(left) && isIntegral(right)) {
            // Long.compare avoids the overflow/truncation of "right - left".
            return Long.compare(right.longValue(), left.longValue());
        }
        // Double.compare gives a consistent total order, including for NaN.
        return Double.compare(right.doubleValue(), left.doubleValue());
    }

    /** Tells whether the number is one of the boxed integer types that fit in a long. */
    private static boolean isIntegral(Number number) {
        return number instanceof Integer
                || number instanceof Long
                || number instanceof Short
                || number instanceof Byte;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy