All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.dennisit.vplus.data.utils.AnalysisUtils Maven / Gradle / Ivy

/*--------------------------------------------------------------------------
 *  Copyright (c) 2010-2020, Elon.su All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * Neither the name of the elon developer nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 * Author: Elon.su, you can also mail [email protected]
 *--------------------------------------------------------------------------
*/
package com.github.dennisit.vplus.data.utils;

import com.google.common.collect.Lists;
import org.ansj.domain.Result;
import org.ansj.splitWord.analysis.IndexAnalysis;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * Static helpers that split text into word segments using the ansj
 * {@link IndexAnalysis} analyzer.
 *
 * Created by Elon.su on 17/11/30.
 */
public class AnalysisUtils {

    /**
     * Splits a text string into word segments.
     *
     * @param source the text to segment
     * @return the names of the terms produced by the analyzer, in analyzer order;
     *         an empty list when {@code source} is blank or the analyzer yields
     *         no result or no terms
     */
    public static List<String> segmentList(String source) {
        if (StringUtils.isBlank(source)) {
            return Lists.newArrayList();
        }
        Result result = new IndexAnalysis().parseStr(source);
        // Both a null Result and a null term list collapse to an empty list;
        // orElseGet only allocates the fallback when it is actually needed.
        return Optional.ofNullable(result)
                .map(Result::getTerms)
                .orElseGet(Lists::newArrayList)
                .stream()
                .map(term -> term.getName())
                .collect(Collectors.toList());
    }

    /**
     * Segments a text string and joins the segments with a single space.
     *
     * @param source the text to segment
     * @return the segments joined by a space, or {@code source} itself
     *         (possibly {@code null} or blank) when no segments are produced
     */
    public static String segmentString(String source) {
        return segmentString(source, StringUtils.SPACE);
    }


    /**
     * Segments a text string and joins the segments with the given separator.
     *
     * @param source the text to segment
     * @param separator the string placed between consecutive segments
     * @return the segments joined by {@code separator}, or {@code source} itself
     *         (possibly {@code null} or blank) when no segments are produced
     */
    public static String segmentString(String source, String separator) {
        List<String> segments = segmentList(source);
        if (CollectionUtils.isEmpty(segments)) {
            // Nothing to join: hand the input back unchanged.
            return source;
        }
        return StringUtils.join(segments, separator);
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy