com.github.dennisit.vplus.data.utils.AnalysisUtils Maven / Gradle / Ivy
/*--------------------------------------------------------------------------
* Copyright (c) 2010-2020, Elon.su All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the elon developer nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* Author: Elon.su, you can also mail [email protected]
*--------------------------------------------------------------------------
*/
package com.github.dennisit.vplus.data.utils;
import com.google.common.collect.Lists;
import org.ansj.domain.Result;
import org.ansj.splitWord.analysis.IndexAnalysis;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
/**
 * Word-segmentation helpers built on the ansj {@link IndexAnalysis} analyzer.
 *
 * Created by Elon.su on 17/11/30.
 */
public class AnalysisUtils {

    /**
     * Segments a text string into its terms.
     *
     * @param source text to segment
     * @return the name of every term produced by the analyzer, in the order the analyzer
     *         reports them; an empty list when {@code source} is blank or the analyzer
     *         returns no result / no terms
     */
    public static List<String> segmentList(String source) {
        if (StringUtils.isBlank(source)) {
            return Lists.newArrayList();
        }
        Result result = new IndexAnalysis().parseStr(source);
        // orElseGet: only allocate the fallback list when the analyzer gave us nothing.
        return Optional.ofNullable(result)
                .map(Result::getTerms)
                .orElseGet(Lists::newArrayList)
                .stream()
                .map(term -> term.getName())
                .collect(Collectors.toList());
    }

    /**
     * Segments a text string and joins the terms with a single space.
     *
     * @param source text to segment
     * @return the segmented terms separated by a space, or {@code source} itself when
     *         segmentation yields no terms
     */
    public static String segmentString(String source) {
        return segmentString(source, StringUtils.SPACE);
    }

    /**
     * Segments a text string and joins the terms with the given separator.
     *
     * @param source    text to segment
     * @param separator string placed between consecutive terms
     * @return the segmented terms joined by {@code separator}, or {@code source} itself
     *         (unchanged, possibly {@code null} or blank) when segmentation yields no terms
     */
    public static String segmentString(String source, String separator) {
        List<String> segment = segmentList(source);
        if (CollectionUtils.isEmpty(segment)) {
            // Nothing was segmented: hand the caller back exactly what they passed in.
            return source;
        }
        return StringUtils.join(segment, separator);
    }
}