
// com.alotuser.address.SmartMatch Maven / Gradle / Ivy
package com.alotuser.address;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import com.alotuser.address.assets.Address;
import com.alotuser.address.assets.AddressInfo;
import com.alotuser.address.assets.MatchAddress;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.RegexPool;
import cn.hutool.core.util.ReUtil;
import cn.hutool.core.util.StrUtil;
/**
 * Address matcher: pulls a phone number out of free text and resolves the leading
 * part of an address string against a province / city / county / street tree.
 *
 * @author I6view
 */
public class SmartMatch {

    /** Punctuation that {@link #filterStr(String)} replaces with a single space. */
    private static final Pattern PUNCTUATION =
            Pattern.compile("[`~!@#$^&*=|{}':;',.<>/?~!@#¥……&*——|‘;:”“’。,、?-]");

    /** Every character that is NOT kept in an address before matching starts. */
    private static final Pattern NON_ADDRESS_CHARS = Pattern.compile("[^\u4e00-\u9fa5A-Za-z0-9-]");

    /** Phone-number expressions, in the order {@link #matchMobile(String)} tries them. */
    private static final String[] PHONE_REGEXES = {
        RegexPool.MOBILE,
        RegexPool.TEL,
        RegexPool.TEL_400_800,
        RegexPool.MOBILE_HK,
        RegexPool.MOBILE_TW,
        RegexPool.MOBILE_MO
    };

    /** Depth of the province level in the address tree. */
    private static final int DEPTH_PROVINCE = 0;

    /** Depth of the street level, the deepest level that is searched. */
    private static final int DEPTH_STREET = 3;

    /**
     * Leading characters to strip from the remaining text after each matched level
     * (applied with {@code replaceFirst}, so only a match at the start is removed).
     */
    private Pattern pattern = Pattern.compile("^[省市区县州街道镇乡特别行政自治]+");

    /**
     * Extracts the first phone number found in the text.
     *
     * <p>The expressions in {@link #PHONE_REGEXES} are tried in order and the first
     * non-empty hit wins.
     *
     * @param text text that may contain a phone number
     * @return the first non-empty match; otherwise the result of the last lookup
     */
    public static String matchMobile(String text) {
        String found = null;
        for (String regex : PHONE_REGEXES) {
            found = ReUtil.getGroup0(regex, text);
            if (StrUtil.isNotEmpty(found)) {
                return found;
            }
        }
        return found;
    }

    /**
     * Replaces punctuation with spaces and removes line breaks.
     *
     * @param text address text, may be {@code null}
     * @return the filtered text, or {@code null} when {@code text} is {@code null}
     */
    public static String filterStr(String text) {
        // Previously a null input ended in a NullPointerException on the replace() chain.
        if (text == null) {
            return null;
        }
        String spaced = PUNCTUATION.matcher(text).replaceAll(" ");
        return spaced.replace("\r", "").replace("\n", "");
    }

    /**
     * Matches the beginning of an address string against the address tree, level by level.
     *
     * <p>At every level the longest prefix of the remaining text that is contained in a
     * node name wins; that prefix, plus any leading characters matched by
     * {@link #getPattern()}, is removed before the next level is searched. A level that
     * has already been resolved restricts the search of the deeper levels to its subtree.
     *
     * @param addressList top-level (province) nodes; {@code null} is treated as empty
     * @param text        address text to match
     * @param level       deepest level to match, starting at 0 (0 = province, 1 = city,
     *                    2 = county); {@code null} or any other value matches down to street
     * @return the match result with the unmatched remainder stored as the address,
     *         or {@code null} when {@code text} is blank
     */
    AddressInfo matchAddress(List<Address> addressList, String text, Integer level) {
        if (StrUtil.isBlank(text)) {
            return null;
        }
        AddressInfo info = new AddressInfo();
        // Strip special characters
        text = NON_ADDRESS_CHARS.matcher(text).replaceAll("");
        final String address = text;

        List<MatchAddress> provinceMatches = null;
        List<MatchAddress> matches = null;
        for (int depth = DEPTH_PROVINCE; depth <= DEPTH_STREET; depth++) {
            matches = findMatches(addressList, text, info, depth);
            if (depth == DEPTH_PROVINCE) {
                provinceMatches = matches;
            }
            text = consumeBestMatch(matches, text, info);
            if (depth < DEPTH_STREET && level != null && level == depth) {
                // Early exits have always reported against the province candidates.
                setAddress(provinceMatches, address, text, info);
                return info;
            }
        }
        setAddress(matches, address, text, info);
        return info;
    }

    /**
     * Collects every node at {@code targetDepth} whose name contains a prefix of the text.
     *
     * <p>Prefixes start at two characters (one, if the text is a single character) and
     * grow by one. Candidates are appended prefix by prefix, in tree order.
     *
     * @param addressList top-level nodes of the address tree
     * @param text        remaining, already cleaned text
     * @param info        levels resolved so far; used to restrict the search
     * @param targetDepth depth of the level being searched (0 = province ... 3 = street)
     * @return the candidates, possibly empty
     */
    private List<MatchAddress> findMatches(List<Address> addressList, String text, AddressInfo info,
            int targetDepth) {
        List<MatchAddress> matches = new ArrayList<>();
        Address[] path = new Address[DEPTH_STREET + 1];
        int total = text.length();
        for (int end = Math.min(2, total); end >= 1 && end <= total; end++) {
            String prefix = text.substring(0, end);
            int before = matches.size();
            collectMatches(addressList, DEPTH_PROVINCE, targetDepth, prefix, info, path, matches);
            if (matches.size() == before) {
                // A name that contains a longer prefix also contains this one,
                // so no longer prefix can match either.
                break;
            }
        }
        return matches;
    }

    /**
     * Walks the tree down to {@code targetDepth} and records the nodes whose name
     * contains {@code prefix}.
     *
     * @param nodes       nodes at the current depth; {@code null} is treated as empty
     * @param depth       current depth
     * @param targetDepth depth at which names are compared
     * @param prefix      text prefix to look for
     * @param info        levels resolved so far
     * @param path        ancestors of the current node, indexed by depth
     * @param matches     receives the candidates
     */
    private void collectMatches(Iterable<? extends Address> nodes, int depth, int targetDepth,
            String prefix, AddressInfo info, Address[] path, List<MatchAddress> matches) {
        if (nodes == null) {
            return;
        }
        boolean atTarget = depth == targetDepth;
        Object resolved = atTarget ? null : resolvedName(info, depth);
        for (Address node : nodes) {
            path[depth] = node;
            if (atTarget) {
                if (node.getName().contains(prefix)) {
                    matches.add(new MatchAddress(path[0], path[1], path[2], path[3], prefix));
                }
                continue;
            }
            // Once an ancestor level is resolved, only its own subtree is searched.
            if (resolved != null && !resolved.equals(node.getName())) {
                continue;
            }
            collectMatches(node.getChildren(), depth + 1, targetDepth, prefix, info, path, matches);
        }
    }

    /**
     * Returns the value already stored in {@code info} for the given depth.
     *
     * @param info  levels resolved so far
     * @param depth 0 = province, 1 = city, 2 = county
     * @return the stored value, or {@code null} if none is stored or the depth is deeper
     */
    private static Object resolvedName(AddressInfo info, int depth) {
        switch (depth) {
            case 0:
                return info.getProvince();
            case 1:
                return info.getCity();
            case 2:
                return info.getCounty();
            default:
                return null;
        }
    }

    /**
     * Applies the best candidate to {@code info} and removes the matched text.
     *
     * @param matches candidates of the current level
     * @param text    remaining text
     * @param info    result being filled in
     * @return the text left over for the next level; unchanged when there is no candidate
     */
    private String consumeBestMatch(List<MatchAddress> matches, String text, AddressInfo info) {
        if (matches.isEmpty()) {
            return text;
        }
        MatchAddress best = getBestMatch(matches);
        setMatchAddressInfo(info, best);
        // The match value is always a prefix of the text, so it is cut off literally
        // instead of being interpreted as a regular expression.
        String remaining = text.substring(best.getMatchValue().length());
        return ReUtil.replaceFirst(pattern, remaining, "");
    }

    /**
     * Stores the remaining text as the address when nothing matched or the text changed.
     *
     * @param matchList candidates of the level being reported
     * @param address   cleaned text before matching
     * @param text      text left after matching
     * @param info      result to update
     */
    private void setAddress(List<MatchAddress> matchList, String address, String text, AddressInfo info) {
        if (matchList.isEmpty() || !address.equals(text)) {
            info.setAddress(text);
        }
    }

    /**
     * Picks the candidate with the longest match value; on a tie the earliest one wins.
     *
     * @param matchAddressList non-empty candidate list
     * @return the best candidate
     */
    private MatchAddress getBestMatch(List<MatchAddress> matchAddressList) {
        return Collections.max(matchAddressList, Comparator.comparingInt(o -> o.getMatchValue().length()));
    }

    /**
     * Copies every level (names, codes and area id) of a candidate into the result.
     *
     * @param info         result to update
     * @param matchAddress candidate to copy from
     */
    protected void setMatchAddressInfo(AddressInfo info, MatchAddress matchAddress) {
        info.setProvince(matchAddress.getProvince());
        info.setProvinceCode(matchAddress.getProvinceCode());
        info.setCity(matchAddress.getCity());
        info.setCityCode(matchAddress.getCityCode());
        info.setCounty(matchAddress.getCounty());
        info.setCountyCode(matchAddress.getCountyCode());
        info.setStreet(matchAddress.getStreet());
        info.setStreetCode(matchAddress.getStreetCode());
        info.setAreaId(matchAddress.getAreaId());
    }

    /**
     * @return the pattern stripped from the start of the text after each matched level
     */
    public Pattern getPattern() {
        return pattern;
    }

    /**
     * @param pattern the pattern to strip from the start of the text after each matched level
     */
    public void setPattern(Pattern pattern) {
        this.pattern = pattern;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy