// org.unlaxer.jaddress.parser.processor.PrefectureToTownNameTokenizer Maven / Gradle / Ivy
package org.unlaxer.jaddress.parser.processor;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.unlaxer.Range;
import org.unlaxer.jaddress.entity.standard.定義済みRange階層要素;
import org.unlaxer.jaddress.entity.standard.階層要素;
import org.unlaxer.jaddress.entity.zip.ZipBasedAddress;
import org.unlaxer.jaddress.parser.AddressContext;
import org.unlaxer.jaddress.parser.AddressElement;
import org.unlaxer.jaddress.parser.AddressElementFactory;
import org.unlaxer.jaddress.parser.AddressProcessor;
import org.unlaxer.jaddress.parser.IntermediateResult;
import org.unlaxer.jaddress.parser.MatchKind;
import org.unlaxer.jaddress.parser.ParsingState;
import org.unlaxer.jaddress.parser.ParsingTarget;
import org.unlaxer.jaddress.parser.ResolverResult;
import org.unlaxer.jaddress.parser.ResolverResultKind;
import org.unlaxer.jaddress.parser.ResolverResultKindOfMatchKind;
import org.unlaxer.jaddress.parser.SeparatorKind;
import org.unlaxer.jaddress.parser.StringAndCharacterKinds;
import org.unlaxer.jaddress.parser.StringIndex;
import org.unlaxer.jaddress.parser.TargetStateAndElement;
import org.unlaxer.util.collection.TreeNode;
import org.unlaxer.util.function.Unchecked;
import io.vavr.Tuple2;
public class PrefectureToTownNameTokenizer implements AddressProcessor {
/**
 * Reports the parsing state handled by this processor.
 *
 * @return the {@code ParsingState} constant for splitting the address from the prefecture
 *         through the town name
 */
@Override
public ParsingState targetState() {
  return ParsingState.都道府県から町名までを分割する;
}
/**
 * Strings passed as the third argument of {@code StringAndCharacterKinds#match} whenever an
 * address part is looked up.
 * NOTE(review): presumably connective characters (ヶ, の, ...) the matcher is allowed to skip —
 * confirm against the matcher implementation.
 *
 * <p>{@code Set.of} throws {@code IllegalArgumentException} on duplicate elements, so every
 * literal must be distinct. "ー" used to be listed twice, which made construction of this class
 * fail. NOTE(review): the second occurrence may originally have been a different-width variant
 * of the character — confirm against the upstream source and add it back if so.
 */
Set<String> excludes = Set.of("ヶ", "が", "ケ", "ー", "ガ", "-", "~", "の", "ノ", "之", "乃");
/**
 * Splits the address string into prefecture, city and town parts, using each zip-code dictionary
 * entry attached to the intermediate result as a candidate.
 *
 * <p>For every candidate entry the prefecture name, the city name and then the town name(s) are
 * searched in the address string. The first candidate that yields exactly three valid results is
 * applied immediately; otherwise the candidate whose last result reaches furthest into the
 * address (see {@code select}) is applied.
 *
 * @param parsingTarget supplies the address context, the intermediate result and receives the
 *                      resolver results recorded during the searches
 * @return the next parsing state together with the element it applies to
 */
@Override
public TargetStateAndElement process(ParsingTarget parsingTarget) {
  AddressContext addressContext = parsingTarget.addressContext();
  IntermediateResult intermediateResult = parsingTarget.intermediateResult();
  TreeNode<AddressElement> targetNode = targetNode(parsingTarget);
  StringAndCharacterKinds address = addressContext.addressString();

  // One result list per zip-dictionary candidate that did not qualify for the early return.
  List<List<SearchResult>> allResults = new ArrayList<>();

  for (ZipBasedAddress zipBasedAddress : intermediateResult.zipBasedAddressesFromZip()) {
    StringIndex currentIndex = new StringIndex(0);
    List<SearchResult> results = new ArrayList<>();

    // 1: prefecture (and equivalents)
    currentIndex = search(parsingTarget, addressContext, targetNode, address,
        currentIndex, zipBasedAddress.kenName(), results,
        階層要素.国域Top1 /* prefecture */, zipBasedAddress)
        .map(SearchResult::next)
        .orElse(currentIndex);

    // When the prefecture search moved the index forward, record the "at非先頭" resolver result
    // and restart the following searches from the beginning of the address.
    // NOTE(review): the condition looks at the index after the match, not at where the match
    // started — confirm this is the intended trigger for the "non-leading" result kind.
    if (currentIndex.value > 0) {
      parsingTarget.addResolverResult(
          new ResolverResult(
              ResolverResultKindOfMatchKind.郵便番号辞書match都道府県at非先頭, MatchKind.Match));
      currentIndex = StringIndex.of(0);
    }

    // 2: city, Tokyo 23 wards, ordinance-designated city, county
    currentIndex = search(parsingTarget, addressContext, targetNode, address,
        currentIndex, zipBasedAddress.cityName(), results,
        階層要素.国域Top2 /* city */, zipBasedAddress)
        .map(SearchResult::next)
        .orElse(currentIndex);

    // 3: ward, town/village
    // 4: town or oaza
    // Fall back to the supplementary town names when the entry has no town name of its own.
    Set<String> townNames = StringUtils.isEmpty(zipBasedAddress.townName())
        ? zipBasedAddress.supplementaryTownNames()
        : Set.of(zipBasedAddress.townName());

    for (String townName : townNames) {
      // Try each spelling of the town name and stop at the first one that matches; a matching
      // result is appended to `results` inside search(...).
      for (String candidate : addTownNameSuffix(townName)) {
        Optional<SearchResult> found = search(
            parsingTarget, addressContext, targetNode, address,
            currentIndex, candidate, results, 階層要素.国域Top4 /* town */,
            zipBasedAddress);
        boolean matched = found.isPresent() && !found.get().matchKind.isNotMatched();
        if (matched) {
          break;
        }
      }
      // NOTE(review): the previous code computed the index following the match here but threw
      // the value away, so every town name is searched from the same index. That behaviour is
      // kept; confirm whether currentIndex was meant to advance.
    }

    boolean allMatch = results.stream().allMatch(SearchResult::isValid);
    // Three valid results — presumably one each for prefecture, city and town — are taken as a
    // complete match and applied without looking at the remaining candidates.
    if (allMatch && results.size() == 3) {
      return process(addressContext, targetNode, results);
    }
    allResults.add(results);
  }

  List<SearchResult> results = select(allResults);
  return process(addressContext, targetNode, results);
}
/**
 * Lists the spellings of a town name to try, in order of preference.
 *
 * <p>A name ending in "町" or "村" yields the name itself followed by the name with that final
 * character removed. Any other name yields only itself; no suffix is appended.
 *
 * @param townName the town name taken from the dictionary entry; must not be {@code null}
 * @return the spellings to try, most specific first
 */
String[] addTownNameSuffix(String townName) {
  boolean endsWithTownOrVillage = townName.endsWith("町") || townName.endsWith("村");
  if (!endsWithTownOrVillage) {
    return new String[] {townName};
  }
  String withoutLastCharacter = townName.substring(0, townName.length() - 1);
  return new String[] {townName, withoutLastCharacter};
}
private List select(List> allResults) {
allResults.sort(
(list1,list2)-> level(list2) - level(list1)
);
List list = allResults.get(0);
return list;
}
/**
 * Scores a result list by how far its last result reaches.
 *
 * @param results the search results of one dictionary candidate
 * @return {@code 0} for an empty list, otherwise the {@code next} index value of the last result
 */
public int level(List<SearchResult> results) {
  if (results.isEmpty()) {
    return 0;
  }
  SearchResult last = results.get(results.size() - 1);
  return last.next.value;
}
/**
 * Applies a chosen list of search results to the address tree and determines the next state.
 *
 * <p>Returns the town-name split error state when there are no results, or when the last result
 * is at level {@code 国域Top3} or above it in the hierarchy and the dictionary entry does not
 * report {@code 町名無し次に番地が来る()}. Otherwise every result's action is run, the rest of the
 * address after the last result is added as a {@code 町名より後} child of {@code targetNode}, and
 * the state that resolves the hierarchy below the town name is returned.
 *
 * @param addressContext the context holding the address string and the element tree
 * @param targetNode     the node that receives the new child elements
 * @param results        the search results to apply, in match order
 * @return the next parsing state together with the element it applies to
 */
private TargetStateAndElement process(
    AddressContext addressContext,
    TreeNode<AddressElement> targetNode,
    List<SearchResult> results) {

  if (results.isEmpty()) {
    return new TargetStateAndElement(ParsingState.町名分割エラー, 定義済みRange階層要素.全体);
  }

  SearchResult lastSearchResult = results.get(results.size() - 1);

  // The last match stopped at or before the Top3 level, i.e. no Top4 (town) result was found.
  boolean stoppedBeforeTown =
      lastSearchResult._階層要素.level <= 階層要素.国域Top3.level;
  if (stoppedBeforeTown && !lastSearchResult.zipBasedAddress.町名無し次に番地が来る()) {
    return new TargetStateAndElement(ParsingState.町名分割エラー, 定義済みRange階層要素.全体);
  }

  // Attach the matched parts to the tree only now that the candidate has been accepted.
  results.forEach(SearchResult::run);

  StringIndex next = lastSearchResult.next();
  StringAndCharacterKinds substring =
      addressContext.addressString().substring(
          next, SeparatorKind.domainSpecificSeparator, SeparatorKind.terminator);

  addressContext.addChild(
      targetNode,
      AddressElementFactory.of(substring, 定義済みRange階層要素.町名より後,
          SeparatorKind.domainSpecificSeparator, SeparatorKind.domainSpecificSeparator));

  return new TargetStateAndElement(
      ParsingState.町名から丁目の階層種類をDBを用いて求める, 定義済みRange階層要素.町名より後);
}
/**
 * Searches one address part and, when it matches, records the outcome.
 *
 * <p>Delegates the lookup to the seven-argument {@code search}. A result whose match kind is
 * not "not matched" is appended to {@code results}, and a resolver result for the hierarchy
 * level is added to {@code parsingTarget}.
 *
 * @param parsingTarget   receives the resolver result on a match
 * @param addressContext  the context holding the element tree
 * @param targetNode      the node under which a matched part will later be added
 * @param address         the address string to search in
 * @param currentIndex    the index handed to the matcher
 * @param nameOfPart      the dictionary name to look for; empty names produce no result
 * @param results         the list that collects matched results
 * @param _階層要素        the hierarchy level of the part; must be one of 国域Top1 to 国域Top4
 * @param zipBasedAddress the dictionary entry the name came from
 * @return the lookup result (matched or not), or empty when {@code nameOfPart} is empty
 * @throws IllegalArgumentException if a match is found for a hierarchy level other than
 *                                  国域Top1 to 国域Top4
 */
Optional<SearchResult> search(
    ParsingTarget parsingTarget,
    AddressContext addressContext,
    TreeNode<AddressElement> targetNode,
    StringAndCharacterKinds address,
    StringIndex currentIndex,
    String nameOfPart,
    List<SearchResult> results,
    階層要素 _階層要素,
    ZipBasedAddress zipBasedAddress) {

  Optional<SearchResult> search =
      search(addressContext, targetNode, address, currentIndex, nameOfPart, _階層要素, zipBasedAddress);

  search.ifPresent(result -> {
    if (result.matchKind.isNotMatched()) {
      return;
    }
    results.add(result);

    // Map the hierarchy level to the resolver result kind reported for a dictionary match.
    ResolverResultKind resolverResultKind;
    switch (_階層要素) {
      case 国域Top1: // prefecture
        resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match都道府県;
        break;
      case 国域Top2: // city
        resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match市;
        break;
      case 国域Top3: // ward
        resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match区;
        break;
      case 国域Top4: // town or oaza
        resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match町または大字;
        break;
      default:
        throw new IllegalArgumentException("unsupported hierarchy level: " + _階層要素);
    }
    parsingTarget.addResolverResult(new ResolverResult(resolverResultKind, MatchKind.Match));
  });
  return search;
}
/**
 * Looks up one dictionary name in the address string.
 *
 * @param addressContext  the context whose tree the result's action will modify
 * @param targetNode      the node under which the action adds the matched part
 * @param address         the address string to search in
 * @param currentIndex    the index handed to the matcher
 * @param nameOfPart      the dictionary name to look for
 * @param _階層要素        the hierarchy level assigned to the part
 * @param zipBasedAddress the dictionary entry the name came from
 * @return empty when {@code nameOfPart} is empty (see {@code isEmpty}); otherwise a result that
 *         is either a no-match (invalid {@code indexOf}, {@code next} equal to
 *         {@code currentIndex}, no-op action) or a match carrying the matched range and an
 *         action that adds the part as a child of {@code targetNode}
 */
private Optional<SearchResult> search(
    AddressContext addressContext,
    TreeNode<AddressElement> targetNode,
    StringAndCharacterKinds address,
    StringIndex currentIndex,
    String nameOfPart,
    階層要素 _階層要素,
    ZipBasedAddress zipBasedAddress) {

  if (isEmpty(nameOfPart)) {
    return Optional.empty();
  }

  Tuple2<Range, MatchKind> match = address.match(nameOfPart, currentIndex, excludes);
  MatchKind matchKind = match._2();

  if (matchKind == MatchKind.NoMatch) {
    // Keep the caller's index so a failed lookup does not move the search position.
    return Optional.of(
        new SearchResult(_階層要素, StringIndex.invalid(), currentIndex, matchKind,
            zipBasedAddress, () -> {}));
  }

  Range range = match._1();
  // Deferred: the tree is only modified once the caller decides to apply this candidate.
  Runnable addMatchedPart = () -> {
    addressContext.addChild(
        targetNode,
        AddressElementFactory.of(nameOfPart, _階層要素,
            SeparatorKind.domainSpecificSeparator, SeparatorKind.domainSpecificSeparator));
  };
  return Optional.of(
      new SearchResult(_階層要素,
          StringIndex.of(range.startIndexInclusive), StringIndex.of(range.endIndexExclusive),
          matchKind, zipBasedAddress, addMatchedPart));
}
/**
 * Outcome of looking up one address part, together with the deferred action that attaches the
 * part to the address tree.
 */
static class SearchResult implements Runnable {

  /** Hierarchy level of the searched part. */
  public final 階層要素 _階層要素;
  /** Index passed as {@code indexOf}; {@link #isValid()} reports whether it is valid. */
  public final StringIndex indexOf;
  /** Index passed as {@code next}; returned by {@link #next()}. */
  public final StringIndex next;
  /** Kind of match reported by the matcher. */
  public final MatchKind matchKind;
  /** Dictionary entry the searched name came from. */
  public final ZipBasedAddress zipBasedAddress;
  /** Action executed by {@link #run()}. */
  public final Runnable runnable;

  public SearchResult(階層要素 _階層要素, StringIndex indexOf, StringIndex next,
      MatchKind matchKind, ZipBasedAddress zipBasedAddress, Runnable runnable) {
    this._階層要素 = _階層要素;
    this.indexOf = indexOf;
    this.next = next;
    this.matchKind = matchKind;
    this.zipBasedAddress = zipBasedAddress;
    this.runnable = runnable;
  }

  /** @return whether {@code indexOf} is a valid index */
  public boolean isValid() {
    return indexOf.isValid();
  }

  /** @return the {@code next} index */
  public StringIndex next() {
    return next;
  }

  /** Executes the stored action through {@code Unchecked.run}. */
  @Override
  public void run() {
    Unchecked.run(() -> runnable.run());
  }
}
/**
 * Tells whether a dictionary value carries no usable text.
 *
 * @param word the value to inspect; may be {@code null}
 * @return {@code true} for {@code null}, the empty string, or the text "NULL" in any letter case
 */
static boolean isEmpty(String word) {
  if (word == null || word.isEmpty()) {
    return true;
  }
  return "NULL".equalsIgnoreCase(word);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy