// org.unlaxer.jaddress.parser.processor.HeuristicBlockTokenizer (artifact listing: Maven / Gradle / Ivy)
package org.unlaxer.jaddress.parser.processor;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.actual丁目;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.actual地番;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.actual支号;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.actual枝番号;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.expects丁目;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.expects地番;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.expects支号;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.expects枝番号;
import static org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean.丁目が最大値を超えた;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.unlaxer.jaddress.entity.standard.定義済みRange階層要素;
import org.unlaxer.jaddress.entity.standard.階層要素;
import org.unlaxer.jaddress.parser.AddressContext;
import org.unlaxer.jaddress.parser.AddressElement;
import org.unlaxer.jaddress.parser.AddressElementFactory;
import org.unlaxer.jaddress.parser.AddressProcessor;
import org.unlaxer.jaddress.parser.AddressToken;
import org.unlaxer.jaddress.parser.CharacterKind;
import org.unlaxer.jaddress.parser.HeuristicBlockMatcher;
import org.unlaxer.jaddress.parser.NumberParser;
import org.unlaxer.jaddress.parser.ParsingState;
import org.unlaxer.jaddress.parser.ParsingTarget;
import org.unlaxer.jaddress.parser.ResolverResult;
import org.unlaxer.jaddress.parser.ResolverResultKindOfBoolean;
import org.unlaxer.jaddress.parser.SeparatorKind;
import org.unlaxer.jaddress.parser.StringAndCharacterKind;
import org.unlaxer.jaddress.parser.StringAndCharacterKinds;
import org.unlaxer.jaddress.parser.StringIndex;
import org.unlaxer.jaddress.parser.TargetStateAndElement;
import org.unlaxer.jaddress.parser.TripletAddressToken;
import org.unlaxer.jaddress.parser.processor.BlockHierarchyResolver.BlockPatternResolverResult;
import org.unlaxer.util.Singletons;
import org.unlaxer.util.collection.TreeNode;
import io.vavr.Tuple2;
public class HeuristicBlockTokenizer implements AddressProcessor{
Logger logger = LoggerFactory.getLogger(getClass());
@Override
public ParsingState targetState() {
return ParsingState.丁目以降を分割する;
}
static NumberParser numberParser = Singletons.get(NumberParser.class);
@Override
public TargetStateAndElement process(ParsingTarget parsingTarget) {
AddressContext addressContext = parsingTarget.addressContext();
BlockPatternResolverResult blockPatternResolverResult =
parsingTarget.intermediateResult().blockPatternResolverResult();
TreeNode targetNode = targetNode(parsingTarget);
AddressElement addressElement = targetNode.get();
logger.debug("target block address {}" , addressElement.asString());
SortedMap<階層要素, TripletCharacterKinds> characterKindsBy階層要素 =
blockPatternResolverResult.characterKindsBy階層要素;
TripletAddressToken matchWithSuccessor = TripletAddressToken.onlySuccessorOf(addressElement);
boolean hasSuccessor = false;
Map<階層要素,Boolean> skipBy階層要素 = new HashMap<階層要素, Boolean>();
for (階層要素 _階層要素 : characterKindsBy階層要素.keySet()) {
addResolverResult(_階層要素, expectsResolverResultBy階層要素, parsingTarget);
TripletCharacterKinds tripletCharacterKinds = characterKindsBy階層要素.get(_階層要素);
logger.debug("current is {}" , _階層要素);
Boolean skip = skipBy階層要素.get(_階層要素);
if(skip != null && skip) {
continue;
}
matchWithSuccessor = numberParser.matchWithSuccessor(matchWithSuccessor);
boolean matched =
matchWithSuccessor.isMatched() &&
HeuristicBlockMatcher.isMatch(parsingTarget, matchWithSuccessor, _階層要素);
if(matched) {
int value = value(matchWithSuccessor);
if(_階層要素 == 階層要素.町域Top1 && value > 42) {//日本で最大の丁目は42丁目らしいです!
parsingTarget.addResolverResult(new ResolverResult(丁目が最大値を超えた));
_階層要素 = 階層要素.町域Top2;
skipBy階層要素.put(階層要素.町域Top2, true);
}
Tuple2 create =
create(matchWithSuccessor, _階層要素 , tripletCharacterKinds);
addResolverResult(_階層要素, actualResolverResultBy階層要素, parsingTarget);
addressContext.addChild(targetNode,create._1());
matchWithSuccessor = create._2();
}else {
//FIXME! ★1との処理関係を把握 (successorで上書きされる?)
//ここに入る条件としては想定よりもBlockが短い時。(丁目番地号を想定していたのに丁目しかない時など)
//★1は想定通りであろうがなかろうが定義済みRange階層要素.建物以降のAddressElementを作成する
if(matchWithSuccessor.original != null && matchWithSuccessor.original.isPresent()) {
AddressElement create =
AddressElementFactory.of(matchWithSuccessor.original , 定義済みRange階層要素.建物以降);
addressContext.addChild(targetNode,create);
hasSuccessor = true;
}
break;
}
}
//★1
if(matchWithSuccessor != null && matchWithSuccessor.successor().isPresent()) {
AddressElement create =
AddressElementFactory.of(matchWithSuccessor.successor(), 定義済みRange階層要素.建物以降);
addressContext.addChild(targetNode,create);
hasSuccessor = true;
}
return hasSuccessor ?
new TargetStateAndElement(
ParsingState.都道府県から枝番までで建物階層と建物名をDBを用いて求める,
定義済みRange階層要素.建物以降):
new TargetStateAndElement(
ParsingState.建物より後のTokenをmappingする,
定義済みRange階層要素.全体);
}
int value(TripletAddressToken matchWithSuccessor) {
//FIXME! 漢字
try {
int parseInt = Integer.parseInt(matchWithSuccessor.matched().asString());
return parseInt;
}catch (Exception e) {
return -1;
}
}
Tuple2 create(
TripletAddressToken triplet ,階層要素 _階層要素 , TripletCharacterKinds tripletCharacterKinds) {
AddressElement create = create(triplet, _階層要素);
AddressToken successor = triplet.successor();
List collect = tripletCharacterKinds.suffix().collection().stream()
.map(CharacterKind::strings)
.flatMap(Collection::stream)
.collect(Collectors.toList());
for (String suffix : collect) {
StringIndex indexOf = successor.indexOf(suffix);
if(indexOf.isValid()) {
if(indexOf.value == 0 || isHeaderSpace(indexOf, successor)) {
create.setSuffix(suffix);
AddressToken substring = successor.substring(
StringIndex.of(indexOf.value + suffix.length()),
SeparatorKind.domainSpecificSeparator,
SeparatorKind.domainSpecificSeparator);
TripletAddressToken onlySuccessorOf = TripletAddressToken.onlySuccessorOf(substring);
return new Tuple2(create, onlySuccessorOf);
}
}
}
return new Tuple2(create, triplet);
}
boolean isHeaderSpace(StringIndex indexOf , AddressToken successor) {
AddressToken substring = successor.substring(
StringIndex.of(0),
indexOf,
SeparatorKind.domainSpecificSeparator,
SeparatorKind.domainSpecificSeparator);
boolean allMatch = substring.stringAndCharacterKinds().stream()
.map(StringAndCharacterKind::characterKind)
.allMatch(CharacterKind::isDelimitorSpace);
return allMatch;
}
AddressElement create(TripletAddressToken triplet ,階層要素 _階層要素) {
switch (_階層要素) {
case 町域Top1:
return AddressElementFactory.of(
triplet.joinPredecessorAndMatched(),
_階層要素
);
case 町域Top2:
case 町域Top3:
case 町域Top4:
StringAndCharacterKinds stringAndCharacterKindOf =
StringAndCharacterKinds.of(triplet.predecessor().asString() , false);
StringAndCharacterKinds cutFilterchracterKindIndexOf =
stringAndCharacterKindOf.cutFilterchracterKindIndexOf(filterKindBy階層要素.get(_階層要素));
StringAndCharacterKinds join =
cutFilterchracterKindIndexOf.join(triplet.matched().stringAndCharacterKinds());
return AddressElementFactory.of(
join,
_階層要素,
triplet.predecessor().separatorKindOfLeading(),
triplet.matched().separatorKindOfTailing()
// matchedString.successor
);
default:
throw new IllegalArgumentException("Unexpected value: " + _階層要素);
}
}
static Map<階層要素, CharacterKind[]> filterKindBy階層要素 = Map.of(
階層要素.町域Top2,createWithDefaultDelimitor(CharacterKind.suffix丁目),
階層要素.町域Top3,createWithDefaultDelimitor(CharacterKind.suffix地番),
階層要素.町域Top4,createWithDefaultDelimitor(CharacterKind.suffix号)
);
void addResolverResult(
階層要素 _階層要素,
Map<階層要素 ,ResolverResultKindOfBoolean> resultBy階層要素 ,
ParsingTarget parsingTarget){
ResolverResultKindOfBoolean resolverResultKindOfBoolean =
resultBy階層要素.get(_階層要素);
if(resolverResultKindOfBoolean != null) {
parsingTarget.addResolverResult(
new ResolverResult(resolverResultKindOfBoolean));
}
}
static Map<階層要素 ,ResolverResultKindOfBoolean> expectsResolverResultBy階層要素 =
Map.of(
階層要素.町域Top1 , expects丁目,
階層要素.町域Top2 , expects地番,
階層要素.町域Top3 , expects支号,
階層要素.町域Top4 , expects枝番号
);
static Map<階層要素 ,ResolverResultKindOfBoolean> actualResolverResultBy階層要素 =
Map.of(
階層要素.町域Top1 , actual丁目,
階層要素.町域Top2 , actual地番,
階層要素.町域Top3 , actual支号,
階層要素.町域Top4 , actual枝番号
);
static CharacterKind[] createWithDefaultDelimitor(CharacterKind mainCharacterKind) {
CharacterKind[] create =new CharacterKind[] {
CharacterKind.delimitorSpace,
CharacterKind.delimitorComma ,
CharacterKind.delimitorSpace,
CharacterKind.delimitorHyphen ,
CharacterKind.delimitorJapanese ,
CharacterKind.delimitorSlash
};
create[0] = mainCharacterKind;
return create;
}
} © 2015 - 2025 Weber Informatics LLC | Privacy Policy