All downloads are free. The search and download functionality uses the official Maven repository.

org.unlaxer.jaddress.parser.processor.PrefectureToTownNameTokenizer Maven / Gradle / Ivy

package org.unlaxer.jaddress.parser.processor;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.unlaxer.Range;
import org.unlaxer.jaddress.entity.standard.定義済みRange階層要素;
import org.unlaxer.jaddress.entity.standard.階層要素;
import org.unlaxer.jaddress.entity.zip.ZipBasedAddress;
import org.unlaxer.jaddress.parser.AddressContext;
import org.unlaxer.jaddress.parser.AddressElement;
import org.unlaxer.jaddress.parser.AddressElementFactory;
import org.unlaxer.jaddress.parser.AddressProcessor;
import org.unlaxer.jaddress.parser.IntermediateResult;
import org.unlaxer.jaddress.parser.MatchKind;
import org.unlaxer.jaddress.parser.ParsingState;
import org.unlaxer.jaddress.parser.ParsingTarget;
import org.unlaxer.jaddress.parser.ResolverResult;
import org.unlaxer.jaddress.parser.ResolverResultKind;
import org.unlaxer.jaddress.parser.ResolverResultKindOfMatchKind;
import org.unlaxer.jaddress.parser.SeparatorKind;
import org.unlaxer.jaddress.parser.StringAndCharacterKinds;
import org.unlaxer.jaddress.parser.StringIndex;
import org.unlaxer.jaddress.parser.TargetStateAndElement;
import org.unlaxer.util.collection.TreeNode;
import org.unlaxer.util.function.Unchecked;

import io.vavr.Tuple2;

public class PrefectureToTownNameTokenizer implements AddressProcessor {

	@Override
	public ParsingState targetState() {
		return ParsingState.都道府県から町名までを分割する;
	}
	
	Set excludes = Set.of(
		"ヶ","が","ケ","ー","ガ",
		"-","~" , "ー",
		"の","ノ","之","乃",
		" ","	"," "
	);

	@Override
	public TargetStateAndElement process(ParsingTarget parsingTarget) {
		
		AddressContext addressContext = parsingTarget.addressContext();
		IntermediateResult intermediateResult = parsingTarget.intermediateResult();
		
		TreeNode targetNode = targetNode(parsingTarget);
		
		StringAndCharacterKinds address = addressContext.addressString();
		
		List> allResults = new ArrayList<>();
		
		
		for (ZipBasedAddress zipBasedAddress : intermediateResult.zipBasedAddressesFromZip()) {
			
			StringIndex currentIndex = new StringIndex(0);
			List results = new ArrayList<>();
			
			// 1:都道府県 その他
			currentIndex = search(parsingTarget , addressContext, targetNode, address, 
				currentIndex,zipBasedAddress.kenName()  ,results ,  
				階層要素.国域Top1/*階層要素.都道府県*/,zipBasedAddress)
					.map(SearchResult::next)
					.orElse(currentIndex);
			
			if(currentIndex.value > 0) {
				parsingTarget.addResolverResult(
						new ResolverResult(
								ResolverResultKindOfMatchKind.郵便番号辞書match都道府県at非先頭, MatchKind.Match));
				currentIndex = StringIndex.of(0);
			}
			
			// 2 市, 東京23区, 政令指定市, 群
			currentIndex = search(parsingTarget , addressContext, targetNode, address, 
				currentIndex, zipBasedAddress.cityName() ,results , 
				階層要素.国域Top2 /*階層要素.市*/ , zipBasedAddress)
					.map(SearchResult::next)
					.orElse(currentIndex);

			// 3:区,町村

			// 4:町または大字 
			Set townNames = StringUtils.isEmpty(zipBasedAddress.townName()) ?
					zipBasedAddress.supplementaryTownNames():
					Set.of(zipBasedAddress.townName());
			
			
			for (String townName : townNames) {
				
				Optional search = null;
				
				for(String _townName : addTownNameSuffix(townName)) {
					search = search(
							parsingTarget , addressContext, targetNode, address, 
							currentIndex, _townName , results , 階層要素.国域Top4 /*階層要素.町村*/,
							zipBasedAddress);
					
					if(search.isEmpty() || search.get().matchKind.isNotMatched()) {
						continue;
					}
					break;
				}
				
				if(search.isEmpty() || search.get().matchKind.isNotMatched()) {
					continue;
				}
				currentIndex = search
					.map(SearchResult::next)
					.orElse(currentIndex);
						
			}
			
			boolean allMatch = results.stream()
				.allMatch(SearchResult::isValid);
			
			if(allMatch && results.size() ==3) {
				return process(addressContext , targetNode , results);
			}else {
				allResults.add(results);
			}
		}
		List results = select(allResults);
		
		return process(addressContext , targetNode , results);
	}
	
	String[] addTownNameSuffix(String townName) {
		if(townName.endsWith("町") || townName.endsWith("村")) {
			return new String[] {townName , townName.substring(0,townName.length()-1)};
		}
		return new String[] {townName+"町" , townName +"村" , townName };
//		return new String[] {townName};
	}
	
	private List select(List> allResults) {
		allResults.sort(
			(list1,list2)-> level(list2) - level(list1)
		);
		List list = allResults.get(0);
		return list;
	}
	
	public int level(List results) {
		return results.isEmpty() ?
				0 :
				results.get(results.size()-1).next.value;
	}

	private TargetStateAndElement process(
			AddressContext addressContext,
			TreeNode targetNode, List results) {
		
		if(results.isEmpty()) {
			return new TargetStateAndElement(ParsingState.町名分割エラー , 定義済みRange階層要素.全体);
		}
		
		SearchResult lastSearchResult = results.get(results.size()-1);
		
		if(lastSearchResult._階層要素.level <= 階層要素.国域Top3.level /*階層要素.区.level*/ &&
				false == lastSearchResult.zipBasedAddress.町名無し次に番地が来る()) {
			return new TargetStateAndElement(ParsingState.町名分割エラー , 定義済みRange階層要素.全体);
		}
		
		
		results.stream()
			.forEach(SearchResult::run);
		
		StringIndex next = lastSearchResult.next();
		
		StringAndCharacterKinds substring = 
				addressContext.addressString().substring(next, SeparatorKind.domainSpecificSeparator, SeparatorKind.terminator);
		
		addressContext.addChild(
			targetNode, 
			AddressElementFactory.of(substring, 定義済みRange階層要素.町名より後 , 
				SeparatorKind.domainSpecificSeparator , SeparatorKind.domainSpecificSeparator 
			)
		);
		
		return new TargetStateAndElement(ParsingState.町名から丁目の階層種類をDBを用いて求める , 定義済みRange階層要素.町名より後);
	}

	 Optional search(
			ParsingTarget parsingTarget,
			AddressContext addressContext, 
			TreeNode targetNode,
			StringAndCharacterKinds address, 
			StringIndex currentIndex, 
			String nameOfPart,
			List results,
			階層要素 _階層要素,
			ZipBasedAddress zipBasedAddress) {
		
		Optional search = 
				search(addressContext, targetNode, address, currentIndex, nameOfPart, _階層要素 , zipBasedAddress);
		
		search.ifPresent(result->{
			if(result.matchKind.isNotMatched()) {
				return;
			}
			
			results.add(result);
			ResolverResultKind resolverResultKind = null;
			switch (_階層要素) {
			case 国域Top1:
//			case 都道府県:
				resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match都道府県;
				break;
			case 国域Top2:
//			case 市:
				resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match市;
				break;
			case 国域Top3:
//			case 町村:
				resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match区;
				break;
			case 国域Top4:
//			case 町または大字:
				resolverResultKind = ResolverResultKindOfMatchKind.郵便番号辞書match町または大字;
				break;
			default:
				throw new IllegalArgumentException();
			}
			parsingTarget.addResolverResult(new ResolverResult(resolverResultKind , MatchKind.Match));
		});
		return search;
	}


	private Optional search(
			AddressContext addressContext, 
			TreeNode targetNode,
			StringAndCharacterKinds address, 
			StringIndex currentIndex, 
			String nameOfPart,
			階層要素 _階層要素,
			ZipBasedAddress zipBasedAddress) {
		
		if (false == isEmpty(nameOfPart)) {
			
			Tuple2 match = address.match(nameOfPart, currentIndex , excludes);
			
			MatchKind matchKind = match._2();
			Range range = match._1();
			
			if(matchKind == MatchKind.NoMatch) {
				
				return Optional.of(
					new SearchResult(_階層要素 , StringIndex.invalid(), currentIndex, matchKind,zipBasedAddress,()->{} )
				);
				
			}else {
				return Optional.of( 
					new SearchResult(_階層要素 , 
						StringIndex.of(range.startIndexInclusive),StringIndex.of(range.endIndexExclusive) ,
						matchKind,zipBasedAddress,
						()->{
							addressContext.addChild(
									targetNode, 
									AddressElementFactory.of(nameOfPart, _階層要素 , 
											SeparatorKind.domainSpecificSeparator , SeparatorKind.domainSpecificSeparator 
									)
							);
						}
					)
				);
			}
		}
		return Optional.empty();
	}
	
	enum TownNameSeparator{
		町,
		区,
		群,
		市,
		村,
		;
		
		public String value() {
			return name();
		}
	}
	
	static class SearchResult implements Runnable{
		public final 階層要素 _階層要素;
		public final StringIndex indexOf;
		public final StringIndex next;
		public final Runnable runnable;
		public final MatchKind matchKind;
		public final ZipBasedAddress zipBasedAddress;
		
		public SearchResult(階層要素 _階層要素 , StringIndex indexOf, StringIndex next , 
				MatchKind matchKind,  ZipBasedAddress zipBasedAddress , Runnable runnable) {
			super();
			this._階層要素 = _階層要素;
			this.next = next;
			this.indexOf = indexOf;
			this.runnable = runnable;
			this.zipBasedAddress = zipBasedAddress;
			this.matchKind = matchKind;
		}
		public boolean isValid() {
			return indexOf.isValid();
		}
		
		public StringIndex next() {
			return next;
		}
		
		@Override
		public void run() {
			Unchecked.run(()->runnable.run());
		}
	}
	
	static boolean isEmpty(String word) {
		return word == null || word.equalsIgnoreCase("NULL") || word.isEmpty(); 
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy