All downloads are free. Search and download functionality uses the official Maven repository.

org.unlaxer.jaddress.parser.CharacterKinds Maven / Gradle / Ivy

The newest version!
package org.unlaxer.jaddress.parser;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

public class CharacterKinds implements
	SingleCharacterKindAllMatch ,
	SingleCharacterKindAnyMatch ,
	SingleCharacterKindNotMatch,
	CombinedCharacterKindAllMatch ,
	CombinedCharacterKindAnyMatch ,
	CombinedCharacterKindNotMatch{
	
	// TODO support sequential matching
	final Set characterKinds;
	
	public static final CharacterKinds EMPTY = new CharacterKinds();

	public CharacterKinds(Collection characterKinds) {
		super();
		this.characterKinds = new HashSet<>(characterKinds);
	}
	
	public CharacterKinds(CharacterKind... characterKinds) {
		super();
		this.characterKinds = Set.of(characterKinds);
	}
	
	public CharacterKinds(StringAndCharacterKinds stringAndCharacterKinds) {
		this(
			stringAndCharacterKinds.inner.stream()
				.map(StringAndCharacterKind::characterKind)
				.collect(Collectors.toList())
		);
	}
	
	public static CharacterKinds empty() {
		return EMPTY;
	}
	
	public Set collection(){
		return characterKinds;
	}
	
	public boolean allMatch(CharacterKind characterKind){
		return characterKinds.stream()
			.allMatch(kind->kind == characterKind);
	}
	
	public boolean anyMatch(CharacterKind characterKind){
		return characterKinds.stream()
			.anyMatch(kind->kind == characterKind);
	}
	
	public boolean anyMatch(Predicate predicate){
		return characterKinds.stream()
			.anyMatch(predicate);
	}

	
	public boolean notMatch(CharacterKind characterKind){
		return false == characterKinds.stream()
			.anyMatch(kind->kind == characterKind);
	}

	@Override
	public boolean isArabicNumber() {
		return allMatch(CharacterKind.arabicNumber);
	}

	@Override
	public boolean isSymbol() {
		return allMatch(CharacterKind.symbol);
	}

	@Override
	public boolean isAlphabet() {
		return allMatch(CharacterKind.alphabet);
	}

	@Override
	public boolean isJapaneseNumber() {
		return allMatch(CharacterKind.japaneseAddressNumber);
	}

	@Override
	public boolean isHiragana() {
		return allMatch(CharacterKind.hiragana);
	}

	@Override
	public boolean isKatakana() {
		return allMatch(CharacterKind.katakana);
	}

	@Override
	public boolean isDelimitorHyphen() {
		return allMatch(CharacterKind.delimitorHyphen);
	}

	@Override
	public boolean isDelimitorSlash() {
		return allMatch(CharacterKind.delimitorSlash);
	}

	@Override
	public boolean isDelimitorSpace() {
		return allMatch(CharacterKind.delimitorSpace);
	}

	@Override
	public boolean isDelimitorComma() {
		return allMatch(CharacterKind.delimitorComma);
	}

	@Override
	public boolean isDelimitorJapanese() {
		return allMatch(CharacterKind.delimitorJapanese);
	}

	@Override
	public boolean isDelimitorJapaneseCyoumeAddress() {
		return allMatch(CharacterKind.suffix丁目);
	}

	@Override
	public boolean isDelimitorJapaneseBanchiAddress() {
		return allMatch(CharacterKind.suffix地番);
	}

	@Override
	public boolean isDelimitorJapaneseGouAddress() {
		return allMatch(CharacterKind.suffix号);
	}

	@Override
	public boolean isNormal() {
		return allMatch(CharacterKind.normal);
	}
	
	boolean allMatch(Predicate predicate) {
		return characterKinds.stream()
				.allMatch(predicate);
	}

	@Override
	public boolean isJapanese() {
		return allMatch(CharacterKind::isJapanese);
	}

	@Override
	public boolean isDelimitor() {
		return allMatch(CharacterKind::isDelimitor);
	}

	@Override
	public boolean isJapanesAddressDelimitor() {
		return allMatch(CharacterKind::isJapanesAddressDelimitor);
	}

	@Override
	public boolean isNumber() {
		return allMatch(CharacterKind::isNumber);
	}

	@Override
	public boolean anyMatchJapanese() {
		return anyMatch(CharacterKind::isJapanese);
	}

	@Override
	public boolean anyMatchDelimitor() {
		return anyMatch(CharacterKind::isDelimitor);
	}

	@Override
	public boolean anyMatchJapanesAddressDelimitor() {
		return anyMatch(CharacterKind::isJapanesAddressDelimitor);
	}

	@Override
	public boolean anyMatchNumber() {
		return anyMatch(CharacterKind::isNumber);
	}

	@Override
	public boolean anyMatchArabicNumber() {
		return anyMatch(CharacterKind.arabicNumber);
	}

	@Override
	public boolean anyMatchSymbol() {
		return anyMatch(CharacterKind.symbol);
	}

	@Override
	public boolean anyMatchAlphabet() {
		return anyMatch(CharacterKind.alphabet);
	}

	@Override
	public boolean anyMatchJapaneseNumber() {
		return anyMatch(CharacterKind.japaneseAddressNumber);
	}

	@Override
	public boolean anyMatchHiragana() {
		return anyMatch(CharacterKind.hiragana);
	}

	@Override
	public boolean anyMatchKatakana() {
		return anyMatch(CharacterKind.katakana);
	}

	@Override
	public boolean anyMatchDelimitorHyphen() {
		return anyMatch(CharacterKind.delimitorHyphen);
	}

	@Override
	public boolean anyMatchDelimitorSlash() {
		return anyMatch(CharacterKind.delimitorSlash);
	}

	@Override
	public boolean anyMatchDelimitorSpace() {
		return anyMatch(CharacterKind.delimitorSpace);
	}

	@Override
	public boolean anyMatchDelimitorComma() {
		return anyMatch(CharacterKind.delimitorComma);
	}

	@Override
	public boolean anyMatchDelimitorJapanese() {
		return anyMatch(CharacterKind.delimitorJapanese);
	}

	@Override
	public boolean anyMatchDelimitorJapaneseCyoumeAddress() {
		return anyMatch(CharacterKind.suffix丁目);
	}

	@Override
	public boolean anyMatchDelimitorJapaneseBanchiAddress() {
		return anyMatch(CharacterKind.suffix地番);
	}

	@Override
	public boolean anyMatchDelimitorJapaneseGouAddress() {
		return anyMatch(CharacterKind.suffix号);
	}

	@Override
	public boolean anyMatchNormal() {
		return anyMatch(CharacterKind.normal);
	}

	@Override
	public boolean isAllKind() {
		return true;
	}

	public CharacterKinds add(CharacterKinds characterKinds2) {
		List characterKinds = new ArrayList();
		characterKinds.addAll(this.characterKinds);
		characterKinds.addAll(characterKinds2.characterKinds);
		
		return new CharacterKinds(characterKinds);
	}

	@Override
	public String toString() {
		String collect = characterKinds.stream()
			.map(CharacterKind::name)
			.collect(Collectors.joining(",","[","]"));
		return collect;
	}
	
	public boolean isEmpty() {
		return characterKinds.isEmpty();
	}
	
//	public static StringAndCharacterKinds stringAndCharacterKindsOf(String string) {
//		return stringAndCharacterKindsOf(string , false);
//	}
	
	public static List codePointAndCharacterKinds(String string) {
		
		ListIterator listIterator = string.codePoints().boxed().collect(Collectors.toList()).listIterator();

		List results = new ArrayList();
		while (listIterator.hasNext()) {
			List codePointAndCharacterKinds = codePointAndCharacterKinds(listIterator);
			results.addAll(codePointAndCharacterKinds);
		}
		return results;
	}
	
	private static List codePointAndCharacterKinds(ListIterator codePointIterator){

		
		for(CharacterKind characterKind : CharacterKind.values()) {
			
			List matched = characterKind.matched(codePointIterator);
			if(matched.isEmpty()) {
				continue;
			}
			return create(matched , characterKind);
		}
		throw new IllegalArgumentException();
	}
	
	static List create(List wordOfDictionary,
			CharacterKind characterKind) {
		
		List collect = wordOfDictionary.stream()
			.map(codePoint->new CodePointAndCharacterKind(characterKind, codePoint))
			.collect(Collectors.toList());
		
		return collect;
	}

	@Override
	public boolean isTerminator() {
		return allMatch(CharacterKind.terminator);
	}
	
	/**
	 * @param string
	 * @param concatJapaneseSymbolToNormal
	 * @return StringAndCharacterKinds
	 * 
	 * concatJapaneseSymbolToNormal true -> "肉ー" is [normal("肉ー")]
	 * concatJapaneseSymbolToNormal false -> "肉ー" is [normal("肉"),delimitorJapaneseSymbol("ー")]
	 */
	public static CharacterKinds of(
			String string , 
			boolean concatJapaneseSymbolToNormal) {
		
		if(string == null || string.isEmpty()) {
			return CharacterKinds.empty();
		}
		
		List collect = codePointAndCharacterKinds(string);
		
		List> rans = new ArrayList>();
		
		CharacterKind last = null;
		
		var ran = new ArrayList();
		
		for (CodePointAndCharacterKind codePointAndCharacterKind : collect) {
			
			CharacterKind characterKind = codePointAndCharacterKind.characterKind;
			
			if(concatJapaneseSymbolToNormal && last != null && last.isJapanese() &&  characterKind.isJapanese()) {
				
				codePointAndCharacterKind = codePointAndCharacterKind.as(CharacterKind.normal);
				
				ran.add(codePointAndCharacterKind);

			}else if(last != null && (last.isHiragana() || last.isKatakana()) &&  characterKind == CharacterKind.cyouon) {
				
				codePointAndCharacterKind = codePointAndCharacterKind.as(last);
				ran.add(codePointAndCharacterKind);
				
			}else  if(last != characterKind) {
				
				if(false == ran.isEmpty()) {
					rans.add(new ArrayList<>(ran));
					ran.clear();
				}
				ran.add(codePointAndCharacterKind);
				last = characterKind;
				
			}else {
				ran.add(codePointAndCharacterKind);
			}
		}
		if(false == ran.isEmpty()) {
			rans.add(ran);
		}
		
		List results = new ArrayList<>();
		
		for (List sameKindList : rans) {
			
			CharacterKind characterKind = sameKindList.get(0).characterKind;
			
			int[] codePoints = new int[sameKindList.size()];

			int index =0;
			
			for (CodePointAndCharacterKind codePointAndCharacterKind : sameKindList) {
				
				codePoints[index++] = codePointAndCharacterKind.codePoint;
			}
			
			results.add(characterKind);
		}
		return new CharacterKinds(results);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy