// All downloads are free. The search and download functionality uses the official Maven repository.

// org.unlaxer.jaddress.parser.StringAndCharacterKinds (Maven / Gradle / Ivy)

package org.unlaxer.jaddress.parser;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.BinaryOperator;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.unlaxer.Range;
import org.unlaxer.jaddress.normalizer.VariantNormalizer;

import io.vavr.Tuple2;

public class StringAndCharacterKinds implements List{
	
	/** Normalizer shared by all instances; used by {@link #indexOf(String, StringIndex)}. */
	static VariantNormalizer variantNormalizer = new VariantNormalizer();
	
	/** Shared instance wrapping {@link Collections#emptyList()}. */
	public static final StringAndCharacterKinds EMPTY = new StringAndCharacterKinds(Collections.emptyList());
	
	/** Backing list; every {@link List} method of this class delegates to it. */
	final List<StringAndCharacterKind> inner;
	
	/**
	 * Concatenation of the strings of all elements, computed once in the
	 * constructor. It is not recomputed when the backing list is mutated.
	 */
	final String joined;
	
	/**
	 * Builds an instance from raw text by running it through
	 * {@link #of(String, boolean)} with {@code concatJapaneseSymbolToNormal}
	 * set to {@code false}; the result of that call becomes the backing list.
	 *
	 * @param string text to split into runs
	 */
	public StringAndCharacterKinds(String string) {
		this(of(string, false));
	}
	
	/**
	 * Wraps the given list (no copy is made) and caches the concatenation of
	 * every element's string.
	 *
	 * @param inner elements to wrap; used directly as the backing list
	 */
	public StringAndCharacterKinds(List<StringAndCharacterKind> inner) {
		this.inner = inner;
		// Computed once here; later changes to the list do not refresh it.
		this.joined = inner.stream()
			.map(StringAndCharacterKind::get)
			.collect(Collectors.joining());
	}
	
	/** Returns the number of elements in the backing list. */
	public int size() {
		return inner.size();
	}

	/** Returns {@code true} when the backing list has no elements. */
	public boolean isEmpty() {
		return inner.isEmpty();
	}
	
	/** Returns {@code true} when the backing list holds at least one element. */
	public boolean isPresent() {
		return !inner.isEmpty();
	}

	/**
	 * Element membership test, delegated to the backing list.
	 * An argument whose static type is {@code String} resolves to the
	 * {@code contains(String)} overload instead, which tests the joined text.
	 */
	public boolean contains(Object o) {
		return inner.contains(o);
	}

	/** Returns the backing list's iterator. */
	public Iterator iterator() {
		return inner.iterator();
	}

	/** Returns the backing list's elements as an array. */
	public Object[] toArray() {
		return inner.toArray();
	}

	/**
	 * Copies the backing list's elements into an array of the runtime type
	 * of {@code a}, delegating to the backing list.
	 *
	 * @param a array that fixes the runtime type of the result
	 * @return the array returned by the backing list
	 */
	public <T> T[] toArray(T[] a) {
		return inner.toArray(a);
	}

	/**
	 * Appends {@code e} to the backing list.
	 * The cached joined text is assigned only in the constructor, so it is
	 * not updated by this call.
	 */
	public boolean add(StringAndCharacterKind e) {
		return inner.add(e);
	}

	/** Removes {@code o} from the backing list; the cached joined text is not updated. */
	public boolean remove(Object o) {
		return inner.remove(o);
	}

	/** Delegates to the backing list's {@code containsAll}. */
	public boolean containsAll(Collection c) {
		return inner.containsAll(c);
	}

	/** Appends all of {@code c} to the backing list; the cached joined text is not updated. */
	public boolean addAll(Collection c) {
		return inner.addAll(c);
	}

	/** Inserts all of {@code c} into the backing list at {@code index}; the cached joined text is not updated. */
	public boolean addAll(int index, Collection c) {
		return inner.addAll(index, c);
	}

	/** Delegates to the backing list's {@code removeAll}; the cached joined text is not updated. */
	public boolean removeAll(Collection c) {
		return inner.removeAll(c);
	}


	/** Delegates to the backing list's {@code retainAll}; the cached joined text is not updated. */
	public boolean retainAll(Collection c) {
		return inner.retainAll(c);
	}

	/** Clears the backing list; the cached joined text is not updated. */
	public void clear() {
		inner.clear();
	}

	/** Equality is decided entirely by the backing list's {@code equals}. */
	public boolean equals(Object o) {
		return inner.equals(o);
	}

	/** Returns the backing list's hash code, matching {@link #equals(Object)}. */
	public int hashCode() {
		return inner.hashCode();
	}

	/** Returns the element at {@code index} of the backing list. */
	public StringAndCharacterKind get(int index) {
		return inner.get(index);
	}

	/** Replaces the element at {@code index} in the backing list; the cached joined text is not updated. */
	public StringAndCharacterKind set(int index, StringAndCharacterKind element) {
		return inner.set(index, element);
	}

	/** Inserts {@code element} at {@code index} in the backing list; the cached joined text is not updated. */
	public void add(int index, StringAndCharacterKind element) {
		inner.add(index, element);
	}

	/** Removes the element at {@code index} from the backing list; the cached joined text is not updated. */
	public StringAndCharacterKind remove(int index) {
		return inner.remove(index);
	}

	/** Element search, delegated to the backing list's {@code indexOf}. */
	public int indexOf(Object o) {
		return inner.indexOf(o);
	}

	/** Element search from the end, delegated to the backing list's {@code lastIndexOf}. */
	public int lastIndexOf(Object o) {
		return inner.lastIndexOf(o);
	}

	/** Returns the backing list's list iterator, positioned at the start. */
	public ListIterator listIterator() {
		return inner.listIterator();
	}

	/** Returns the backing list's list iterator, positioned at {@code index}. */
	public ListIterator listIterator(int index) {
		return inner.listIterator(index);
	}
	
	/**
	 * Returns a list iterator positioned after the last element, so that
	 * the first {@code previous()} call yields the last element.
	 */
	public ListIterator listIteratorFromLast() {
		final int end = inner.size();
		return inner.listIterator(end);
	}

	/** Returns the backing list's {@code subList} for the given half-open index range. */
	public List subList(int fromIndexInclusive, int toIndexExclusive) {
		return inner.subList(fromIndexInclusive, toIndexExclusive);
	}
	
	/**
	 * Wraps the backing list's {@code subList} for the given half-open
	 * element-index range in a new instance.
	 *
	 * @param fromIndexInclusive first element index to include
	 * @param toIndexExclusive element index at which to stop
	 * @return a new instance backed by the sub-list
	 */
	public StringAndCharacterKinds subListAsStringAndCharacterKinds(ListIndex  fromIndexInclusive, ListIndex toIndexExclusive) {
		List<StringAndCharacterKind> range =
			inner.subList(fromIndexInclusive.value, toIndexExclusive.value);
		return new StringAndCharacterKinds(range);
	}
	
	/** Returns the concatenated text that was cached in the constructor. */
	public String joined() {
		return joined;
	}
	
	/**
	 * Combines two instances into a new one whose elements are those of
	 * {@code left} followed by those of {@code right}. Neither operand is
	 * modified; the result is backed by a fresh {@link ArrayList}.
	 */
	public final static BinaryOperator<StringAndCharacterKinds> joiner = (left, right) -> {
		
		List<StringAndCharacterKind> sum =
			new ArrayList<>(left.inner().size() + right.inner().size());
		
		sum.addAll(left.inner());
		sum.addAll(right.inner());
		
		return new StringAndCharacterKinds(sum);
	};
	
	/**
	 * Returns the backing list itself, not a copy, so changes made through
	 * the returned list are visible in this instance.
	 */
	public List<StringAndCharacterKind> inner(){
		return inner;
	}
	
	/**
	 * Concatenates two instances by applying {@link #joiner}.
	 *
	 * @param left elements that come first
	 * @param right elements that follow
	 * @return the value produced by {@link #joiner}
	 */
	public static StringAndCharacterKinds join(StringAndCharacterKinds left , StringAndCharacterKinds right) {
		final StringAndCharacterKinds combined = joiner.apply(left, right);
		return combined;
	}
	
	/**
	 * Appends {@code adding} to this instance.
	 *
	 * @param adding elements to append
	 * @return {@code this} when {@code adding} is empty, otherwise the result
	 *         of the static {@code join(this, adding)}
	 */
	public StringAndCharacterKinds join(StringAndCharacterKinds adding) {
		return adding.isEmpty()
			? this
			: StringAndCharacterKinds.join(this , adding);
	}
	
	
	/** Returns the shared {@link #EMPTY} instance. */
	public static StringAndCharacterKinds empty() {
		return EMPTY;
	}
	
	List> characterKindIndexOf(CharacterKind... characterKinds){
		
		List> indexes = new ArrayList<>();
		
		int index = 0;
		for(StringAndCharacterKind stringAndCharacterKind : inner) {
			
			for (CharacterKind characterKind : characterKinds) {
				if(stringAndCharacterKind.characterKind == characterKind) {
					indexes.add(new Tuple2<>(index , stringAndCharacterKind));
					break;
				}
			}
			index++;
		}
		return indexes;
	}
	
	/**
	 * Returns a new instance holding only the elements whose character kind
	 * is none of {@code characterKinds}; element order is preserved.
	 *
	 * @param characterKinds kinds to drop (compared by identity)
	 * @return a new instance without the matching elements
	 */
	public StringAndCharacterKinds cutFilterchracterKindIndexOf(CharacterKind... characterKinds){
		
		List<StringAndCharacterKind> kept = new ArrayList<>();
		
		nextElement:
		for (StringAndCharacterKind candidate : inner) {
			for (CharacterKind excluded : characterKinds) {
				if (candidate.characterKind == excluded) {
					// Listed kind: skip this element entirely.
					continue nextElement;
				}
			}
			kept.add(candidate);
		}
		return new StringAndCharacterKinds(kept);
	}

	/**
	 * Renders the elements' {@code toString()} values separated by commas
	 * and enclosed in square brackets, e.g. {@code [a,b]}.
	 */
	@Override
	public String toString() {
		
		StringBuilder text = new StringBuilder("[");
		boolean first = true;
		for (StringAndCharacterKind element : inner) {
			if (!first) {
				text.append(",");
			}
			text.append(element.toString());
			first = false;
		}
		return text.append("]").toString();
	}
	
	/**
	 * Strips leading and trailing whitespace from the joined text.
	 *
	 * @return {@code this} when stripping does not change the length,
	 *         otherwise a new instance built from the stripped text via
	 *         {@link #of(String, boolean)} with {@code false}
	 */
	public StringAndCharacterKinds strip() {
		final String stripped = joined.strip();
		if (stripped.length() == joined.length()) {
			return this;
		}
		return StringAndCharacterKinds.of(stripped, false);
	}
	
	/**
	 * Accepts every character kind except {@code delimitorHyphen},
	 * {@code delimitorJapaneseSymbol} and {@code delimitorSpace}.
	 */
	static final Predicate<CharacterKind> notSpaceAndDelimitor =
		characterKind ->
			characterKind != CharacterKind.delimitorHyphen &&
			characterKind != CharacterKind.delimitorJapaneseSymbol &&
			characterKind != CharacterKind.delimitorSpace;
	
	/** Calls {@code strip(Predicate)} with {@code notSpaceAndDelimitor}. */
	public StringAndCharacterKinds stripIncludesSymbols() {
		return strip(notSpaceAndDelimitor);
	}
	
	/**
	 * Drops leading and trailing elements until one is found, from each
	 * end, whose character kind satisfies {@code matchWithNonStrip}.
	 * The supplied predicate is the one applied; it was previously ignored
	 * in favour of {@code notSpaceAndDelimitor}.
	 *
	 * @param matchWithNonStrip returns {@code true} for kinds that must be kept
	 * @return a new instance over the sub-list between the first and last
	 *         kept elements, or {@link #empty()} when no element satisfies
	 *         the predicate
	 */
	public StringAndCharacterKinds strip(Predicate<CharacterKind> matchWithNonStrip) {
		
		// Inclusive index of the first element to keep.
		ListIndex left = indexOf(matchWithNonStrip , TopOrBottom.TOP);
		
		if(left.value == -1 ) {
			return StringAndCharacterKinds.empty();
		}
		
		// Searching from the bottom yields the position just past the last
		// element to keep, i.e. an exclusive end index for subList.
		ListIndex right = indexOf(matchWithNonStrip , TopOrBottom.BOTTOM);
		
		if(left.value > right.value) {
			return StringAndCharacterKinds.empty();
		}
		
		return new StringAndCharacterKinds(inner.subList(left.value, right.value));
	}
	
	/**
	 * Finds the first element, scanning from the given end, whose character
	 * kind satisfies {@code predicate}.
	 *
	 * <p>The two directions report different positions:
	 * <ul>
	 * <li>{@code TopOrBottom.TOP} scans forward and returns the index of the
	 *     matching element;</li>
	 * <li>any other value scans backward from the end and returns the index
	 *     of the matching element plus one (the iterator position just after
	 *     it).</li>
	 * </ul>
	 *
	 * @param predicate test applied to each element's character kind
	 * @param from end of the list at which scanning starts
	 * @return the position described above, or a {@code ListIndex} of
	 *         {@code -1} when no element matches
	 */
	public ListIndex indexOf(Predicate<CharacterKind> predicate , TopOrBottom from) {
		
		if (from == TopOrBottom.TOP) {
			ListIterator<StringAndCharacterKind> iterator = listIterator();
			while (iterator.hasNext()) {
				int index = iterator.nextIndex();
				if (predicate.test(iterator.next().characterKind)) {
					return ListIndex.of(index);
				}
			}
		} else {
			ListIterator<StringAndCharacterKind> iterator = listIteratorFromLast();
			while (iterator.hasPrevious()) {
				// Position after the element about to be read.
				int index = iterator.previousIndex() + 1;
				if (predicate.test(iterator.previous().characterKind)) {
					return ListIndex.of(index);
				}
			}
		}
		return ListIndex.of(-1);
	}

	
	/**
	 * Cuts the joined text by string index and re-classifies the piece with
	 * {@link #of(String, boolean)} (flag {@code false}).
	 *
	 * @param beginIndexInclusive start offset in the joined text
	 * @param endIndexExclusive end offset in the joined text
	 * @param leadingSeparator not used by this method
	 * @param tailingSeparator not used by this method
	 * @return a new instance built from the extracted text
	 */
	public StringAndCharacterKinds substring(
			StringIndex  beginIndexInclusive, 
			StringIndex endIndexExclusive , 
			SeparatorKind leadingSeparator ,
			SeparatorKind tailingSeparator) {
		
		return StringAndCharacterKinds.of(
			joined.substring(beginIndexInclusive.value, endIndexExclusive.value),
			false);
	}
	
	/**
	 * Cuts the joined text from a string index to its end and re-classifies
	 * the piece with {@link #of(String, boolean)} (flag {@code false}).
	 *
	 * @param beginIndexInclusive start offset in the joined text
	 * @param leadingSeparator not used by this method
	 * @param tailingSeparator not used by this method
	 * @return a new instance built from the extracted text
	 */
	public StringAndCharacterKinds substring(
			StringIndex  beginIndexInclusive, 
			SeparatorKind leadingSeparator ,
			SeparatorKind tailingSeparator) {
		
		return StringAndCharacterKinds.of(
			joined.substring(beginIndexInclusive.value),
			false);
	}

	
	/**
	 * Substring test against the joined text (not an element lookup).
	 *
	 * @param word text to look for
	 * @return {@code true} when the joined text contains {@code word}
	 */
	public boolean contains(String word) {
		final String text = joined();
		return text.contains(word);
	}
	
	/**
	 * Locates {@code word} in the joined text, starting at {@code fromIndex}.
	 * Both texts are passed through the shared normalizer; the normalized
	 * forms are searched only when normalization left both lengths
	 * unchanged, otherwise the raw texts are searched.
	 *
	 * @param word text to find
	 * @param fromIndex offset at which the search starts
	 * @return the offset wrapped by {@code StringIndex.of}, as reported by
	 *         {@link String#indexOf(String, int)}
	 */
	public StringIndex indexOf(String word , StringIndex fromIndex) {
		final String normalizedText = variantNormalizer.normalizeForAddress(joined);
		final String normalizedWord = variantNormalizer.normalizeForAddress(word);
		
		final boolean lengthsPreserved =
			normalizedText.length() == joined.length()
				&& word.length() == normalizedWord.length();
		
		final int found = lengthsPreserved
			? normalizedText.indexOf(normalizedWord, fromIndex.value)
			: joined().indexOf(word, fromIndex.value);
		return StringIndex.of(found);
	}
	
	/**
	 * Tries to locate {@code word} in the joined text at or after
	 * {@code fromIndex}.
	 *
	 * <p>An exact hit from {@code indexOf(word, fromIndex)} yields a range
	 * covering the hit together with {@code MatchKind.Match}. Otherwise the
	 * word is rebuilt without the one-character strings listed in
	 * {@code excludes} and its characters are searched one after another in
	 * the stripped remainder of the joined text; a character that cannot be
	 * found yields an invalid range with {@code MatchKind.NoMatch}.
	 *
	 * <p>NOTE(review): the line beginning {@code if(pointer+1} is truncated
	 * in this copy of the source (text between {@code <} and {@code >}
	 * appears to have been lost), so the logic leading to
	 * {@code MatchKind.NormalizedMatch} cannot be read from here. Restore it
	 * from the upstream source before relying on this method.
	 *
	 * @param word text to find
	 * @param fromIndex offset at which the search starts
	 * @param excludes one-character strings removed from {@code word} for the
	 *        fallback search
	 * @return a (range, match kind) pair
	 */
	public Tuple2 match(String word , StringIndex fromIndex , Set excludes){
		// Fast path: the word occurs as-is (or in normalized form).
		StringIndex indexOf = indexOf(word , fromIndex);
		if(indexOf.isValid()) {
			return new Tuple2<>(
				new Range(indexOf.value, indexOf.value+word.length()), 
				MatchKind.Match
			);
		}
		int indexFrom = fromIndex.value;
		String base = joined().substring(indexFrom).strip();
		
		// Copy of word without the characters listed in excludes.
		String regulatedWord="";
		for(int i =0 ; i< word.length();i++) {
			String substring = word.substring(i,i+1);
			if(false == excludes.contains(substring)) {
				regulatedWord += substring;
			}
		}
		
		int pointer = 0;
		
		String notMatches="";
		
		List pointers = new ArrayList();
		for(int i =0 ; i< regulatedWord.length();i++) {
			String substring = regulatedWord.substring(i,i+1);

			// Each remaining character is searched in base from the current pointer.
			int indexOfMatch = base.indexOf(substring,pointer);
			if(indexOfMatch == -1) {
				return new Tuple2<>(new Range(StringIndex.invalid().value), MatchKind.NoMatch);
			}
			// NOTE(review): the next line is truncated in this copy; see the method comment.
			if(pointer+1(range, MatchKind.NormalizedMatch);
		}
		// TODO fuzzy match?
		return new Tuple2<>(new Range(StringIndex.invalid().value), MatchKind.NoMatch);
	}
	
	/**
	 * Counts how many single UTF-16 units of {@code word}, taken as
	 * one-character strings, are members of {@code matchStrings}.
	 *
	 * @param word text to scan
	 * @param matchStrings one-character strings to count
	 * @return number of positions in {@code word} that are in the set
	 */
	private int matches(String word , Set matchStrings) {
		int hits = 0;
		for (char unit : word.toCharArray()) {
			if (matchStrings.contains(String.valueOf(unit))) {
				hits++;
			}
		}
		return hits;
	}
	
	/**
	 * @param string
	 * @param concatJapaneseSymbolToNormal
	 * @return StringAndCharacterKinds
	 * 
	 * concatJapaneseSymbolToNormal true -> "肉ー" is [normal("肉ー")]
	 * concatJapaneseSymbolToNormal false -> "肉ー" is [normal("肉"),delimitorJapaneseSymbol("ー")]
	 */
	public static StringAndCharacterKinds of(
			String string , 
			boolean concatJapaneseSymbolToNormal) {
		
		if(string == null || string.isEmpty()) {
			return StringAndCharacterKinds.empty();
		}
		
		List collect = CharacterKinds.codePointAndCharacterKinds(string);
		
		List> rans = new ArrayList>();
		
		CharacterKind last = null;
		
		var ran = new ArrayList();
		
		for (CodePointAndCharacterKind codePointAndCharacterKind : collect) {
			
			CharacterKind characterKind = codePointAndCharacterKind.characterKind;
			
			if(concatJapaneseSymbolToNormal && last != null && last.isJapanese() &&  characterKind.isJapanese()) {
				
				codePointAndCharacterKind = codePointAndCharacterKind.as(CharacterKind.normal);
				
				ran.add(codePointAndCharacterKind);

			}else if(last != null && (last.isHiragana() || last.isKatakana()) &&  characterKind == CharacterKind.cyouon) {
				
				codePointAndCharacterKind = codePointAndCharacterKind.as(last);
				ran.add(codePointAndCharacterKind);
				
			}else  if(last != characterKind) {
				
				if(false == ran.isEmpty()) {
					rans.add(new ArrayList<>(ran));
					ran.clear();
				}
				ran.add(codePointAndCharacterKind);
				last = characterKind;
				
			}else {
				ran.add(codePointAndCharacterKind);
			}
		}
		if(false == ran.isEmpty()) {
			rans.add(ran);
		}
		
		List results = new ArrayList<>();
		
		for (List sameKindList : rans) {
			
			CharacterKind characterKind = sameKindList.get(0).characterKind;
			
			int[] codePoints = new int[sameKindList.size()];

			int index =0;
			
			for (CodePointAndCharacterKind codePointAndCharacterKind : sameKindList) {
				
				codePoints[index++] = codePointAndCharacterKind.codePoint;
			}
			
			String ranString = new String(codePoints , 0 , codePoints.length);
			StringAndCharacterKind stringAndCharacterKind = new StringAndCharacterKind(characterKind, ranString);
			
			results.add(stringAndCharacterKind);
		}
		return new StringAndCharacterKinds(results);
	}

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy