// org.unlaxer.jaddress.parser.StringAndCharacterKinds Maven / Gradle / Ivy
package org.unlaxer.jaddress.parser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.BinaryOperator;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.unlaxer.Range;
import org.unlaxer.jaddress.normalizer.VariantNormalizer;
import io.vavr.Tuple2;
/**
 * A {@link List} of {@link StringAndCharacterKind} that forwards the list operations to a
 * wrapped list and keeps the concatenation of the elements' strings.
 */
public class StringAndCharacterKinds implements List<StringAndCharacterKind>{
/** Normalizer used by {@code indexOf(String, StringIndex)}. */
static VariantNormalizer variantNormalizer = new VariantNormalizer();
/** Shared instance that wraps {@link Collections#emptyList()}. */
public static final StringAndCharacterKinds EMPTY = new StringAndCharacterKinds(Collections.emptyList());
/** Backing list; the constructor stores the reference it receives without copying it. */
final List<StringAndCharacterKind> inner;
/** Concatenation of every element's {@code get()} value, computed once in the constructor. */
final String joined;
/**
 * Creates an instance from raw text by delegating to {@code of(string, false)}; the value
 * returned by {@code of} becomes the backing list of the new instance.
 *
 * @param string text to split into runs
 */
public StringAndCharacterKinds(String string) {
  this(
      StringAndCharacterKinds.of(string, false)
  );
}
/**
 * Wraps the given list and caches the concatenation of its elements' strings.
 *
 * <p>The list reference is stored as-is (no defensive copy), and {@code joined} is computed
 * here only once.
 *
 * @param inner elements to wrap
 */
public StringAndCharacterKinds(List<StringAndCharacterKind> inner) {
  this.inner = inner;
  this.joined = inner.stream()
      .map(StringAndCharacterKind::get)
      .collect(Collectors.joining());
}
/**
 * Returns the number of elements in the backing list.
 *
 * @return {@code inner.size()}
 */
@Override
public int size() {
  return this.inner.size();
}
/**
 * Tells whether the backing list has no elements.
 *
 * @return {@code inner.isEmpty()}
 */
@Override
public boolean isEmpty() {
  return this.inner.isEmpty();
}
/**
 * Tells whether the backing list holds at least one element; the negation of
 * {@code inner.isEmpty()}.
 *
 * @return {@code true} when the backing list is not empty
 */
public boolean isPresent() {
  return !inner.isEmpty();
}
/**
 * Tells whether the backing list contains the given object.
 *
 * @param o object to look for
 * @return {@code inner.contains(o)}
 */
@Override
public boolean contains(Object o) {
  return this.inner.contains(o);
}
/**
 * Returns the backing list's iterator.
 *
 * @return {@code inner.iterator()}
 */
@Override
public Iterator<StringAndCharacterKind> iterator() {
  return inner.iterator();
}
/**
 * Returns the backing list's elements as an array.
 *
 * @return {@code inner.toArray()}
 */
@Override
public Object[] toArray() {
  return this.inner.toArray();
}
/**
 * Returns the backing list's elements in an array of the runtime type of {@code a}.
 *
 * @param a   array passed on to {@code inner.toArray(a)}
 * @param <T> component type of the array
 * @return {@code inner.toArray(a)}
 */
@Override
public <T> T[] toArray(T[] a) {
  return inner.toArray(a);
}
/**
 * Appends an element to the backing list.
 *
 * <p>The {@code joined} field is final and assigned in the constructor, so it does not
 * reflect elements added through this method.
 *
 * @param e element to append
 * @return {@code inner.add(e)}
 */
@Override
public boolean add(StringAndCharacterKind e) {
  return this.inner.add(e);
}
/**
 * Removes the given object from the backing list.
 *
 * @param o object to remove
 * @return {@code inner.remove(o)}
 */
@Override
public boolean remove(Object o) {
  return this.inner.remove(o);
}
public boolean containsAll(Collection> c) {
return inner.containsAll(c);
}
public boolean addAll(Collection extends StringAndCharacterKind> c) {
return inner.addAll(c);
}
public boolean addAll(int index, Collection extends StringAndCharacterKind> c) {
return inner.addAll(index, c);
}
public boolean removeAll(Collection> c) {
return inner.removeAll(c);
}
public boolean retainAll(Collection> c) {
return inner.retainAll(c);
}
/**
 * Removes every element from the backing list.
 */
@Override
public void clear() {
  this.inner.clear();
}
/**
 * Compares the backing list with {@code o}.
 *
 * @param o object to compare with
 * @return {@code inner.equals(o)}
 */
@Override
public boolean equals(Object o) {
  return this.inner.equals(o);
}
/**
 * Returns the backing list's hash code.
 *
 * @return {@code inner.hashCode()}
 */
@Override
public int hashCode() {
  return this.inner.hashCode();
}
/**
 * Returns the element at the given position of the backing list.
 *
 * @param index position of the element
 * @return {@code inner.get(index)}
 */
@Override
public StringAndCharacterKind get(int index) {
  return this.inner.get(index);
}
/**
 * Replaces the element at the given position of the backing list.
 *
 * @param index   position to overwrite
 * @param element new element
 * @return {@code inner.set(index, element)}
 */
@Override
public StringAndCharacterKind set(int index, StringAndCharacterKind element) {
  return this.inner.set(index, element);
}
/**
 * Inserts an element into the backing list at the given position.
 *
 * @param index   insertion position
 * @param element element to insert
 */
@Override
public void add(int index, StringAndCharacterKind element) {
  this.inner.add(index, element);
}
/**
 * Removes the element at the given position of the backing list.
 *
 * @param index position of the element to remove
 * @return {@code inner.remove(index)}
 */
@Override
public StringAndCharacterKind remove(int index) {
  return this.inner.remove(index);
}
/**
 * Looks up the given object in the backing list.
 *
 * @param o object to look for
 * @return {@code inner.indexOf(o)}
 */
@Override
public int indexOf(Object o) {
  return this.inner.indexOf(o);
}
/**
 * Looks up the last occurrence of the given object in the backing list.
 *
 * @param o object to look for
 * @return {@code inner.lastIndexOf(o)}
 */
@Override
public int lastIndexOf(Object o) {
  return this.inner.lastIndexOf(o);
}
/**
 * Returns the backing list's list iterator.
 *
 * @return {@code inner.listIterator()}
 */
@Override
public ListIterator<StringAndCharacterKind> listIterator() {
  return inner.listIterator();
}
/**
 * Returns the backing list's list iterator starting at the given position.
 *
 * @param index starting position
 * @return {@code inner.listIterator(index)}
 */
@Override
public ListIterator<StringAndCharacterKind> listIterator(int index) {
  return inner.listIterator(index);
}
/**
 * Returns a list iterator positioned at {@code inner.size()}, i.e. after the last element,
 * so that the list can be walked backwards with {@code previous()}.
 *
 * @return {@code inner.listIterator(inner.size())}
 */
public ListIterator<StringAndCharacterKind> listIteratorFromLast() {
  return inner.listIterator(inner.size());
}
/**
 * Returns the backing list's sub-list for the given range.
 *
 * @param fromIndexInclusive first index of the range
 * @param toIndexExclusive   index just past the end of the range
 * @return {@code inner.subList(fromIndexInclusive, toIndexExclusive)}
 */
@Override
public List<StringAndCharacterKind> subList(int fromIndexInclusive, int toIndexExclusive) {
  return inner.subList(fromIndexInclusive, toIndexExclusive);
}
/**
 * Wraps the backing list's sub-list for the given range in a new instance.
 *
 * @param fromIndexInclusive first index of the range (its {@code value} field is used)
 * @param toIndexExclusive   index just past the end of the range (its {@code value} field is used)
 * @return new instance built from {@code inner.subList(...)}
 */
public StringAndCharacterKinds subListAsStringAndCharacterKinds(ListIndex fromIndexInclusive, ListIndex toIndexExclusive) {
  final int begin = fromIndexInclusive.value;
  final int end = toIndexExclusive.value;
  return new StringAndCharacterKinds(inner.subList(begin, end));
}
/**
 * Returns the concatenated string that was computed in the constructor.
 *
 * @return the {@code joined} field
 */
public String joined() {
  return this.joined;
}
/**
 * Combines two instances into a new one whose elements are {@code left}'s elements followed
 * by {@code right}'s elements. The elements are copied into a fresh list; neither argument's
 * backing list is modified.
 */
public final static BinaryOperator<StringAndCharacterKinds> joiner = (left,right)->{
  List<StringAndCharacterKind> sum = new ArrayList<>(left.size() + right.size());
  sum.addAll(left.inner());
  sum.addAll(right.inner());
  return new StringAndCharacterKinds(sum);
};
/**
 * Returns the backing list itself (not a copy).
 *
 * @return the {@code inner} field
 */
public List<StringAndCharacterKind> inner(){
  return inner;
}
/**
 * Concatenates two instances by applying {@code joiner}.
 *
 * @param left  elements that come first
 * @param right elements that come second
 * @return {@code joiner.apply(left, right)}
 */
public static StringAndCharacterKinds join(StringAndCharacterKinds left , StringAndCharacterKinds right) {
  final StringAndCharacterKinds combined = joiner.apply(left, right);
  return combined;
}
/**
 * Returns this instance followed by {@code adding}.
 *
 * @param adding elements to append
 * @return {@code this} when {@code adding} is empty, otherwise the result of the static
 *         {@code join(this, adding)}
 */
public StringAndCharacterKinds join(StringAndCharacterKinds adding) {
  return adding.isEmpty()
      ? this
      : StringAndCharacterKinds.join(this, adding);
}
/**
 * Returns the shared {@code EMPTY} instance.
 *
 * @return {@code EMPTY}
 */
public static StringAndCharacterKinds empty() {
  return StringAndCharacterKinds.EMPTY;
}
List> characterKindIndexOf(CharacterKind... characterKinds){
List> indexes = new ArrayList<>();
int index = 0;
for(StringAndCharacterKind stringAndCharacterKind : inner) {
for (CharacterKind characterKind : characterKinds) {
if(stringAndCharacterKind.characterKind == characterKind) {
indexes.add(new Tuple2<>(index , stringAndCharacterKind));
break;
}
}
index++;
}
return indexes;
}
/**
 * Builds a new instance that keeps only the elements whose {@code characterKind} is not
 * identical ({@code ==}) to any of the given kinds.
 *
 * @param characterKinds kinds to drop
 * @return new instance holding the remaining elements in their original order
 */
public StringAndCharacterKinds cutFilterchracterKindIndexOf(CharacterKind... characterKinds){
  List<StringAndCharacterKind> kept = new ArrayList<>();
  nextElement:
  for (StringAndCharacterKind element : inner) {
    for (CharacterKind excluded : characterKinds) {
      if (element.characterKind == excluded) {
        // Element is of an excluded kind: skip it.
        continue nextElement;
      }
    }
    kept.add(element);
  }
  return new StringAndCharacterKinds(kept);
}
/**
 * Renders the elements' {@code toString()} values separated by {@code ","} and enclosed in
 * {@code "["} and {@code "]"}.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("[");
  boolean first = true;
  for (StringAndCharacterKind element : inner) {
    if (!first) {
      text.append(",");
    }
    text.append(element.toString());
    first = false;
  }
  return text.append("]").toString();
}
/**
 * Strips leading and trailing whitespace from the joined text.
 *
 * @return {@code this} when {@link String#strip()} leaves the length of {@code joined}
 *         unchanged, otherwise the stripped text re-split with {@code of(stripped, false)}
 */
public StringAndCharacterKinds strip() {
  final String stripped = joined.strip();
  final boolean unchanged = stripped.length() == joined.length();
  return unchanged
      ? this
      : StringAndCharacterKinds.of(stripped, false);
}
/**
 * Matches every kind except {@code delimitorHyphen}, {@code delimitorJapaneseSymbol} and
 * {@code delimitorSpace}.
 */
static final Predicate<CharacterKind> notSpaceAndDelimitor =
    Predicate.not(characterKind->
        characterKind == CharacterKind.delimitorHyphen ||
        characterKind == CharacterKind.delimitorJapaneseSymbol ||
        characterKind == CharacterKind.delimitorSpace
    );
/**
 * Strips leading and trailing elements using the {@code notSpaceAndDelimitor} predicate.
 *
 * @return {@code strip(notSpaceAndDelimitor)}
 */
public StringAndCharacterKinds stripIncludesSymbols() {
  final StringAndCharacterKinds stripped = strip(notSpaceAndDelimitor);
  return stripped;
}
/**
 * Drops leading and trailing elements until one whose {@code characterKind} satisfies
 * {@code matchWithNonStrip} is reached on each side.
 *
 * <p>Fix: the previous body ignored {@code matchWithNonStrip} and always searched with
 * {@code notSpaceAndDelimitor}; the supplied predicate is now honoured.
 *
 * @param matchWithNonStrip predicate that is true for kinds that must be kept
 * @return {@code empty()} when no element satisfies the predicate, otherwise a new instance
 *         wrapping {@code inner.subList(left, right)}
 */
public StringAndCharacterKinds strip(Predicate<CharacterKind> matchWithNonStrip) {
  // Index of the first element to keep, or -1 when nothing matches.
  ListIndex left = indexOf(matchWithNonStrip , TopOrBottom.TOP);
  if (left.value == -1) {
    return StringAndCharacterKinds.empty();
  }
  // Searching from the bottom yields the index just past the last element to keep.
  ListIndex right = indexOf(matchWithNonStrip , TopOrBottom.BOTTOM);
  if (left.value > right.value) {
    return StringAndCharacterKinds.empty();
  }
  List<StringAndCharacterKind> subList = inner.subList(left.value, right.value);
  return new StringAndCharacterKinds(subList);
}
/**
 * Finds the first element, scanning from the given end, whose {@code characterKind}
 * satisfies {@code predicate}.
 *
 * <p>Scanning from {@code TopOrBottom.TOP} walks forwards and reports the matching element's
 * index. Any other value walks backwards from the end and reports the matching element's
 * index plus one, i.e. an exclusive end index.
 *
 * @param predicate test applied to each element's {@code characterKind}
 * @param from      end of the list to start scanning from
 * @return {@code ListIndex.of(index)} for the first match, {@code ListIndex.of(-1)} when no
 *         element matches
 */
public ListIndex indexOf(Predicate<CharacterKind> predicate , TopOrBottom from) {
  final boolean fromTop = from == TopOrBottom.TOP;
  ListIterator<StringAndCharacterKind> iterator = fromTop ?
      listIterator() :
      listIteratorFromLast();
  while (fromTop ? iterator.hasNext() : iterator.hasPrevious()) {
    // Read the index before moving the cursor; backwards scans report previousIndex()+1.
    int index = fromTop ? iterator.nextIndex() : iterator.previousIndex() + 1;
    StringAndCharacterKind stringAndCharacterKind = fromTop ? iterator.next() : iterator.previous();
    if (predicate.test(stringAndCharacterKind.characterKind)) {
      return ListIndex.of(index);
    }
  }
  return ListIndex.of(-1);
}
/**
 * Cuts {@code joined} between the two string indexes and re-splits the result with
 * {@code of(substring, false)}.
 *
 * @param beginIndexInclusive start of the range (its {@code value} field is used)
 * @param endIndexExclusive   end of the range (its {@code value} field is used)
 * @param leadingSeparator    not read by this method
 * @param tailingSeparator    not read by this method
 * @return instance built from the substring
 */
public StringAndCharacterKinds substring(
    StringIndex beginIndexInclusive,
    StringIndex endIndexExclusive ,
    SeparatorKind leadingSeparator ,
    SeparatorKind tailingSeparator) {
  return StringAndCharacterKinds.of(
      joined.substring(beginIndexInclusive.value, endIndexExclusive.value),
      false);
}
/**
 * Cuts {@code joined} from the given string index to its end and re-splits the result with
 * {@code of(substring, false)}.
 *
 * @param beginIndexInclusive start of the range (its {@code value} field is used)
 * @param leadingSeparator    not read by this method
 * @param tailingSeparator    not read by this method
 * @return instance built from the substring
 */
public StringAndCharacterKinds substring(
    StringIndex beginIndexInclusive,
    SeparatorKind leadingSeparator ,
    SeparatorKind tailingSeparator) {
  return StringAndCharacterKinds.of(
      joined.substring(beginIndexInclusive.value),
      false);
}
/**
 * Tells whether the joined text contains {@code word}.
 *
 * @param word text to look for
 * @return {@code joined().contains(word)}
 */
public boolean contains(String word) {
  final String text = joined();
  return text.contains(word);
}
/**
 * Finds {@code word} in the joined text, starting at {@code fromIndex}.
 *
 * <p>Both the joined text and {@code word} are passed through
 * {@code variantNormalizer.normalizeForAddress}. The normalized forms are searched only when
 * normalization changed neither length; otherwise the raw strings are searched.
 *
 * @param word      text to look for
 * @param fromIndex position to start searching from (its {@code value} field is used)
 * @return {@code StringIndex.of(...)} of the {@link String#indexOf(String, int)} result
 */
public StringIndex indexOf(String word , StringIndex fromIndex) {
  final String normalizedText = variantNormalizer.normalizeForAddress(joined);
  final String normalizedWord = variantNormalizer.normalizeForAddress(word);
  if (normalizedText.length() != joined.length() || word.length() != normalizedWord.length()) {
    // Normalization changed a length, so normalized offsets would not line up with joined.
    return StringIndex.of(joined().indexOf(word, fromIndex.value));
  }
  return StringIndex.of(normalizedText.indexOf(normalizedWord, fromIndex.value));
}
/**
 * Tries to locate {@code word} in the joined text starting at {@code fromIndex}.
 *
 * First calls {@code indexOf(word, fromIndex)}; when that index reports itself valid, a
 * range from the index to index + {@code word.length()} is returned with
 * {@code MatchKind.Match}. Otherwise the characters of {@code word} contained in
 * {@code excludes} are removed and the remaining characters are searched one by one in the
 * stripped remainder of the joined text.
 *
 * NOTE(review): the statement beginning {@code if(pointer+1(range, ...} inside the second
 * loop is truncated in this copy of the file; the text between a '<' and a later '>'
 * appears to have been lost, together with the generic type arguments on the signature.
 * Restore this method from the upstream source before relying on it.
 *
 * @param word text to look for
 * @param fromIndex position in the joined text to start from
 * @param excludes single-character strings removed from {@code word} before the fallback search
 * @return tuple of the matched range and the kind of match; an invalid range with
 *         {@code MatchKind.NoMatch} when nothing is found
 */
public Tuple2 match(String word , StringIndex fromIndex , Set excludes){
// Fast path: the word is found as-is (or in normalized form) by indexOf.
StringIndex indexOf = indexOf(word , fromIndex);
if(indexOf.isValid()) {
return new Tuple2<>(
new Range(indexOf.value, indexOf.value+word.length()),
MatchKind.Match
);
}
int indexFrom = fromIndex.value;
String base = joined().substring(indexFrom).strip();
// Rebuild the word without the excluded single-character strings.
String regulatedWord="";
for(int i =0 ; i< word.length();i++) {
String substring = word.substring(i,i+1);
if(false == excludes.contains(substring)) {
regulatedWord += substring;
}
}
int pointer = 0;
String notMatches="";
List pointers = new ArrayList();
// Search each remaining character in order; give up as soon as one is missing.
for(int i =0 ; i< regulatedWord.length();i++) {
String substring = regulatedWord.substring(i,i+1);
int indexOfMatch = base.indexOf(substring,pointer);
if(indexOfMatch == -1) {
return new Tuple2<>(new Range(StringIndex.invalid().value), MatchKind.NoMatch);
}
// NOTE(review): the next line is truncated in this copy (see the method comment).
if(pointer+1(range, MatchKind.NormalizedMatch);
}
// TODO fuzzy match?
return new Tuple2<>(new Range(StringIndex.invalid().value), MatchKind.NoMatch);
}
/**
 * Counts how many single-{@code char} substrings of {@code word} are contained in
 * {@code matchStrings}.
 *
 * @param word         text to scan one {@code char} at a time
 * @param matchStrings single-character strings to count
 * @return number of positions in {@code word} whose character is in {@code matchStrings}
 */
private int matches(String word , Set<String> matchStrings) {
  int counter = 0;
  for (int i = 0; i < word.length(); i++) {
    if (matchStrings.contains(word.substring(i, i + 1))) {
      counter++;
    }
  }
  return counter;
}
/**
* @param string
* @param concatJapaneseSymbolToNormal
* @return StringAndCharacterKinds
*
* concatJapaneseSymbolToNormal true -> "肉ー" is [normal("肉ー")]
* concatJapaneseSymbolToNormal false -> "肉ー" is [normal("肉"),delimitorJapaneseSymbol("ー")]
*/
public static StringAndCharacterKinds of(
String string ,
boolean concatJapaneseSymbolToNormal) {
if(string == null || string.isEmpty()) {
return StringAndCharacterKinds.empty();
}
List collect = CharacterKinds.codePointAndCharacterKinds(string);
List> rans = new ArrayList>();
CharacterKind last = null;
var ran = new ArrayList();
for (CodePointAndCharacterKind codePointAndCharacterKind : collect) {
CharacterKind characterKind = codePointAndCharacterKind.characterKind;
if(concatJapaneseSymbolToNormal && last != null && last.isJapanese() && characterKind.isJapanese()) {
codePointAndCharacterKind = codePointAndCharacterKind.as(CharacterKind.normal);
ran.add(codePointAndCharacterKind);
}else if(last != null && (last.isHiragana() || last.isKatakana()) && characterKind == CharacterKind.cyouon) {
codePointAndCharacterKind = codePointAndCharacterKind.as(last);
ran.add(codePointAndCharacterKind);
}else if(last != characterKind) {
if(false == ran.isEmpty()) {
rans.add(new ArrayList<>(ran));
ran.clear();
}
ran.add(codePointAndCharacterKind);
last = characterKind;
}else {
ran.add(codePointAndCharacterKind);
}
}
if(false == ran.isEmpty()) {
rans.add(ran);
}
List results = new ArrayList<>();
for (List sameKindList : rans) {
CharacterKind characterKind = sameKindList.get(0).characterKind;
int[] codePoints = new int[sameKindList.size()];
int index =0;
for (CodePointAndCharacterKind codePointAndCharacterKind : sameKindList) {
codePoints[index++] = codePointAndCharacterKind.codePoint;
}
String ranString = new String(codePoints , 0 , codePoints.length);
StringAndCharacterKind stringAndCharacterKind = new StringAndCharacterKind(characterKind, ranString);
results.add(stringAndCharacterKind);
}
return new StringAndCharacterKinds(results);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy