org.unlaxer.jaddress.parser.CharacterKinds Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of japanese-address-parser Show documentation
Show all versions of japanese-address-parser Show documentation
A simple Japanese address parser
The newest version!
package org.unlaxer.jaddress.parser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
public class CharacterKinds implements
SingleCharacterKindAllMatch ,
SingleCharacterKindAnyMatch ,
SingleCharacterKindNotMatch,
CombinedCharacterKindAllMatch ,
CombinedCharacterKindAnyMatch ,
CombinedCharacterKindNotMatch{
// TODO support sequential matching
final Set characterKinds;
public static final CharacterKinds EMPTY = new CharacterKinds();
public CharacterKinds(Collection characterKinds) {
super();
this.characterKinds = new HashSet<>(characterKinds);
}
public CharacterKinds(CharacterKind... characterKinds) {
super();
this.characterKinds = Set.of(characterKinds);
}
public CharacterKinds(StringAndCharacterKinds stringAndCharacterKinds) {
this(
stringAndCharacterKinds.inner.stream()
.map(StringAndCharacterKind::characterKind)
.collect(Collectors.toList())
);
}
public static CharacterKinds empty() {
return EMPTY;
}
public Set collection(){
return characterKinds;
}
public boolean allMatch(CharacterKind characterKind){
return characterKinds.stream()
.allMatch(kind->kind == characterKind);
}
public boolean anyMatch(CharacterKind characterKind){
return characterKinds.stream()
.anyMatch(kind->kind == characterKind);
}
public boolean anyMatch(Predicate super CharacterKind> predicate){
return characterKinds.stream()
.anyMatch(predicate);
}
public boolean notMatch(CharacterKind characterKind){
return false == characterKinds.stream()
.anyMatch(kind->kind == characterKind);
}
@Override
public boolean isArabicNumber() {
return allMatch(CharacterKind.arabicNumber);
}
@Override
public boolean isSymbol() {
return allMatch(CharacterKind.symbol);
}
@Override
public boolean isAlphabet() {
return allMatch(CharacterKind.alphabet);
}
@Override
public boolean isJapaneseNumber() {
return allMatch(CharacterKind.japaneseAddressNumber);
}
@Override
public boolean isHiragana() {
return allMatch(CharacterKind.hiragana);
}
@Override
public boolean isKatakana() {
return allMatch(CharacterKind.katakana);
}
@Override
public boolean isDelimitorHyphen() {
return allMatch(CharacterKind.delimitorHyphen);
}
@Override
public boolean isDelimitorSlash() {
return allMatch(CharacterKind.delimitorSlash);
}
@Override
public boolean isDelimitorSpace() {
return allMatch(CharacterKind.delimitorSpace);
}
@Override
public boolean isDelimitorComma() {
return allMatch(CharacterKind.delimitorComma);
}
@Override
public boolean isDelimitorJapanese() {
return allMatch(CharacterKind.delimitorJapanese);
}
@Override
public boolean isDelimitorJapaneseCyoumeAddress() {
return allMatch(CharacterKind.suffix丁目);
}
@Override
public boolean isDelimitorJapaneseBanchiAddress() {
return allMatch(CharacterKind.suffix地番);
}
@Override
public boolean isDelimitorJapaneseGouAddress() {
return allMatch(CharacterKind.suffix号);
}
@Override
public boolean isNormal() {
return allMatch(CharacterKind.normal);
}
boolean allMatch(Predicate super CharacterKind> predicate) {
return characterKinds.stream()
.allMatch(predicate);
}
@Override
public boolean isJapanese() {
return allMatch(CharacterKind::isJapanese);
}
@Override
public boolean isDelimitor() {
return allMatch(CharacterKind::isDelimitor);
}
@Override
public boolean isJapanesAddressDelimitor() {
return allMatch(CharacterKind::isJapanesAddressDelimitor);
}
@Override
public boolean isNumber() {
return allMatch(CharacterKind::isNumber);
}
@Override
public boolean anyMatchJapanese() {
return anyMatch(CharacterKind::isJapanese);
}
@Override
public boolean anyMatchDelimitor() {
return anyMatch(CharacterKind::isDelimitor);
}
@Override
public boolean anyMatchJapanesAddressDelimitor() {
return anyMatch(CharacterKind::isJapanesAddressDelimitor);
}
@Override
public boolean anyMatchNumber() {
return anyMatch(CharacterKind::isNumber);
}
@Override
public boolean anyMatchArabicNumber() {
return anyMatch(CharacterKind.arabicNumber);
}
@Override
public boolean anyMatchSymbol() {
return anyMatch(CharacterKind.symbol);
}
@Override
public boolean anyMatchAlphabet() {
return anyMatch(CharacterKind.alphabet);
}
@Override
public boolean anyMatchJapaneseNumber() {
return anyMatch(CharacterKind.japaneseAddressNumber);
}
@Override
public boolean anyMatchHiragana() {
return anyMatch(CharacterKind.hiragana);
}
@Override
public boolean anyMatchKatakana() {
return anyMatch(CharacterKind.katakana);
}
@Override
public boolean anyMatchDelimitorHyphen() {
return anyMatch(CharacterKind.delimitorHyphen);
}
@Override
public boolean anyMatchDelimitorSlash() {
return anyMatch(CharacterKind.delimitorSlash);
}
@Override
public boolean anyMatchDelimitorSpace() {
return anyMatch(CharacterKind.delimitorSpace);
}
@Override
public boolean anyMatchDelimitorComma() {
return anyMatch(CharacterKind.delimitorComma);
}
@Override
public boolean anyMatchDelimitorJapanese() {
return anyMatch(CharacterKind.delimitorJapanese);
}
@Override
public boolean anyMatchDelimitorJapaneseCyoumeAddress() {
return anyMatch(CharacterKind.suffix丁目);
}
@Override
public boolean anyMatchDelimitorJapaneseBanchiAddress() {
return anyMatch(CharacterKind.suffix地番);
}
@Override
public boolean anyMatchDelimitorJapaneseGouAddress() {
return anyMatch(CharacterKind.suffix号);
}
@Override
public boolean anyMatchNormal() {
return anyMatch(CharacterKind.normal);
}
@Override
public boolean isAllKind() {
return true;
}
public CharacterKinds add(CharacterKinds characterKinds2) {
List characterKinds = new ArrayList();
characterKinds.addAll(this.characterKinds);
characterKinds.addAll(characterKinds2.characterKinds);
return new CharacterKinds(characterKinds);
}
@Override
public String toString() {
String collect = characterKinds.stream()
.map(CharacterKind::name)
.collect(Collectors.joining(",","[","]"));
return collect;
}
public boolean isEmpty() {
return characterKinds.isEmpty();
}
// public static StringAndCharacterKinds stringAndCharacterKindsOf(String string) {
// return stringAndCharacterKindsOf(string , false);
// }
public static List codePointAndCharacterKinds(String string) {
ListIterator listIterator = string.codePoints().boxed().collect(Collectors.toList()).listIterator();
List results = new ArrayList();
while (listIterator.hasNext()) {
List codePointAndCharacterKinds = codePointAndCharacterKinds(listIterator);
results.addAll(codePointAndCharacterKinds);
}
return results;
}
private static List codePointAndCharacterKinds(ListIterator codePointIterator){
for(CharacterKind characterKind : CharacterKind.values()) {
List matched = characterKind.matched(codePointIterator);
if(matched.isEmpty()) {
continue;
}
return create(matched , characterKind);
}
throw new IllegalArgumentException();
}
static List create(List wordOfDictionary,
CharacterKind characterKind) {
List collect = wordOfDictionary.stream()
.map(codePoint->new CodePointAndCharacterKind(characterKind, codePoint))
.collect(Collectors.toList());
return collect;
}
@Override
public boolean isTerminator() {
return allMatch(CharacterKind.terminator);
}
/**
* @param string
* @param concatJapaneseSymbolToNormal
* @return StringAndCharacterKinds
*
* concatJapaneseSymbolToNormal true -> "肉ー" is [normal("肉ー")]
* concatJapaneseSymbolToNormal false -> "肉ー" is [normal("肉"),delimitorJapaneseSymbol("ー")]
*/
public static CharacterKinds of(
String string ,
boolean concatJapaneseSymbolToNormal) {
if(string == null || string.isEmpty()) {
return CharacterKinds.empty();
}
List collect = codePointAndCharacterKinds(string);
List> rans = new ArrayList>();
CharacterKind last = null;
var ran = new ArrayList();
for (CodePointAndCharacterKind codePointAndCharacterKind : collect) {
CharacterKind characterKind = codePointAndCharacterKind.characterKind;
if(concatJapaneseSymbolToNormal && last != null && last.isJapanese() && characterKind.isJapanese()) {
codePointAndCharacterKind = codePointAndCharacterKind.as(CharacterKind.normal);
ran.add(codePointAndCharacterKind);
}else if(last != null && (last.isHiragana() || last.isKatakana()) && characterKind == CharacterKind.cyouon) {
codePointAndCharacterKind = codePointAndCharacterKind.as(last);
ran.add(codePointAndCharacterKind);
}else if(last != characterKind) {
if(false == ran.isEmpty()) {
rans.add(new ArrayList<>(ran));
ran.clear();
}
ran.add(codePointAndCharacterKind);
last = characterKind;
}else {
ran.add(codePointAndCharacterKind);
}
}
if(false == ran.isEmpty()) {
rans.add(ran);
}
List results = new ArrayList<>();
for (List sameKindList : rans) {
CharacterKind characterKind = sameKindList.get(0).characterKind;
int[] codePoints = new int[sameKindList.size()];
int index =0;
for (CodePointAndCharacterKind codePointAndCharacterKind : sameKindList) {
codePoints[index++] = codePointAndCharacterKind.codePoint;
}
results.add(characterKind);
}
return new CharacterKinds(results);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy