com.yahoo.language.process.CharacterClasses Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.process;
/**
* Determines the class of a given character. Use this rather than java.lang.Character.
*
* @author bratseth
*/
public class CharacterClasses {
/**
* Returns true for code points which are letters in unicode 3 or 4, plus some additional characters
* which are useful to view as letters even though not defined as such in unicode.
*/
public boolean isLetter(int c) {
if (Character.isLetter(c)) return true;
if (Character.isDigit(c) && ! isLatin(c)) return true; // Not considering these digits, so treat them as letters
// Some CJK punctuation defined as word characters
if (c == '\u3008' || c == '\u3009' || c == '\u300a' || c == '\u300b' ||
c == '\u300c' || c == '\u300d' || c == '\u300e' ||
c == '\u300f' || c == '\u3010' || c == '\u3011') {
return true;
}
int type = java.lang.Character.getType(c);
return type == java.lang.Character.NON_SPACING_MARK ||
type == java.lang.Character.COMBINING_SPACING_MARK ||
type == java.lang.Character.ENCLOSING_MARK;
}
/**
* Returns true if the character is in the class "other symbol" - emojis etc.
*/
public boolean isSymbol(int c) {
return Character.getType(c) == Character.OTHER_SYMBOL;
}
/**
* Returns true for code points which should be considered digits - same as java.lang.Character.isDigit
*/
public boolean isDigit(int c) {
return Character.isDigit(c);
}
/** Returns true if this is a latin digit (other digits are not consistently parsed into numbers by Java) */
public boolean isLatinDigit(int c) {
return Character.isDigit(c) && isLatin(c);
}
/** Returns true if this is a latin character */
public boolean isLatin(int c) {
return Character.UnicodeBlock.of(c).equals(Character.UnicodeBlock.BASIC_LATIN);
}
/**
* Convenience, returns isLetter(c) || isDigit(c)
*/
public boolean isLetterOrDigit(int c) {
return isLetter(c) || isDigit(c);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy