![JAR search and dependency download from the Maven repository](/logo.png)
io.xlate.edi.internal.stream.tokenization.CharacterSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of staedi Show documentation
Show all versions of staedi Show documentation
Streaming API for EDI for Java
/*******************************************************************************
* Copyright 2017 xlate.io LLC, http://www.xlate.io
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package io.xlate.edi.internal.stream.tokenization;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
/**
 * Maps character code points to {@link CharacterClass} values for the EDI
 * tokenizer.
 *
 * <p>
 * Each instance starts from a shared default table covering the 128 ASCII
 * characters. Individual characters may be re-classified per instance with
 * {@link #setClass(int, CharacterClass)} and the defaults restored with
 * {@link #reset()}. Code points outside of the ASCII table are classified as
 * {@code OTHER} unless an override has been registered.
 *
 * <p>
 * Negative code points are not supported: they index the ASCII table directly
 * and result in an {@link ArrayIndexOutOfBoundsException}.
 */
public class CharacterSet {

    private static final CharacterClass _SPACE = CharacterClass.SPACE;
    private static final CharacterClass _LATNA = CharacterClass.LATIN_A;
    private static final CharacterClass _LATNB = CharacterClass.LATIN_B;
    private static final CharacterClass _LATNE = CharacterClass.LATIN_E;
    private static final CharacterClass _LATNI = CharacterClass.LATIN_I;
    private static final CharacterClass _LATNN = CharacterClass.LATIN_N;
    private static final CharacterClass _LATNS = CharacterClass.LATIN_S;
    private static final CharacterClass _LATNU = CharacterClass.LATIN_U;
    private static final CharacterClass _LATNZ = CharacterClass.LATIN_Z;
    private static final CharacterClass _ALNUM = CharacterClass.ALPHANUMERIC;
    private static final CharacterClass _OTHER = CharacterClass.OTHER;
    private static final CharacterClass _WHITE = CharacterClass.WHITESPACE;
    private static final CharacterClass _CNTRL = CharacterClass.CONTROL;
    private static final CharacterClass _INVLD = CharacterClass.INVALID;

    /*
     * Default classification of the 128 ASCII characters, indexed by code
     * point. Characters beyond this table default to _OTHER (see getClass).
     * Entries classified as _CNTRL or _INVLD are reported as not valid by
     * isValid.
     */
    private static final CharacterClass[] prototype = {
            _INVLD, /* 00 NUL */
            _CNTRL, /* 01 SOH */
            _CNTRL, /* 02 STX */
            _CNTRL, /* 03 ETX */
            _CNTRL, /* 04 EOT */
            _CNTRL, /* 05 ENQ */
            _CNTRL, /* 06 ACK */
            _CNTRL, /* 07 BEL */
            _INVLD, /* 08 BS */
            _WHITE, /* 09 HT */
            _WHITE, /* 0A LF */
            _WHITE, /* 0B VT */
            _WHITE, /* 0C FF */
            _WHITE, /* 0D CR */
            _INVLD, /* 0E SO */
            _INVLD, /* 0F SI */
            _INVLD, /* 10 DLE */
            _CNTRL, /* 11 DC1 */
            _CNTRL, /* 12 DC2 */
            _CNTRL, /* 13 DC3 */
            _CNTRL, /* 14 DC4 */
            _CNTRL, /* 15 NAK */
            _CNTRL, /* 16 SYN */
            _CNTRL, /* 17 ETB */
            _INVLD, /* 18 CAN */
            _INVLD, /* 19 EM */
            _INVLD, /* 1A SUB */
            _INVLD, /* 1B ESC */
            _CNTRL, /* 1C FS */
            _CNTRL, /* 1D GS */
            _CNTRL, /* 1E RS */
            _CNTRL, /* 1F US */
            _SPACE, /* 20 Space */
            _OTHER, /* 21 ! */
            _OTHER, /* 22 " */
            _OTHER, /* 23 # */
            _OTHER, /* 24 $ */
            _OTHER, /* 25 % */
            _OTHER, /* 26 & */
            _OTHER, /* 27 ' */
            _OTHER, /* 28 ( */
            _OTHER, /* 29 ) */
            _OTHER, /* 2A * */
            _OTHER, /* 2B + */
            _OTHER, /* 2C , */
            _OTHER, /* 2D - */
            _OTHER, /* 2E . */
            _OTHER, /* 2F / */
            _ALNUM, /* 30 0 */
            _ALNUM, /* 31 1 */
            _ALNUM, /* 32 2 */
            _ALNUM, /* 33 3 */
            _ALNUM, /* 34 4 */
            _ALNUM, /* 35 5 */
            _ALNUM, /* 36 6 */
            _ALNUM, /* 37 7 */
            _ALNUM, /* 38 8 */
            _ALNUM, /* 39 9 */
            _OTHER, /* 3A : */
            _OTHER, /* 3B ';' */
            _OTHER, /* 3C < */
            _OTHER, /* 3D = */
            _OTHER, /* 3E > */
            _OTHER, /* 3F ? */
            _OTHER, /* 40 @ */
            _LATNA, /* 41 A */
            _LATNB, /* 42 B */
            _ALNUM, /* 43 C */
            _ALNUM, /* 44 D */
            _LATNE, /* 45 E */
            _ALNUM, /* 46 F */
            _ALNUM, /* 47 G */
            _ALNUM, /* 48 H */
            _LATNI, /* 49 I */
            _ALNUM, /* 4A J */
            _ALNUM, /* 4B K */
            _ALNUM, /* 4C L */
            _ALNUM, /* 4D M */
            _LATNN, /* 4E N */
            _ALNUM, /* 4F O */
            _ALNUM, /* 50 P */
            _ALNUM, /* 51 Q */
            _ALNUM, /* 52 R */
            _LATNS, /* 53 S */
            _ALNUM, /* 54 T */
            _LATNU, /* 55 U */
            _ALNUM, /* 56 V */
            _ALNUM, /* 57 W */
            _ALNUM, /* 58 X */
            _ALNUM, /* 59 Y */
            _LATNZ, /* 5A Z */
            _OTHER, /* 5B [ */
            _OTHER, /* 5C \ */
            _OTHER, /* 5D ] */
            _OTHER, /* 5E ^ */
            _OTHER, /* 5F _ */
            _OTHER, /* 60 ` */
            _ALNUM, /* 61 a */
            _ALNUM, /* 62 b */
            _ALNUM, /* 63 c */
            _ALNUM, /* 64 d */
            _ALNUM, /* 65 e */
            _ALNUM, /* 66 f */
            _ALNUM, /* 67 g */
            _ALNUM, /* 68 h */
            _ALNUM, /* 69 i */
            _ALNUM, /* 6A j */
            _ALNUM, /* 6B k */
            _ALNUM, /* 6C l */
            _ALNUM, /* 6D m */
            _ALNUM, /* 6E n */
            _ALNUM, /* 6F o */
            _ALNUM, /* 70 p */
            _ALNUM, /* 71 q */
            _ALNUM, /* 72 r */
            _ALNUM, /* 73 s */
            _ALNUM, /* 74 t */
            _ALNUM, /* 75 u */
            _ALNUM, /* 76 v */
            _ALNUM, /* 77 w */
            _ALNUM, /* 78 x */
            _ALNUM, /* 79 y */
            _ALNUM, /* 7A z */
            _OTHER, /* 7B '{' */
            _OTHER, /* 7C | */
            _OTHER, /* 7D '}' */
            _OTHER, /* 7E ~ */
            _INVLD /* 7F DEL */
    };

    /** Per-instance, mutable copy of {@link #prototype}, indexed by code point. */
    private final CharacterClass[] list;

    /**
     * Overrides for code points at or beyond {@code list.length}. Typed
     * explicitly so that lookups yield a {@link CharacterClass} rather than
     * {@code Object}, as a raw {@code Map} would.
     */
    private final Map<Integer, CharacterClass> auxiliary;

    /** Value reported by {@link #isIgnored(int)} for control, invalid and whitespace characters. */
    private final boolean extraneousIgnored;

    /**
     * Creates a character set with the default classifications in which
     * extraneous characters are not ignored.
     */
    public CharacterSet() {
        this(false);
    }

    /**
     * Creates a character set with the default classifications.
     *
     * @param extraneousIgnored
     *            the value {@link #isIgnored(int)} reports for characters
     *            classified as control, invalid or whitespace
     */
    public CharacterSet(boolean extraneousIgnored) {
        this.list = Arrays.copyOf(prototype, prototype.length);
        this.auxiliary = new TreeMap<>();
        this.extraneousIgnored = extraneousIgnored;
    }

    /**
     * Returns the class currently assigned to a character.
     *
     * @param character
     *            the non-negative code point to classify
     * @return the class from the per-instance table when the code point falls
     *         within it; otherwise the registered override, or {@code OTHER}
     *         when no override exists
     * @throws ArrayIndexOutOfBoundsException
     *             if {@code character} is negative
     */
    public CharacterClass getClass(int character) {
        if (character < list.length) {
            return list[character];
        }
        return auxiliary.getOrDefault(character, _OTHER);
    }

    /**
     * Restores the default classifications, discarding every override made
     * through {@link #setClass(int, CharacterClass)}.
     */
    public void reset() {
        System.arraycopy(prototype, 0, list, 0, prototype.length);
        auxiliary.clear();
    }

    /**
     * Assigns a class to a character for this instance only.
     *
     * @param character
     *            the non-negative code point to re-classify
     * @param clazz
     *            the class to assign
     * @throws ArrayIndexOutOfBoundsException
     *             if {@code character} is negative
     */
    public void setClass(int character, CharacterClass clazz) {
        if (character < list.length) {
            list[character] = clazz;
        } else {
            // Beyond the fixed table: keep the override in the sparse map.
            auxiliary.put(character, clazz);
        }
    }

    /**
     * Tells whether a character is currently classified as one of the
     * delimiter classes (element, repeater, segment or component).
     *
     * @param character
     *            the non-negative code point to test
     * @return {@code true} if the character's class is a delimiter class
     */
    public boolean isDelimiter(int character) {
        switch (getClass(character)) {
        case ELEMENT_DELIMITER:
        case ELEMENT_REPEATER:
        case SEGMENT_DELIMITER:
        case COMPONENT_DELIMITER:
            return true;
        default:
            return false;
        }
    }

    /**
     * Tells whether a character is to be ignored. Only characters classified
     * as control, invalid or whitespace can be ignored, and only when this
     * instance was created with {@code extraneousIgnored} set.
     *
     * @param character
     *            the non-negative code point to test
     * @return the {@code extraneousIgnored} setting for control, invalid and
     *         whitespace characters; {@code false} for all others
     */
    public boolean isIgnored(int character) {
        switch (getClass(character)) {
        case CONTROL:
        case INVALID:
        case WHITESPACE:
            return extraneousIgnored;
        default:
            return false;
        }
    }

    /**
     * Tells whether a character is currently assigned the given class.
     *
     * @param character
     *            the non-negative code point to test
     * @param clazz
     *            the class to compare against
     * @return {@code true} if the character's current class is {@code clazz}
     */
    public boolean isCharacterClass(int character, CharacterClass clazz) {
        // Enum constants are singletons, so identity comparison is exact.
        return getClass(character) == clazz;
    }

    /**
     * Tells whether a character is valid according to the default
     * classifications. Per-instance overrides are not consulted.
     *
     * @param character
     *            the non-negative code point to test
     * @return {@code false} if the default class is control or invalid;
     *         {@code true} otherwise, including for every code point beyond
     *         the default table
     * @throws ArrayIndexOutOfBoundsException
     *             if {@code character} is negative
     */
    public static boolean isValid(int character) {
        if (character >= prototype.length) {
            return true;
        }

        switch (prototype[character]) {
        case CONTROL:
        case INVALID:
            return false;
        default:
            return true;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy