
// io.xlate.edi.internal.stream.tokenization.Lexer (Maven / Gradle / Ivy)
/*******************************************************************************
* Copyright 2017 xlate.io LLC, http://www.xlate.io
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package io.xlate.edi.internal.stream.tokenization;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.logging.Logger;
import io.xlate.edi.internal.stream.LocationView;
import io.xlate.edi.internal.stream.StaEDIStreamLocation;
import io.xlate.edi.stream.Location;
public class Lexer {
private static final Logger LOGGER = Logger.getLogger(Lexer.class.getName());

/** Structural contexts the lexer can be nested in; tracked on the {@code modes} stack. */
private enum Mode {
    INTERCHANGE,
    SEGMENT,
    COMPOSITE
}

/** Stack of the contexts that are currently open (pushed on open, popped on close). */
private final Deque<Mode> modes = new ArrayDeque<>();

/** Most recent value returned by {@code readCharacter()}; negative once the stream has ended. */
private int input = 0;

/** Current state of the lexer's state machine. */
private State state = State.INITIAL;

/** State that was current before the most recent transition in {@code parse()}. */
private State previous;

/** Deferred callback that delivers one queued event to the handler. */
@FunctionalInterface
private interface Notifier {
    /**
     * @param state  lexer state recorded when the event was queued
     * @param start  offset of the event's text within the character buffer
     * @param length number of characters of the event's text
     * @return the value reported by the underlying handler call
     */
    boolean execute(State state, int start, int length);
}

/*
 * Four parallel queues: entry N of each queue describes the same pending
 * event. They are always added to and removed from together.
 */
private final Deque<Notifier> events = new ArrayDeque<>(20);
private final Deque<State> stateQueue = new ArrayDeque<>(20);
private final Deque<Integer> startQueue = new ArrayDeque<>(20);
private final Deque<Integer> lengthQueue = new ArrayDeque<>(20);

/** Byte source; wrapped in a BufferedInputStream by the constructor when mark is unsupported. */
private final InputStream stream;

/** Decoder used by {@code readCharacter()} to turn bytes into characters. */
private CharsetDecoder decoder;

/* Scratch buffers used by readCharacter(): one output char and up to four input bytes. */
private char[] readChar = new char[1];
private CharBuffer readCharBuf = CharBuffer.wrap(readChar);
private ByteBuffer readByteBuf = ByteBuffer.allocate(4);

private final StaEDIStreamLocation location;
private final CharacterSet characters;

/** Accumulates the characters of the tags and elements whose events are still queued. */
private CharBuffer buffer = CharBuffer.allocate(4096);

/** Dialect of the current interchange; null until detected and after the interchange ends. */
private Dialect dialect;

/* Binary element support, configured by setBinaryLength(long). */
private long binaryRemain = -1;
private InputStream binaryStream = null;

/* Notifiers created in the constructor, one per kind of handler callback. */
private Notifier isn; // interchange begin
private Notifier ien; // interchange end
private Notifier ssn; // segment begin
private Notifier sen; // segment end
private Notifier csn; // composite begin
private Notifier cen; // composite end
private Notifier en;  // element data
private Notifier bn;  // binary element data
public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDIStreamLocation location, boolean extraneousIgnored) {
if (stream.markSupported()) {
this.stream = stream;
} else {
this.stream = new BufferedInputStream(stream);
}
this.decoder = charset.newDecoder();
this.location = location;
this.characters = new CharacterSet(extraneousIgnored);
isn = (notifyState, start, length) -> {
handler.interchangeBegin(dialect);
return true;
};
ien = (notifyState, start, length) -> {
handler.interchangeEnd();
dialect = null;
characters.reset();
return true;
};
ssn = (notifyState, start, length) -> {
String segmentTag = new String(buffer.array(), start, length);
location.incrementSegmentPosition(segmentTag);
return handler.segmentBegin(segmentTag);
};
sen = (notifyState, start, length) -> {
boolean eventsReady = handler.segmentEnd();
location.clearSegmentLocations();
return eventsReady;
};
csn = (notifyState, start, length) -> {
if (location.isRepeated()) {
location.incrementElementOccurrence();
} else {
location.incrementElementPosition();
}
return handler.compositeBegin(false);
};
cen = (notifyState, start, length) -> {
boolean eventsReady = handler.compositeEnd(false);
location.clearComponentPosition();
return eventsReady;
};
en = (notifyState, start, length) -> {
updateLocation(notifyState, location);
return handler.elementData(buffer.array(), start, length);
};
bn = (notifyState, start, length) -> {
updateLocation(notifyState, location);
return handler.binaryData(binaryStream);
};
}
/**
 * @return the dialect of the current interchange, or {@code null} when none
 *         has been detected (it is also reset to {@code null} when an
 *         interchange ends or a header is rejected)
 */
public Dialect getDialect() {
    return this.dialect;
}
/**
 * Switches the lexer into binary element mode: the next {@code binaryLength}
 * bytes of the underlying stream are exposed through a dedicated
 * {@link InputStream} that is handed to the handler by a queued event.
 *
 * @param binaryLength number of bytes belonging to the binary element
 */
public void setBinaryLength(long binaryLength) {
    this.binaryRemain = binaryLength;

    this.binaryStream = new InputStream() {
        @Override
        public int read() throws IOException {
            // The counter is decremented on every call, even once exhausted.
            final boolean exhausted = binaryRemain < 1;
            binaryRemain--;

            if (exhausted) {
                state = State.ELEMENT_END_BINARY;
                return -1;
            }

            final int value = stream.read();

            if (value < 0) {
                // Underlying stream ended before the declared length was reached.
                state = State.ELEMENT_END_BINARY;
            } else {
                location.incrementOffset(value);
            }

            return value;
        }
    };

    enqueue(bn, 0);
    state = State.ELEMENT_DATA_BINARY;
}
/**
 * Advances the lexer until a handler callback reports that events are ready.
 *
 * If an event is already queued it is delivered first; when that delivery
 * reports ready, the method returns without reading any input. Otherwise
 * characters are read one at a time, each one drives a state transition,
 * and the resulting state decides whether the character is buffered and
 * which events are queued and delivered.
 *
 * @throws IOException  if reading or decoding the underlying stream fails
 * @throws EDIException if the lexer is already in {@code State.INVALID}, if a
 *                      character leads to a state with no handling below and
 *                      is not ignorable, or if the input ends before events
 *                      become ready ({@code INCOMPLETE_STREAM})
 */
public void parse() throws IOException, EDIException {
// Deliver one previously queued event, if any, before consuming more input
if (nextEvent()) {
return;
}
if (state == State.INVALID) {
// Unable to proceed once the state becomes invalid
throw invalidStateError();
}
boolean eventsReady = false;
// Read until a handler reports ready or readCharacter() signals end of input
while (!eventsReady && (input = readCharacter()) > -1) {
location.incrementOffset(input);
CharacterClass clazz = characters.getClass(input);
// Keep the prior state: used for error messages and to undo the transition
previous = state;
state = state.transition(clazz);
LOGGER.finer(() -> "State " + previous + "(" + clazz + ") -> " + state);
switch (state) {
// Searching states: the character is neither buffered nor reported
case INITIAL:
case TAG_SEARCH:
case HEADER_TAG_SEARCH:
break;
// Tag and element text: append the character to the buffer
case HEADER_TAG_I:
case HEADER_TAG_N:
case HEADER_TAG_S:
case HEADER_TAG_U:
case TAG_1:
case TAG_2:
case TAG_3:
case TRAILER_TAG_I:
case TRAILER_TAG_E:
case TRAILER_TAG_A:
case TRAILER_TAG_U:
case TRAILER_TAG_N:
case TRAILER_TAG_Z:
case ELEMENT_DATA:
case TRAILER_ELEMENT_DATA:
buffer.put((char) input);
break;
case ELEMENT_INVALID_DATA:
// Buffer the character unless the character set marks it as ignored
if (!characters.isIgnored(input)) {
buffer.put((char) input);
}
break;
case HEADER_TAG_1: // U - When UNA is present
case HEADER_TAG_2: // N - When UNA is present
case HEADER_TAG_3: // B - When UNA is present
handleStateHeaderTag(input);
break;
case DATA_RELEASE:
// Skip this character - next character will be literal value
break;
case ELEMENT_DATA_BINARY:
handleStateElementDataBinary();
break;
case INTERCHANGE_CANDIDATE:
// ISA, UNA, or UNB was found
handleStateInterchangeCandidate(input);
break;
// Header states: every character is also appended to the dialect's header;
// dialectConfirmed switches to the given state once the dialect is confirmed
case HEADER_DATA:
case HEADER_INVALID_DATA:
handleStateHeaderData(input);
eventsReady = dialectConfirmed(State.TAG_SEARCH);
break;
case HEADER_SEGMENT_BEGIN:
dialect.appendHeader(characters, (char) input);
openSegment();
eventsReady = dialectConfirmed(State.ELEMENT_END);
break;
case HEADER_ELEMENT_END:
dialect.appendHeader(characters, (char) input);
handleElement();
eventsReady = dialectConfirmed(State.ELEMENT_END);
break;
case HEADER_COMPONENT_END:
dialect.appendHeader(characters, (char) input);
handleComponent();
eventsReady = dialectConfirmed(State.COMPONENT_END);
break;
// Structural states: queue the matching event(s), then deliver the next one
case SEGMENT_BEGIN:
case TRAILER_BEGIN:
openSegment();
eventsReady = nextEvent();
break;
case SEGMENT_END:
closeSegment();
eventsReady = nextEvent();
break;
case SEGMENT_EMPTY:
emptySegment();
eventsReady = nextEvent();
break;
case COMPONENT_END:
handleComponent();
eventsReady = nextEvent();
break;
case ELEMENT_END:
case TRAILER_ELEMENT_END:
case ELEMENT_REPEAT:
handleElement();
eventsReady = nextEvent();
break;
case INTERCHANGE_END:
closeInterchange();
eventsReady = nextEvent();
break;
default:
// Any state without handling above: an ignorable character undoes the
// transition; anything else is reported as an error
if (characters.isIgnored(input)) {
state = previous;
} else if (clazz != CharacterClass.INVALID) {
throw invalidStateError();
} else {
throw error(EDIException.INVALID_CHARACTER);
}
}
}
// The loop only leaves input negative when readCharacter() hit end of stream
if (input < 0) {
throw error(EDIException.INCOMPLETE_STREAM);
}
}
/**
 * Reads bytes from the stream, one at a time, until the decoder produces a
 * single character.
 *
 * @return the decoded character, or -1 when the stream is at its end
 *         (including the case where it ends before a character was produced)
 * @throws IOException if reading fails or the decoder reports a result other
 *                     than underflow (raised via {@code CoderResult.throwException()})
 */
int readCharacter() throws IOException {
int next = stream.read();
if (next < 0) {
return -1;
}
boolean endOfInput = false;
boolean complete = false;
// Index in readByteBuf of the most recently added byte
int position = 0;
readCharBuf.clear();
readByteBuf.clear();
readByteBuf.put((byte) next);
do {
// Expose the bytes gathered so far to the decoder
readByteBuf.flip();
CoderResult cr = decoder.decode(readByteBuf, readCharBuf, endOfInput);
if (!cr.isUnderflow()) {
cr.throwException();
}
if (endOfInput) {
complete = true;
} else if (readCharBuf.position() > 0) {
// Single character successfully written to the CharBuffer
complete = true;
} else {
// No character yet: fetch one more byte and retry
next = stream.read();
if (next < 0) {
// Stream ended; loop once more so decode runs with endOfInput = true
endOfInput = true;
decoder.reset();
} else {
// Re-open the buffer for writing and append the byte after the earlier ones
// NOTE(review): assumes the decoder left the earlier bytes unconsumed - confirm
readByteBuf.limit(readByteBuf.capacity());
readByteBuf.position(++position);
readByteBuf.put((byte) next);
}
}
} while (!complete);
if (endOfInput) {
decoder.reset();
}
if (readCharBuf.position() == 0 && endOfInput) {
// Nothing was written to the CharBuffer
return -1;
}
return readChar[0];
}
/**
 * Records a header tag character both in the character buffer and in the
 * dialect's header.
 *
 * @param input the character just read
 */
void handleStateHeaderTag(int input) {
    final char tagChar = (char) input;
    buffer.put(tagChar);
    dialect.appendHeader(characters, tagChar);
}
/**
 * Accounts for one unit of binary data consumed by {@code parse()} itself
 * rather than through the binary stream, i.e. when not all of the binary
 * data was read before parsing continued. Ends the binary element once
 * fewer than one unit remains.
 */
void handleStateElementDataBinary() {
    binaryRemain -= 1;

    if (binaryRemain < 1) {
        state = State.ELEMENT_END_BINARY;
    }
}
/**
 * Handles the final character of a candidate interchange header tag: selects
 * the dialect from the buffered tag, replays the buffered characters into
 * the dialect's header, and opens the interchange and its first segment.
 *
 * @param input the character just read
 * @throws EDIException propagated from the dialect lookup
 */
void handleStateInterchangeCandidate(int input) throws EDIException {
    buffer.put((char) input);

    final char[] tagChars = buffer.array();
    final int tagLength = buffer.position();
    dialect = DialectFactory.getDialect(tagChars, 0, tagLength);

    int index = 0;
    while (index < tagLength) {
        dialect.appendHeader(characters, tagChars[index]);
        index++;
    }

    openInterchange();
    openSegment();
}
/**
 * Processes one character of header data. The character is always appended
 * to the dialect's header; what else happens depends on its class.
 *
 * @param input the character just read
 * @throws EDIException propagated from closing the segment
 */
void handleStateHeaderData(int input) throws EDIException {
    final char headerChar = (char) input;
    dialect.appendHeader(characters, headerChar);

    switch (characters.getClass(input)) {
    case ELEMENT_DELIMITER:
    case ELEMENT_REPEATER:
    case COMPONENT_DELIMITER:
    case RELEASE_CHARACTER:
        // Other delimiters are not buffered and trigger nothing here
        return;
    case SEGMENT_DELIMITER:
        closeSegment();
        state = State.HEADER_TAG_SEARCH;
        return;
    default:
        // Buffer everything except the decimal mark and ignored characters
        final boolean decimalMark = dialect.getDecimalMark() == input;
        if (!decimalMark && !characters.isIgnored(input)) {
            buffer.put(headerChar);
        }
        return;
    }
}
/**
 * Checks whether the dialect has finished evaluating the header.
 *
 * @param confirmed state to switch to when the dialect is confirmed
 * @return {@code true} when the dialect is confirmed (after delivering the
 *         next queued event), {@code false} when it is still undecided
 * @throws EDIException when the dialect rejected the header; the buffer,
 *                      the queues, the dialect and the state are reset first
 */
private boolean dialectConfirmed(State confirmed) throws EDIException {
    if (dialect.isConfirmed()) {
        state = confirmed;
        nextEvent();
        return true;
    }

    if (!dialect.isRejected()) {
        return false;
    }

    buffer.clear();
    clearQueues();
    dialect = null;
    state = State.INITIAL;
    throw error(EDIException.INVALID_STATE, "Invalid header segment");
}
/**
 * Builds an {@code INVALID_STATE} exception whose message names the current
 * state, the previous state and the offending input character.
 *
 * @return the exception, ready to be thrown by the caller
 */
private EDIException invalidStateError() {
    final String detail = ": " + state
            + " (previous: " + previous
            + "); input: '" + (char) input + '\'';
    return error(EDIException.INVALID_STATE, detail);
}
/**
 * Creates an exception carrying a view of the current location.
 *
 * @param code    one of the {@code EDIException} error codes
 * @param message text passed to the exception
 * @return the new exception (not thrown here)
 */
private EDIException error(int code, CharSequence message) {
    final Location position = new LocationView(location);
    final String text = message.toString();
    return new EDIException(code, text, position);
}
/**
 * Creates an exception without a message, carrying a view of the current
 * location.
 *
 * @param code one of the {@code EDIException} error codes
 * @return the new exception (not thrown here)
 */
private EDIException error(int code) {
    final Location position = new LocationView(location);
    return new EDIException(code, position);
}
/**
 * Updates the element occurrence, element position and component position
 * of {@code location} for an element event.
 *
 * @param state    lexer state recorded when the element event was queued
 * @param location location to update
 */
private static void updateLocation(State state, StaEDIStreamLocation location) {
    final boolean repeated = location.isRepeated();

    // Step 1: occurrence counter and repeat flag
    if (state == State.ELEMENT_REPEAT) {
        if (repeated) {
            updateElementOccurrence(location);
        } else {
            location.setElementOccurrence(1);
        }
        location.setRepeated(true);
    } else if (!repeated) {
        location.setElementOccurrence(1);
    } else if (state != State.COMPONENT_END) {
        // Last occurrence of a repeating element
        updateElementOccurrence(location);
        location.setRepeated(false);
    }

    // Step 2: component position, or element position for a first occurrence
    final boolean componentEnd = state == State.COMPONENT_END || state == State.HEADER_COMPONENT_END;

    if (componentEnd || location.getComponentPosition() > 0) {
        location.incrementComponentPosition();
    } else if (location.getElementOccurrence() == 1) {
        location.incrementElementPosition();
    }
}
/**
 * Increments the element occurrence, but only while no component position
 * has been set - i.e. the composite has not been started yet and only a
 * single component is present.
 *
 * @param location location to update
 */
static void updateElementOccurrence(StaEDIStreamLocation location) {
    if (location.getComponentPosition() >= 1) {
        return;
    }

    location.incrementElementOccurrence();
}
/**
 * Delivers the oldest queued event, if there is one, and clears the
 * character buffer once no events remain queued.
 *
 * @return the value returned by the delivered notifier, or {@code false}
 *         when the queue was empty
 */
private boolean nextEvent() {
    final Notifier pending = events.poll();
    boolean ready = false;

    if (pending != null) {
        // The three side queues hold the arguments recorded for this event
        final State queuedState = stateQueue.remove();
        final int offset = startQueue.remove();
        final int count = lengthQueue.remove();
        ready = pending.execute(queuedState, offset, count);
    }

    if (events.isEmpty()) {
        buffer.clear();
    }

    return ready;
}
/**
 * Queues an event together with the current state and the buffer range of
 * its text.
 *
 * @param task     notifier to run when the event is delivered
 * @param position current buffer position for events that carry text, or 0
 *                 for events without text
 */
private void enqueue(Notifier task, int position) {
    // First queued event: its text starts at the beginning of the buffer
    int start = 0;
    int length = position;

    if (!startQueue.isEmpty()) {
        // Later events start where the text of the last queued event ended
        start = startQueue.peekLast() + lengthQueue.peekLast();
        length = (position > 0) ? position - start : 0;
    }

    events.add(task);
    stateQueue.add(this.state);
    startQueue.add(start);
    lengthQueue.add(length);
}
/** Discards every pending event along with its recorded state and text range. */
private void clearQueues() {
    lengthQueue.clear();
    startQueue.clear();
    stateQueue.clear();
    events.clear();
}
/** Enters interchange mode and queues the interchange-begin event. */
private void openInterchange() {
    this.modes.push(Mode.INTERCHANGE);
    this.enqueue(this.isn, 0);
}
/**
 * Closes the current segment, leaves interchange mode and queues the
 * interchange-end event.
 *
 * @throws EDIException if the mode stack does not match the expected nesting
 */
private void closeInterchange() throws EDIException {
    this.closeSegment();
    this.popMode(Mode.INTERCHANGE);
    this.enqueue(this.ien, 0);
}
/**
 * Enters segment mode and queues the segment-begin event; the segment tag is
 * the buffered text up to the current buffer position.
 */
private void openSegment() {
    modes.push(Mode.SEGMENT);

    final int tagEnd = buffer.position();
    enqueue(ssn, tagEnd);
}
/**
 * Finishes the element in progress, leaves segment mode and queues the
 * segment-end event.
 *
 * @throws EDIException if the mode stack does not match the expected nesting
 */
private void closeSegment() throws EDIException {
    this.handleElement();
    this.popMode(Mode.SEGMENT);
    this.enqueue(this.sen, 0);
}
/**
 * Handles a segment that has a tag but no elements: queues segment-begin
 * immediately followed by segment-end.
 *
 * @throws EDIException if the mode stack does not match the expected nesting
 */
private void emptySegment() throws EDIException {
    // Same steps as openSegment()
    modes.push(Mode.SEGMENT);
    enqueue(ssn, buffer.position());

    popMode(Mode.SEGMENT);
    enqueue(sen, 0);
}
/**
 * Completes the element in progress: queues its data event (unless the
 * element was binary, in which case the event was queued already) and
 * closes the enclosing composite, if one is open.
 *
 * @throws EDIException if the mode stack does not match the expected nesting
 */
private void handleElement() throws EDIException {
    final boolean afterBinary = previous == State.ELEMENT_END_BINARY;

    if (!afterBinary) {
        enqueue(en, buffer.position());
    }

    if (modes.peek() == Mode.COMPOSITE) {
        closeComposite();
    }
}
/** Enters composite mode and queues the composite-begin event. */
private void openComposite() {
    this.modes.push(Mode.COMPOSITE);
    this.enqueue(this.csn, 0);
}
/**
 * Completes a component: opens the composite first when this is its first
 * component, then queues the component's data event.
 */
private void handleComponent() {
    if (modes.peek() != Mode.COMPOSITE) {
        openComposite();
    }

    enqueue(en, buffer.position());
}
/** Queues an element-data event for the buffered text ending at the current buffer position. */
private void addElementEvent() {
    final int dataEnd = buffer.position();
    enqueue(en, dataEnd);
}
/** @return {@code true} when the innermost open mode is a composite */
private boolean inComposite() {
    return Mode.COMPOSITE == modes.peek();
}
/**
 * Leaves composite mode and queues the composite-end event.
 *
 * @throws EDIException if the innermost open mode is not a composite
 */
private void closeComposite() throws EDIException {
    this.popMode(Mode.COMPOSITE);
    this.enqueue(this.cen, 0);
}
/**
 * Removes the innermost open mode and verifies that it is the expected one.
 *
 * @param expected mode the caller believes is innermost
 * @throws EDIException with {@code INVALID_STATE} when a different mode was open
 */
void popMode(Mode expected) throws EDIException {
    final boolean matched = modes.pop() == expected;

    if (!matched) {
        throw error(EDIException.INVALID_STATE);
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy