io.xlate.edi.internal.stream.tokenization.Lexer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of staedi Show documentation
Show all versions of staedi Show documentation
Streaming API for EDI for Java
/*******************************************************************************
* Copyright 2017 xlate.io LLC, http://www.xlate.io
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package io.xlate.edi.internal.stream.tokenization;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.logging.Logger;
import io.xlate.edi.internal.stream.LocationView;
import io.xlate.edi.internal.stream.StaEDIStreamLocation;
import io.xlate.edi.stream.Location;
/**
 * Character-level tokenizer for an EDI input stream.
 *
 * <p>Each character read from the stream is classified through a
 * {@link CharacterSet} and fed to the {@link State} transition table. Depending
 * on the resulting state the character is buffered, or one or more
 * notifications (segment begin/end, composite begin/end, element data, ...)
 * are queued. Queued notifications are delivered one at a time to the
 * {@code EventHandler} supplied at construction.
 */
public class Lexer {

    private static final Logger LOGGER = Logger.getLogger(Lexer.class.getName());

    /** Structural context currently open; tracked as a stack in {@link #modes}. */
    private enum Mode {
        INTERCHANGE,
        SEGMENT,
        COMPOSITE
    }

    /** Stack of the currently open structures (innermost on top). */
    private final Deque<Mode> modes = new ArrayDeque<>();

    /** Most recent value returned by {@link #readCharacter()}. */
    private int input = 0;
    private State state = State.INITIAL;
    private State previous;

    /**
     * A queued callback. It receives the lexer state that was current when it
     * was queued and the span ({@code start}, {@code length}) of the shared
     * character buffer associated with it. The return value is handed back to
     * {@link #parse()}, which stops reading when it is {@code true}.
     */
    @FunctionalInterface
    private interface Notifier {
        boolean execute(State state, int start, int length);
    }

    /*
     * Four parallel FIFO queues: entry i of each queue belongs to the same
     * pending notification. They are always added to and removed from together
     * (see enqueue, nextEvent and clearQueues).
     */
    private final Deque<Notifier> events = new ArrayDeque<>(20);
    private final Deque<State> stateQueue = new ArrayDeque<>(20);
    private final Deque<Integer> startQueue = new ArrayDeque<>(20);
    private final Deque<Integer> lengthQueue = new ArrayDeque<>(20);

    private final InputStream stream;
    private final CharsetDecoder decoder;

    /* Single-character scratch space used by readCharacter(). */
    private final char[] readChar = new char[1];
    private final CharBuffer readCharBuf = CharBuffer.wrap(readChar);
    private final ByteBuffer readByteBuf = ByteBuffer.allocate(4);

    private final StaEDIStreamLocation location;
    private final CharacterSet characters;

    /** Accumulates tag and element characters between notifications. */
    private final CharBuffer buffer = CharBuffer.allocate(4096);
    private Dialect dialect;

    private long binaryRemain = -1;
    private InputStream binaryStream = null;

    private final Notifier isn; // interchange start
    private final Notifier ien; // interchange end
    private final Notifier ssn; // segment start
    private final Notifier sen; // segment end
    private final Notifier csn; // composite start
    private final Notifier cen; // composite end
    private final Notifier en;  // element data
    private final Notifier bn;  // binary element data

    /**
     * Creates a lexer over the given stream.
     *
     * @param stream             source of encoded EDI bytes; wrapped in a
     *                           {@link BufferedInputStream} when it does not
     *                           report mark support
     * @param charset            character set used to decode the stream
     * @param handler            receiver of the lexer's notifications
     * @param location           mutable location updated as input is consumed
     * @param extraneousIgnored  passed through to the {@link CharacterSet}
     *                           constructor
     */
    public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDIStreamLocation location, boolean extraneousIgnored) {
        if (stream.markSupported()) {
            this.stream = stream;
        } else {
            this.stream = new BufferedInputStream(stream);
        }

        this.decoder = charset.newDecoder();
        this.location = location;
        this.characters = new CharacterSet(extraneousIgnored);

        isn = (notifyState, start, length) -> {
            handler.interchangeBegin(dialect);
            return true;
        };

        ien = (notifyState, start, length) -> {
            handler.interchangeEnd();
            // The next interchange starts without a dialect or delimiters.
            dialect = null;
            characters.reset();
            return true;
        };

        ssn = (notifyState, start, length) -> {
            String segmentTag = new String(buffer.array(), start, length);
            location.incrementSegmentPosition(segmentTag);
            return handler.segmentBegin(segmentTag);
        };

        sen = (notifyState, start, length) -> {
            boolean eventsReady = handler.segmentEnd();
            location.clearSegmentLocations();
            return eventsReady;
        };

        csn = (notifyState, start, length) -> {
            if (location.isRepeated()) {
                location.incrementElementOccurrence();
            } else {
                location.incrementElementPosition();
            }

            return handler.compositeBegin(false);
        };

        cen = (notifyState, start, length) -> {
            boolean eventsReady = handler.compositeEnd(false);
            location.clearComponentPosition();
            return eventsReady;
        };

        en = (notifyState, start, length) -> {
            updateLocation(notifyState, location);
            return handler.elementData(buffer.array(), start, length);
        };

        bn = (notifyState, start, length) -> {
            updateLocation(notifyState, location);
            return handler.binaryData(binaryStream);
        };
    }

    /**
     * @return the dialect selected for the current interchange, or
     *         {@code null} when none has been selected yet or the interchange
     *         end notification has been delivered
     */
    public Dialect getDialect() {
        return dialect;
    }

    /**
     * Switches the lexer to binary element mode. A view over the underlying
     * stream limited to {@code binaryLength} bytes is created, the binary data
     * notification is queued, and the state becomes
     * {@code ELEMENT_DATA_BINARY}.
     *
     * @param binaryLength number of bytes the binary view may return
     */
    public void setBinaryLength(long binaryLength) {
        this.binaryRemain = binaryLength;

        this.binaryStream = new InputStream() {
            /**
             * Returns the next byte of the underlying stream, or -1 once the
             * permitted byte count is used up or the underlying stream ends;
             * in both of those cases the lexer state is moved to
             * {@code ELEMENT_END_BINARY}.
             */
            @Override
            public int read() throws IOException {
                int binaryInput = -1;

                if (binaryRemain-- < 1 || (binaryInput = stream.read()) < 0) {
                    state = State.ELEMENT_END_BINARY;
                } else {
                    location.incrementOffset(binaryInput);
                }

                return binaryInput;
            }
        };

        enqueue(bn, 0);
        state = State.ELEMENT_DATA_BINARY;
    }

    /**
     * Advances the lexer until a notification reports that events are ready.
     *
     * <p>If a notification is already queued it is delivered first, and the
     * method returns immediately when that notification reports events ready.
     * Otherwise characters are read and run through the state machine until
     * either a delivered notification reports events ready or the input ends.
     *
     * @throws IOException  if reading from the underlying stream fails
     * @throws EDIException if the lexer is (or becomes) in an invalid state,
     *                      an invalid character is read, the header is
     *                      rejected, or the stream ends while reading
     */
    public void parse() throws IOException, EDIException {
        if (nextEvent()) {
            return;
        }

        if (state == State.INVALID) {
            // Unable to proceed once the state becomes invalid
            throw invalidStateError();
        }

        boolean eventsReady = false;

        while (!eventsReady && (input = readCharacter()) > -1) {
            location.incrementOffset(input);

            CharacterClass clazz = characters.getClass(input);
            previous = state;
            state = state.transition(clazz);
            LOGGER.finer(() -> "State " + previous + "(" + clazz + ") -> " + state);

            switch (state) {
            case INITIAL:
            case TAG_SEARCH:
            case HEADER_TAG_SEARCH:
                break;
            case HEADER_TAG_I:
            case HEADER_TAG_N:
            case HEADER_TAG_S:
            case HEADER_TAG_U:
            case TAG_1:
            case TAG_2:
            case TAG_3:
            case TRAILER_TAG_I:
            case TRAILER_TAG_E:
            case TRAILER_TAG_A:
            case TRAILER_TAG_U:
            case TRAILER_TAG_N:
            case TRAILER_TAG_Z:
            case ELEMENT_DATA:
            case TRAILER_ELEMENT_DATA:
                buffer.put((char) input);
                break;
            case ELEMENT_INVALID_DATA:
                if (!characters.isIgnored(input)) {
                    buffer.put((char) input);
                }
                break;
            case HEADER_TAG_1: // U - When UNA is present
            case HEADER_TAG_2: // N - When UNA is present
            case HEADER_TAG_3: // B - When UNA is present
                handleStateHeaderTag(input);
                break;
            case DATA_RELEASE:
                // Skip this character - next character will be literal value
                break;
            case ELEMENT_DATA_BINARY:
                handleStateElementDataBinary();
                break;
            case INTERCHANGE_CANDIDATE:
                // ISA, UNA, or UNB was found
                handleStateInterchangeCandidate(input);
                break;
            case HEADER_DATA:
            case HEADER_INVALID_DATA:
                handleStateHeaderData(input);
                eventsReady = dialectConfirmed(State.TAG_SEARCH);
                break;
            case HEADER_SEGMENT_BEGIN:
                dialect.appendHeader(characters, (char) input);
                openSegment();
                eventsReady = dialectConfirmed(State.ELEMENT_END);
                break;
            case HEADER_ELEMENT_END:
                dialect.appendHeader(characters, (char) input);
                handleElement();
                eventsReady = dialectConfirmed(State.ELEMENT_END);
                break;
            case HEADER_COMPONENT_END:
                dialect.appendHeader(characters, (char) input);
                handleComponent();
                eventsReady = dialectConfirmed(State.COMPONENT_END);
                break;
            case SEGMENT_BEGIN:
            case TRAILER_BEGIN:
                openSegment();
                eventsReady = nextEvent();
                break;
            case SEGMENT_END:
                closeSegment();
                eventsReady = nextEvent();
                break;
            case SEGMENT_EMPTY:
                emptySegment();
                eventsReady = nextEvent();
                break;
            case COMPONENT_END:
                handleComponent();
                eventsReady = nextEvent();
                break;
            case ELEMENT_END:
            case TRAILER_ELEMENT_END:
            case ELEMENT_REPEAT:
                handleElement();
                eventsReady = nextEvent();
                break;
            case INTERCHANGE_END:
                closeInterchange();
                eventsReady = nextEvent();
                break;
            default:
                if (characters.isIgnored(input)) {
                    // Ignorable character: undo the transition and carry on.
                    state = previous;
                } else if (clazz != CharacterClass.INVALID) {
                    throw invalidStateError();
                } else {
                    throw error(EDIException.INVALID_CHARACTER);
                }
                break;
            }
        }

        if (input < 0) {
            throw error(EDIException.INCOMPLETE_STREAM);
        }
    }

    /**
     * Reads bytes from the stream one at a time, feeding the accumulated bytes
     * to the decoder until it produces one character.
     *
     * <p>NOTE(review): the output buffer holds a single {@code char}, so a
     * byte sequence that decodes to a surrogate pair looks like it would be
     * reported as overflow and surface through
     * {@link CoderResult#throwException()} - confirm whether supplementary
     * characters need to be supported here.
     *
     * @return the decoded character, or -1 when the stream is at its end
     *         (including when it ends before a character could be decoded)
     * @throws IOException if reading fails or the decoder reports an error
     */
    int readCharacter() throws IOException {
        int next = stream.read();

        if (next < 0) {
            return -1;
        }

        boolean endOfInput = false;
        boolean complete = false;
        int position = 0;

        readCharBuf.clear();
        readByteBuf.clear();
        readByteBuf.put((byte) next);

        do {
            readByteBuf.flip();
            CoderResult cr = decoder.decode(readByteBuf, readCharBuf, endOfInput);

            if (!cr.isUnderflow()) {
                cr.throwException();
            }

            if (endOfInput) {
                complete = true;
            } else if (readCharBuf.position() > 0) {
                // Single character successfully written to the CharBuffer
                complete = true;
            } else {
                // The bytes so far do not form a character: fetch one more.
                next = stream.read();

                if (next < 0) {
                    endOfInput = true;
                    decoder.reset();
                } else {
                    // Re-open the buffer and append after the bytes already held.
                    readByteBuf.limit(readByteBuf.capacity());
                    readByteBuf.position(++position);
                    readByteBuf.put((byte) next);
                }
            }
        } while (!complete);

        if (endOfInput) {
            decoder.reset();
        }

        if (readCharBuf.position() == 0 && endOfInput) {
            // Nothing was written to the CharBuffer
            return -1;
        }

        return readChar[0];
    }

    /**
     * Buffers a header tag character and also appends it to the dialect's
     * header.
     */
    void handleStateHeaderTag(int input) {
        buffer.put((char) input);
        dialect.appendHeader(characters, (char) input);
    }

    /**
     * Accounts for one character read by the lexer itself while in binary
     * mode, ending the binary element once the remaining count runs out.
     */
    void handleStateElementDataBinary() {
        /*
         * Not all of the binary data has been consumed. I.e. #next was
         * called before completion.
         */
        if (--binaryRemain < 1) {
            state = State.ELEMENT_END_BINARY;
        }
    }

    /**
     * Buffers the character that completed the candidate header tag, obtains a
     * dialect for the buffered characters from {@link DialectFactory}, replays
     * those characters into the dialect's header, and queues the interchange
     * start and segment start notifications.
     *
     * @throws EDIException propagated from the dialect lookup
     */
    void handleStateInterchangeCandidate(int input) throws EDIException {
        buffer.put((char) input);
        final char[] header = buffer.array();
        final int length = buffer.position();
        dialect = DialectFactory.getDialect(header, 0, length);

        for (int i = 0; i < length; i++) {
            dialect.appendHeader(characters, header[i]);
        }

        openInterchange();
        openSegment();
    }

    /**
     * Appends a header character to the dialect and then acts on its class:
     * a segment delimiter closes the segment and moves to
     * {@code HEADER_TAG_SEARCH}; other delimiters and the release character
     * are not buffered; anything else is buffered unless it is the dialect's
     * decimal mark or an ignored character.
     */
    void handleStateHeaderData(int input) throws EDIException {
        dialect.appendHeader(characters, (char) input);

        switch (characters.getClass(input)) {
        case SEGMENT_DELIMITER:
            closeSegment();
            state = State.HEADER_TAG_SEARCH;
            break;
        case ELEMENT_DELIMITER:
        case ELEMENT_REPEATER:
        case COMPONENT_DELIMITER:
        case RELEASE_CHARACTER:
            break;
        default:
            if (dialect.getDecimalMark() != input && !characters.isIgnored(input)) {
                buffer.put((char) input);
            }
            break;
        }
    }

    /**
     * Checks whether the dialect has accepted or rejected the header read so
     * far.
     *
     * @param confirmed state to switch to once the dialect is confirmed
     * @return {@code true} when the dialect is confirmed (one queued
     *         notification is delivered first), {@code false} while it is
     *         still undecided
     * @throws EDIException when the dialect rejects the header; the buffer,
     *                      queues, dialect and state are reset beforehand
     */
    private boolean dialectConfirmed(State confirmed) throws EDIException {
        if (dialect.isConfirmed()) {
            state = confirmed;
            nextEvent();
            return true;
        } else if (dialect.isRejected()) {
            buffer.clear();
            clearQueues();
            dialect = null;
            state = State.INITIAL;
            throw error(EDIException.INVALID_STATE, "Invalid header segment");
        }

        return false;
    }

    /**
     * Builds an {@code INVALID_STATE} exception whose message names the
     * current state, the previous state and the last input character.
     */
    private EDIException invalidStateError() {
        StringBuilder message = new StringBuilder();
        message.append(": ");
        message.append(state);
        message.append(" (previous: ");
        message.append(previous);
        message.append("); input: '");
        message.append((char) input);
        message.append('\'');
        return error(EDIException.INVALID_STATE, message);
    }

    /** Creates an exception carrying a view of the current location. */
    private EDIException error(int code, CharSequence message) {
        Location where = new LocationView(location);
        return new EDIException(code, message.toString(), where);
    }

    /** Creates an exception carrying a view of the current location. */
    private EDIException error(int code) {
        Location where = new LocationView(location);
        return new EDIException(code, where);
    }

    /**
     * Updates the element occurrence, element position and component position
     * of {@code location} for an element data notification that was queued
     * while the lexer was in {@code state}.
     */
    private static void updateLocation(State state, StaEDIStreamLocation location) {
        if (state == State.ELEMENT_REPEAT) {
            if (location.isRepeated()) {
                updateElementOccurrence(location);
            } else {
                location.setElementOccurrence(1);
            }
            location.setRepeated(true);
        } else if (location.isRepeated()) {
            if (state != State.COMPONENT_END) {
                updateElementOccurrence(location);
                location.setRepeated(false);
            }
        } else {
            location.setElementOccurrence(1);
        }

        switch (state) {
        case COMPONENT_END:
        case HEADER_COMPONENT_END:
            location.incrementComponentPosition();
            break;

        default:
            if (location.getComponentPosition() > 0) {
                location.incrementComponentPosition();
            } else if (location.getElementOccurrence() == 1) {
                location.incrementElementPosition();
            }
            break;
        }
    }

    static void updateElementOccurrence(StaEDIStreamLocation location) {
        /*
         * Only increment the position if we have not yet started
         * the composite - i.e, only a single component is present.
         */
        if (location.getComponentPosition() < 1) {
            location.incrementElementOccurrence();
        }
    }

    /**
     * Delivers the oldest queued notification, if there is one, and clears the
     * character buffer once no notifications remain queued.
     *
     * @return the value returned by the delivered notification, or
     *         {@code false} when the queue was empty
     */
    private boolean nextEvent() {
        Notifier event = events.poll();
        boolean eventsReady = false;

        if (event != null) {
            State nextState = stateQueue.remove();
            int start = startQueue.remove();
            int length = lengthQueue.remove();
            eventsReady = event.execute(nextState, start, length);
        }

        if (events.isEmpty()) {
            buffer.clear();
        }

        return eventsReady;
    }

    /**
     * Queues a notification together with the current state and the buffer
     * span it refers to. The span starts where the previously queued span
     * ended (or at 0 when nothing is queued) and extends to {@code position};
     * a {@code position} of 0 with other entries queued yields an empty span.
     */
    private void enqueue(Notifier task, int position) {
        int start;
        int length;

        if (startQueue.isEmpty()) {
            start = 0;
            length = position;
        } else {
            start = startQueue.peekLast() + lengthQueue.peekLast();
            length = position > 0 ? position - start : 0;
        }

        events.add(task);
        stateQueue.add(this.state);
        startQueue.add(start);
        lengthQueue.add(length);
    }

    /** Discards every queued notification. */
    private void clearQueues() {
        events.clear();
        stateQueue.clear();
        startQueue.clear();
        lengthQueue.clear();
    }

    private void openInterchange() {
        modes.push(Mode.INTERCHANGE);
        enqueue(isn, 0);
    }

    private void closeInterchange() throws EDIException {
        closeSegment();
        popMode(Mode.INTERCHANGE);
        enqueue(ien, 0);
    }

    /** Queues a segment start whose tag is the text buffered so far. */
    private void openSegment() {
        modes.push(Mode.SEGMENT);
        enqueue(ssn, buffer.position());
    }

    /** Flushes the last element, then queues the segment end. */
    private void closeSegment() throws EDIException {
        handleElement();
        popMode(Mode.SEGMENT);
        enqueue(sen, 0);
    }

    /** Queues a segment start immediately followed by a segment end. */
    private void emptySegment() throws EDIException {
        openSegment();
        popMode(Mode.SEGMENT);
        enqueue(sen, 0);
    }

    /**
     * Queues an element data notification, unless the previous state was
     * {@code ELEMENT_END_BINARY}, and closes the composite if one is open.
     */
    private void handleElement() throws EDIException {
        if (previous != State.ELEMENT_END_BINARY) {
            addElementEvent();
        }

        if (inComposite()) {
            closeComposite();
        }
    }

    private void openComposite() {
        modes.push(Mode.COMPOSITE);
        enqueue(csn, 0);
    }

    /** Opens a composite if none is open, then queues the component's data. */
    private void handleComponent() {
        if (!inComposite()) {
            openComposite();
        }

        addElementEvent();
    }

    private void addElementEvent() {
        enqueue(en, buffer.position());
    }

    private boolean inComposite() {
        return modes.peek() == Mode.COMPOSITE;
    }

    private void closeComposite() throws EDIException {
        popMode(Mode.COMPOSITE);
        enqueue(cen, 0);
    }

    /**
     * Pops the innermost open structure and verifies it is the expected one.
     *
     * @throws EDIException with {@code INVALID_STATE} when a different
     *                      structure was on top of the stack
     */
    void popMode(Mode expected) throws EDIException {
        if (modes.pop() != expected) {
            throw error(EDIException.INVALID_STATE);
        }
    }
}