All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.smooks.edi.edisax.BufferedSegmentReader Maven / Gradle / Ivy

The newest version!
/*-
 * ========================LICENSE_START=================================
 * smooks-edi-sax
 * %%
 * Copyright (C) 2020 Smooks
 * %%
 * Licensed under the terms of the Apache License Version 2.0, or
 * the GNU Lesser General Public License version 3.0 or later.
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-3.0-or-later
 *
 * ======================================================================
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ======================================================================
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 * =========================LICENSE_END==================================
 */
package org.smooks.edi.edisax;

import org.smooks.edi.edisax.model.internal.Delimiters;
import org.smooks.edi.edisax.util.EDIUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Stack;

/**
 * Buffered EDI Stream Segment reader.
 *
 * @author tfennelly
 */
public class BufferedSegmentReader {

    private static final int MAX_MARK_READ = 512;

    private static final Logger LOGGER = LoggerFactory.getLogger(BufferedSegmentReader.class);

    public static String IGNORE_CR_LF = "!$";

    private InputStream underlyingByteStream;
    private boolean marked = false;
    private Charset readEncoding;
    private Reader reader;
    private StringBuffer segmentBuffer = new StringBuffer(512);
    private String[] currentSegmentFields = null;
    private int currentSegmentNumber = 0;
    private Stack delimitersStack = new Stack();
    private Delimiters currentDelimiters;
    private BufferedSegmentListener segmentListener;
    private boolean ignoreNewLines;
    private int charReadCount = 0;


    /**
     * Construct the stream reader.
     *
     * @param ediInputSource EDI Stream input source.
     * @param rootDelimiters Root currentDelimiters.  New currentDelimiters can be pushed and popped.
     */
    public BufferedSegmentReader(InputSource ediInputSource, Delimiters rootDelimiters) {
        underlyingByteStream = ediInputSource.getByteStream();
        reader = ediInputSource.getCharacterStream();
        if (reader == null) {
            readEncoding = Charset.defaultCharset();
            reader = new InputStreamReader(underlyingByteStream, readEncoding);
        } else if (reader instanceof InputStreamReader) {
            readEncoding = Charset.forName(((InputStreamReader) reader).getEncoding());
        }
        this.currentDelimiters = rootDelimiters;
    }

    /**
     * Try mark the stream so we can support changing of the reader encoding.
     *
     * @see #changeEncoding(Charset)
     */
    public void mark() {
        if (underlyingByteStream != null) {
            if (underlyingByteStream.markSupported()) {
                // We don't support reader changing after we've read MAX_MARK_READ bytes...
                underlyingByteStream.mark(MAX_MARK_READ);
                marked = true;
            } else {
                LOGGER.debug("Unable to mark EDI Reader for rest (to change reader encoding).  Underlying InputStream type '" + underlyingByteStream.getClass().getName() + "' does not support mark.");
            }
        } else {
            LOGGER.debug("Unable to mark EDI Reader for rest (to change reader encoding).  BufferedSegmentReader instance does not have access to the underlying InputStream.");
        }
    }

    /**
     * Change the encoding used to read the underlying EDI data stream.
     * 

* {@link #mark()} should have been called first. * * @param encoding The new encoding. * @return The old/replaced encoding if known, otherwise null. * @throws IOException Failed to skip already read characters. */ public Charset changeEncoding(Charset encoding) throws IOException { if (underlyingByteStream == null) { throw new IllegalStateException("Unable to change stream read encoding to '" + encoding + "'. BufferedSegmentReader does not have access to the underlying stream."); } if (readEncoding != null && encoding.equals(readEncoding)) { return readEncoding; } if (!underlyingByteStream.markSupported()) { LOGGER.debug("Unable to to change stream read encoding on a stream that does not support 'mark'."); return readEncoding; } if (!marked) { LOGGER.debug("Unable to to change stream read encoding on a stream. 'mark' was not called, or was called and failed."); return readEncoding; } // reset the stream... try { underlyingByteStream.reset(); marked = false; } catch (IOException e) { LOGGER.debug("Unable to to change stream read encoding on stream because reset failed. Probably because the mark has been invalidated after reading more than " + MAX_MARK_READ + " bytes from the stream.", e); return readEncoding; } // Create a new reader and skip passed the already read characters... reader = new InputStreamReader(underlyingByteStream, encoding); underlyingByteStream.skip(charReadCount); try { return readEncoding; } finally { readEncoding = encoding; } } /** * Get the current delimiter set. * * @return the currentDelimiters The current delimiter set. */ public Delimiters getDelimiters() { return currentDelimiters; } /** * Push in a new {@link Delimiters} set into the reader. * * @param delimiters New delimiters. */ public void pushDelimiters(Delimiters delimiters) { delimitersStack.push(currentDelimiters); currentDelimiters = delimiters; } /** * Restore the parent delimiters set. *

* Be sure to {@link #getDelimitersStack() get the delimiters stack} and check * that it is not empty before popping. */ public void popDelimiters() { currentDelimiters = delimitersStack.pop(); } /** * Get the * * @return the delimitersStack */ public Stack getDelimitersStack() { return delimitersStack; } /** * Set ignore new lines in the EDI Stream. *

* Some EDI messages are formatted with new lines for readability and so the * new line characters should be ignored. * * @param ignoreNewLines True if new line characters should be ignored, otherwise false. */ public void setIgnoreNewLines(boolean ignoreNewLines) { this.ignoreNewLines = ignoreNewLines; } /** * Read a fixed number of characters from the input source. * * @param numChars The number of characters to read. * @return The characters in a String. If the end of the input source * was reached, the length of the string will be less than the requested number * of characters. * @throws IOException Error reading from input source. */ public String read(int numChars) throws IOException { segmentBuffer.setLength(0); try { return peek(numChars); } finally { segmentBuffer.setLength(0); } } /** * Peek a fixed number of characters from the input source. *

* Peek differs from {@link #read(int)} in that it leaves the * characters in the segment buffer. * * @param numChars The number of characters to peeked. * @return The characters in a String. If the end of the input source * was reached, the length of the string will be less than the requested number * of characters. * @throws IOException Error reading from input source. */ public String peek(int numChars) throws IOException { return peek(numChars, false); } /** * Peek a fixed number of characters from the input source. *

* Peek differs from {@link #read(int)} in that it leaves the * characters in the segment buffer. * * @param numChars The number of characters to peeked. * @param ignoreLeadingWhitespace Ignore leading whitespace. * @return The characters in a String. If the end of the input source * was reached, the length of the string will be less than the requested number * of characters. * @throws IOException Error reading from input source. */ public String peek(int numChars, boolean ignoreLeadingWhitespace) throws IOException { boolean ignoreCRLF; // Ignoring of new lines can be set as part of the segment delimiter, or // as a feature on the parser (the later is the preferred method)... ignoreCRLF = (currentDelimiters.ignoreCRLF() || ignoreNewLines); if (segmentBuffer.length() < numChars) { int c; if (ignoreLeadingWhitespace) { c = forwardPastWhitespace(); } else { c = readChar(); } while (c != -1) { if (ignoreCRLF && (c == '\n' || c == '\r')) { c = readChar(); continue; } segmentBuffer.append((char) c); if (segmentBuffer.length() == numChars) { break; } c = readChar(); } } int endIndex = Math.min(numChars, segmentBuffer.length()); return segmentBuffer.substring(0, endIndex); } /** * Set the segment listener. * * @param segmentListener The segment listener. */ public void setSegmentListener(BufferedSegmentListener segmentListener) { this.segmentListener = segmentListener; } /** * Move to the next EDI segment. *

* Simply reads and buffers the next EDI segment. Clears the current contents of * the buffer before reading. * * @return True if a "next" segment exists, otherwise false. * @throws IOException Error reading from EDI stream. */ public boolean moveToNextSegment() throws IOException { return moveToNextSegment(true); } /** * Move to the next EDI segment. *

* Simply reads and buffers the next EDI segment. * * @param clearBuffer Clear the segment buffer before reading. * @return True if a "next" segment exists, otherwise false. * @throws IOException Error reading from EDI stream. */ public boolean moveToNextSegment(boolean clearBuffer) throws IOException { char[] segmentDelimiter = currentDelimiters.getSegmentDelimiter(); int delimiterLen = segmentDelimiter.length; String escape = currentDelimiters.getEscape(); int escapeLen = escape != null ? escape.length() : 0; boolean ignoreCRLF; int c = readChar(); // Ignoring of new lines can be set as part of the segment delimiter, or // as a feature on the parser (the later is the preferred method)... ignoreCRLF = (currentDelimiters.ignoreCRLF() || ignoreNewLines); if (clearBuffer) { segmentBuffer.setLength(0); } currentSegmentFields = null; // We reached the end of the stream the last time this method was // called - see the while loop below... if (c == -1) { return false; } // Ignore leading whitespace on a segment... c = forwardPastWhitespace(c); boolean escapingMode = false; // Read the next segment... while (c != -1) { char theChar = (char) c; if (ignoreCRLF && (theChar == '\n' || theChar == '\r')) { c = readChar(); continue; } segmentBuffer.append((char) c); int segLen = segmentBuffer.length(); if (segLen >= delimiterLen) { boolean reachedSegEnd = true; for (int i = 0; i < delimiterLen; i++) { char segChar = segmentBuffer.charAt(segLen - 1 - i); char delimChar = segmentDelimiter[delimiterLen - 1 - i]; if (escapingMode) { if (segChar == delimChar) { segmentBuffer = segmentBuffer.delete(segLen - 2, segLen - 1); } escapingMode = false; reachedSegEnd = false; break; } else if (escape != null && escape.equals(Character.toString(segChar))) { escapingMode = true; } if (segChar != delimChar) { // Not the end of a segment reachedSegEnd = false; break; } } // We've reached the end of a segment... if (reachedSegEnd) { // Trim off the delimiter and break out... segmentBuffer.setLength(segLen - delimiterLen); break; } } c = readChar(); } if (LOGGER.isDebugEnabled()) { LOGGER.debug(segmentBuffer.toString()); } currentSegmentNumber++; if (segmentListener != null) { return segmentListener.onSegment(this); } else { return true; } } /** * Does the read have a segment buffered and ready for processing. * * @return True if a current segment exists, otherwise false. */ public boolean hasCurrentSegment() { if (segmentListener != null) { return segmentListener.onSegment(this); } else { return segmentBuffer.length() != 0; } } /** * Get the segment buffer. * * @return The segment buffer. */ public StringBuffer getSegmentBuffer() { return segmentBuffer; } /** * Get the current EDI segment fields. * * @return The current EDI segment fields array. * @throws IllegalStateException No current Segment. */ public String[] getCurrentSegmentFields() throws IllegalStateException { assertCurrentSegmentExists(); if (currentSegmentFields == null) { currentSegmentFields = EDIUtils.split(segmentBuffer.toString(), currentDelimiters.getField(), currentDelimiters.getEscape()); // If the segment delimiter is a LF, strip off any preceding CR characters... if (currentDelimiters.getSegment().equals("\n")) { int endIndex = currentSegmentFields.length - 1; if (currentSegmentFields[endIndex].endsWith("\r")) { int stringLen = currentSegmentFields[endIndex].length(); currentSegmentFields[endIndex] = currentSegmentFields[endIndex].substring(0, stringLen - 1); } } } return currentSegmentFields; } /** * Get the current segment "number". *

* The first segment is "segment number 1". * * @return The "number" of the current segment. */ public int getCurrentSegmentNumber() { return currentSegmentNumber; } private int forwardPastWhitespace() throws IOException { return forwardPastWhitespace(readChar()); } private int forwardPastWhitespace(int c) throws IOException { while (c != -1) { if (!Character.isWhitespace((char) c)) { // It's not whitespace... stop here and start processing the segment... break; } // It's whitespace... move to next character in segment stream... c = readChar(); } return c; } private int readChar() throws IOException { try { return reader.read(); } finally { charReadCount++; } } /** * Assert that there is a current segment. */ private void assertCurrentSegmentExists() { if (segmentBuffer.length() == 0) { throw new IllegalStateException("No current segment available. Possible conditions: \n" + "\t\t1. A call to moveToNextSegment() was not made, or \n" + "\t\t2. The last call to moveToNextSegment() returned false."); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy