
org.tomitribe.swizzle.stream.DelimitedTokenReplacementInputStream Maven / Gradle / Ivy
The newest version!
/**
*
* Copyright 2003 David Blevins
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.tomitribe.swizzle.stream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
public class DelimitedTokenReplacementInputStream extends FilteredInputStream {
private final ScanBuffer beginBuffer;
private final ScanBuffer endBuffer;
private InputStream value;
private final StreamTokenHandler handler;
public DelimitedTokenReplacementInputStream(InputStream in, String begin, String end, StreamTokenHandler tokenHandler) {
this(in, begin, end, tokenHandler, true);
}
public DelimitedTokenReplacementInputStream(InputStream in, String begin, String end, StreamTokenHandler tokenHandler, boolean caseSensitive) {
super(in);
this.handler = tokenHandler;
beginBuffer = new ScanBuffer(begin, caseSensitive);
endBuffer = new ScanBuffer(end, caseSensitive);
strategy = this::fillBeginBuffer;
}
private DelimitedTokenReplacementInputStream.StreamReadingStrategy strategy;
public int read() throws IOException {
return strategy.read();
}
// reading url (looking for end)
// flushing url
// regular read (looking for begin)
interface StreamReadingStrategy {
int read() throws IOException;
}
/**
* Step 1 is to seek out the begin token, but first we fill
* up the begin buffer to capacity.
*
* We know a match requires exactly this many bytes so don't
* bother calling match until we've read enough data.
*
* We don't care if there are -1s
*
* Once we finish with this step, any byte that overflows out
* of the beginBuffer is real data and can be read.
*
* There is a {@link ScanBuffer#flush()} method that resets a
* ScanBuffer, however we don't need to call it because all
* previous bytes are guaranteed to be overwritten. This gives
* us a littel performance boost over previous version of the
* code that did call flush.
*/
private int fillBeginBuffer() throws IOException {
// Fill up the beginBuffer
// This doubles as resetting the buffer as
// we know all prior contents are overwritten
for (int i = 0; i < beginBuffer.size(); i++) {
int stream = streamRead();
beginBuffer.append(stream);
}
// Check for a match
if (beginBuffer.match()) {
// Sometimes we might immediately find one
strategy = this::fillEndBuffer;
} else {
// Usually we need to keep looking
strategy = this::scanBegin;
}
return strategy.read();
}
/**
* We have a fully filled beginBuffer due all work
* being done by {@link #fillBeginBuffer()}
*
* Now we just need to check the buffer for a match
* each time we write a byte.
*
* If we don't have a match, we write the buffered byte
* that is now "overflowing" and being replaced.
*
* If the stream has ended, this method may end up
* returning many many -1s. We don't care. Effectively,
* this means we'll never leave this state.
*
* We do not write begin tokens to the stream, so once
* a match is found we immediately shift to fillEndBuffer
* and simply drop the begin token bytes.
*/
private int scanBegin() throws IOException {
int buffered = beginBuffer.append(streamRead());
if (beginBuffer.match()) {
strategy = this::fillEndBuffer;
}
return buffered;
}
/**
* Once reaching this state we know that the begin token
* has been found.
*
* Our goal is to now find the end token. While we are
* looking for the end token we need to track all the
* content in the middle because it will be passed to the
* StreamTokenHandler.
*
* Before we can do any of this, however, we must fill
* the endBuffer to capacity. Just like {@link #fillBeginBuffer()}
* we know there's a specific number of bytes we need, so
* we don't bother getting fancy until we've done this
* step.
*/
private int fillEndBuffer() throws IOException {
// Fill up the endBuffer
// This doubles as resetting the buffer as
// we know all prior contents are overwritten
for (int i = 0; i < endBuffer.size(); i++) {
int stream = streamRead();
endBuffer.append(stream);
}
if (endBuffer.match()) {
endBuffer.flush();
// Sometimes we immediately find the end token
// If this happens, there actually won't be a
// "middle" token and the StreamTokenHandler
// will get an empty string
strategy = this::startReplacement;
} else {
// Usually we need to keep looking for the end
// We will have a non-empty "middle" tokon to
// give the StreamTokenHandler
strategy = this::scanEnd;
}
return strategy.read();
}
StringBuilder token = new StringBuilder();
/**
* At this stage we are looking for the end token.
*
* Everything that comes off the stream at this point
* is being buffered. This buffer will grow until
* we either find the end token or run out of memory.
*
* We may never find the end token.
*/
private int scanEnd() throws IOException {
token = new StringBuilder();
while (true) {
int buffered = endBuffer.append(streamRead());
if (buffered != -1) {
token.append((char) buffered);
}
if (endBuffer.match()) {
endBuffer.flush();
strategy = this::startReplacement;
return strategy.read();
}
if (buffered == -1) {
strategy = this::endNotFound;
return strategy.read();
}
}
}
/**
* Once we reach this point we've found the end token
* and we potentially have "middle" token data saved up.
*
* We pass the "middle" token to the StreamTokenHandler,
* then shift over to making it the primary source of bytes.
*
* In the event the StreamTokenHandler decides not to give
* us any InputStream to read from, we consider our job done
* and move onto the first step, {@link #fillBeginBuffer()}
*/
private int startReplacement() throws IOException {
final String token;
if (this.token == null) {
token = "";
} else {
token = this.token.toString();
this.token = null;
}
value = handler.processToken(token);
if (value != null) {
strategy = this::flushReplacement;
} else {
strategy = this::fillBeginBuffer;
}
return strategy.read();
}
/**
* At this point we've read the begin and end tokens,
* passed all the text inbetween those two to our
* StreamTokenHandler who gave us an InputStream.
*
* Now we just read from that till it starts returning
* us some -1s indicating we're done with this token.
*
* Time to move back to the first step, {@link #fillBeginBuffer()}
* and start all over again.
*/
private int flushReplacement() throws IOException {
final int i = value.read();
if (i != -1) return i;
strategy = this::fillBeginBuffer;
return strategy.read();
}
/**
* Well, we tried to find the end token, but that didn't
* work so well, did it? We found ourselves reading
* and reading and reading and eventually hit the end of
* the stream.
*
* We got this far so we didn't run out of memory, yay.
*
* Let's do our best to pretend we didn't just buffer a
* truck-load of data and leave the stream looking untouched.
*
* Write the beginBuffer data, then write the buffered
* "middle" token data, then we're done.
*
*/
private int endNotFound() throws IOException {
strategy = this::drainBeginBuffer;
return strategy.read();
}
/**
* We don't normally write the begin token, but we never
* found the end token, so we flush it out.
*
* If the user didn't give us a stream with a matching
* end token, at least we'll give them all the data so
* they can see what we saw and maybe fix their data
* or their code.
*/
private int drainBeginBuffer() throws IOException {
final int buffered = beginBuffer.append(-1);
if (buffered != -1) return buffered;
value = new ByteArrayInputStream(token.toString().getBytes());
token = null;
strategy = this::drainTokenBuffer;
return strategy.read();
}
/**
* We've just finished writing the begin token, so
* now we write any buffered "middle" token data that
* came after it in the stream.
*
* Once we're done with this step, we're done.
*
* We know there's no end token because the stream
* is over.
*/
private int drainTokenBuffer() throws IOException {
final int buffered = value.read();
if (buffered != -1) return buffered;
strategy = this::done;
return strategy.read();
}
/**
* We know the stream is done, so we'll happily let
* the user get as many -1s as they "need"
*/
private int done() throws IOException {
return -1;
}
private int streamRead() throws IOException {
return super.read();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy