All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.norconex.commons.lang.io.CachedInputStream Maven / Gradle / Ivy

Go to download

Norconex Commons Lang is a Java library containing utility classes that complements the Java API and are not found in commonly available libraries (such as the great Apache Commons Lang, which it relies on).

There is a newer version: 2.0.2
Show newest version
/* Copyright 2014-2015 Norconex Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.norconex.commons.lang.io;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.norconex.commons.lang.file.FileUtil;
import com.norconex.commons.lang.io.CachedStreamFactory.MemoryTracker;

/**
 * {@link InputStream} wrapper that can be re-read any number of times.  This 
 * class will cache the wrapped input steam content the first time it is read, 
 * and subsequent read will use the cache.   
 * 

* To create new instances of {@link CachedInputStream}, use the * {@link CachedStreamFactory} class. Reusing the same factory * will ensure all {@link CachedInputStream} instances created share the same * combined maximum memory. Invoking one of the * newInputStream(...) methods on this class have the same effect. *

* In order to re-use this InputStream, you must call {@link #rewind()} first * on it. Once done reading the stream, you will get the -1 character as * expected, and it will remain at that until you rewind or dispose. *

* Starting reading the stream again will start reading bytes from the * beginning (re)using its internal cache. *

* Calling {@link #close()} has * no effect, and the cache data remains available for subsequent read. *

* To explicitly dispose of resources allocated to the cache, you can * use the {@link #dispose()} method. * Attempting to read a disposed instance will throw an {@link IOException}. * It is recommended you explicitly dispose of CachedInputStream * instances to speed up the release of resources. Otherwise, resources are * de-allocated automatically when the instance is finalized. *

* The internal cache stores read bytes into memory, up to to the * specified maximum cache size. If content exceeds * the cache limit, the cache transforms itself into a fast file-based cache * of unlimited size. Default memory cache size is 128 KB. *

* Starting with 1.6.0, mark(int) is supported. The mark * limit is always unlimited so the method argument is ignored. *

* @author Pascal Essiembre * @since 1.5.0 * @see CachedStreamFactory */ public class CachedInputStream extends InputStream implements ICachedStream { private static final Logger LOG = LogManager.getLogger(CachedInputStream.class); private static final int UNDEFINED_LENGTH = -42; private final CachedStreamFactory factory; private final MemoryTracker tracker; private InputStream inputStream; private byte[] memCache; private ByteArrayOutputStream memOutputStream; private File fileCache; private RandomAccessFile randomAccessFile; private boolean firstRead = true; private boolean needNewStream = false; private boolean cacheEmpty = true; private boolean disposed = false; private final File cacheDirectory; private int count; // total number of bytes read so far private int pos = 0; // byte position we are in private int markpos = 0; // position we want to go back to // undefined until a full read was performed private int length = UNDEFINED_LENGTH; /** * Caches the wrapped InputStream. * @param is InputStream to cache * @param cacheDirectory directory where to store large content */ /*default*/ CachedInputStream(CachedStreamFactory factory, InputStream is, File cacheDirectory) { super(); this.factory = factory; this.tracker = factory.new MemoryTracker(); memOutputStream = new ByteArrayOutputStream(); if (is instanceof BufferedInputStream) { this.inputStream = is; } else { this.inputStream = new BufferedInputStream(is); } if (cacheDirectory == null) { this.cacheDirectory = FileUtils.getTempDirectory(); } else { this.cacheDirectory = cacheDirectory; } } /** * Creates an input stream with an existing memory cache. * @param byteBuffer the InputStream cache. */ /*default*/ CachedInputStream( CachedStreamFactory factory, byte[] memCache) { this.factory = factory; this.tracker = factory.new MemoryTracker(); this.memCache = ArrayUtils.clone(memCache); this.cacheDirectory = null; this.firstRead = false; this.needNewStream = true; if (memCache != null) { this.length = memCache.length; } } /** * Creates an input stream with an existing file cache. * @param cacheFile the file cache */ /*default*/ CachedInputStream(CachedStreamFactory factory, File cacheFile) { this.factory = factory; this.tracker = factory.new MemoryTracker(); this.fileCache = cacheFile; this.cacheDirectory = null; this.firstRead = false; this.needNewStream = true; if (cacheFile != null && cacheFile.exists() && cacheFile.isFile()) { this.length = (int) cacheFile.length(); } } /** * Always true since 1.6.0. * @return true */ @Override public boolean markSupported() { return true; } /** * The read limit value is ignored. Limit is always unlimited. * Supported since 1.6.0. * @param readlimit any value (ignored) */ @Override public synchronized void mark(int readlimit) { markpos = pos; } /** * If no mark has previously been set, it resets to the beginning. * Supported since 1.6.0. */ @Override public synchronized void reset() throws IOException { pos = markpos; } /** * Whether caching is done in memory for this instance for what has been * read so far. Otherwise, file-based caching is used. * @return true if caching is in memory. */ public boolean isInMemory() { return fileCache == null; } @Override public int read() throws IOException { if (disposed) { throw new IOException("CachedInputStream has been disposed."); } int cursor = pos; if (cursor < count) { int val = -1; if (isInMemory()) { if (memOutputStream != null) { val = memOutputStream.getByte(cursor); } else { if (cursor >= memCache.length) { val = -1; } else { val = memCache[cursor]; } } } else { randomAccessFile.seek(cursor); val = randomAccessFile.read(); } if (val != -1) { pos++; } return val; } int b = realRead(); if (b != -1) { pos++; count++; } return b; } private int realRead() throws IOException { if (needNewStream) { createInputStreamFromCache(); } if (firstRead) { int read = inputStream.read(); if (read == -1) { return read; } if (randomAccessFile != null) { // Write to file cache randomAccessFile.write(read); } else if (!tracker.hasEnoughAvailableMemory(memOutputStream, 1)) { // Too big: create file cache and write to it. cacheToFile(); randomAccessFile.write(read); } else { // Write to memory cache memOutputStream.write(read); } cacheEmpty = false; return read; } int read = inputStream.read(); cacheEmpty = false; return read; } @Override public int read(byte[] b, int off, int len) throws IOException { if (disposed) { throw new IOException("CachedInputStream has been disposed."); } int cursor = pos; int read = 0; if (cursor < count) { int toRead = Math.min(len, count - cursor); if (isInMemory()) { if (memOutputStream != null) { byte[] bytes = new byte[toRead]; read = memOutputStream.getBytes(bytes, cursor); System.arraycopy(bytes, 0, b, off, toRead); } else { if (cursor >= memCache.length) { read = -1; } else { System.arraycopy(memCache, cursor, b, off, toRead); read = toRead; } } } else { randomAccessFile.seek(cursor); read = randomAccessFile.read(b, off, toRead); } if (read != -1) { pos += read; } } if (read != -1 && read < len) { int maxToRead = len - read; read = realRead(b, off + read, maxToRead); if (read != -1) { pos += read; count += read; } } return read; } private int realRead(byte[] b, int off, int len) throws IOException { if (needNewStream) { createInputStreamFromCache(); } int num = inputStream.read(b, off, len); cacheEmpty = false; if (num == -1) { return num; } if (firstRead) { if (randomAccessFile != null) { randomAccessFile.write(b, off, num); } else if (!tracker.hasEnoughAvailableMemory( memOutputStream, num)) { cacheToFile(); randomAccessFile.write(b, off, num); } else { memOutputStream.write(b, off, num); } } return num; } /** * If not already fully cached, forces the inner input stream to be * fully cached. * @throws IOException could not enforce full caching */ public void enforceFullCaching() throws IOException { if (firstRead) { IOUtils.copy(this, new NullOutputStream()); length = count; firstRead = false; } } /** * Rewinds this stream so it can be read again from the beginning. * If this input stream was not fully read at least once, it will * be fully read first, so its entirety is cached properly. */ public void rewind() { if (!cacheEmpty) { // Rewinding a stream that we not fully read will truncate // it. We finish reading it all to avoid that. if (firstRead) { try { enforceFullCaching(); } catch (IOException e) { throw new StreamException("Could not read entire stream " + "so rewind() can occur safely.", e); } } resetStream(); } } private void resetStream() { // Rewind IOUtils.closeQuietly(inputStream); IOUtils.closeQuietly(memOutputStream); IOUtils.closeQuietly(randomAccessFile); randomAccessFile = null; firstRead = false; needNewStream = true; if (memOutputStream != null) { LOG.debug("Creating memory cache from cached stream."); memCache = memOutputStream.toByteArray(); memOutputStream = null; } // Reset marking pos = 0; markpos = 0; count = 0; } public void dispose() throws IOException { if (memCache != null) { memCache = null; } if (inputStream != null) { inputStream.close(); inputStream = null; } if (memOutputStream != null) { memOutputStream.flush(); memOutputStream.close(); memOutputStream = null; } if (randomAccessFile != null) { randomAccessFile.close(); randomAccessFile = null; } if (fileCache != null) { FileUtil.delete(fileCache); LOG.debug("Deleted cache file: " + fileCache); } disposed = true; cacheEmpty = true; } @Override public int available() throws IOException { if (needNewStream) { createInputStreamFromCache(); } if (inputStream == null) { return 0; } return inputStream.available(); } /** * Gets the cache directory where temporary cache files are created. * @return the cache directory */ public final File getCacheDirectory() { return cacheDirectory; } /** * Returns true if was nothing to cache (no writing was * performed) or if the stream was closed. * @return true if empty */ public boolean isCacheEmpty() { return cacheEmpty; } public boolean isDisposed() { return disposed; } @Override public long getMemCacheSize() { if (memCache != null) { return memCache.length; } if (memOutputStream != null) { return memOutputStream.size(); } return 0; } /** *

Gets the length of the cached input stream. The length represents the * number of bytes that were read from this input stream, * after it was read entirely at least once.

*

Note: Invoking this method when this stream is only partially * read (on a first read) will force it to read entirely and cache the * inner input stream it wraps. To prevent an unnecessary read cycle, * it is always best to invoke this method after this stream was fully * read through normal use first. *

* @return the byte length * @since 1.6.1 */ public int length() { if (length == UNDEFINED_LENGTH) { if (LOG.isDebugEnabled()) { LOG.debug("Obtaining stream length before a stream " + "of unknown lenght was fully read. " + "This forces a full " + "read just to get the length. To avoid this extra " + "read cycle, consider calling " + "the length() method after the stream has been " + "fully read at least once through regular usage."); } // Reset marking int savedPos = pos; int savedMarkpos = markpos; try { enforceFullCaching(); resetStream(); //TODO investigate having a seek(int) method instead IOUtils.skip(this, savedPos); } catch (IOException e) { throw new StreamException("Could not read entire stream " + "to obtain its byte length.", e); } pos = savedPos; markpos = savedMarkpos; } return length; } /** * Creates a new {@link CachedInputStream} using the same factory settings * that were used to create this instance. * @param file file to create the input stream from * @return cached input stream */ public CachedInputStream newInputStream(File file) { return factory.newInputStream(file); } /** * Creates a new {@link CachedInputStream} using the same factory settings * that were used to create this instance. * @param is input stream * @return cached input stream */ public CachedInputStream newInputStream(InputStream is) { return factory.newInputStream(is); } public CachedStreamFactory getStreamFactory() { return factory; } private void cacheToFile() throws IOException { fileCache = File.createTempFile( "CachedInputStream-", "-temp", cacheDirectory); fileCache.deleteOnExit(); LOG.debug("Reached max cache size. Swapping to file: " + fileCache); randomAccessFile = new RandomAccessFile(fileCache, "rw"); randomAccessFile.write(memOutputStream.toByteArray()); memOutputStream = null; } @SuppressWarnings("resource") private void createInputStreamFromCache() throws FileNotFoundException { if (fileCache != null) { LOG.debug("Creating new input stream from file cache."); RandomAccessFile f = new RandomAccessFile(fileCache, "r"); FileChannel channel = f.getChannel(); inputStream = Channels.newInputStream(channel); } else { LOG.debug("Creating new input stream from memory cache."); inputStream = new ByteArrayInputStream(memCache); } needNewStream = false; } @Override protected void finalize() throws Throwable { dispose(); super.finalize(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy