
com.itextpdf.io.source.PagedChannelRandomAccessSource Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
package com.itextpdf.io.source;
import com.itextpdf.io.logs.IoLogMessageConstant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.channels.FileChannel;
import java.util.Iterator;
import java.util.LinkedList;
/**
* A RandomAccessSource that is based on an underlying {@link java.nio.channels.FileChannel}. The channel is mapped into memory using a paging scheme to allow for efficient reads of very large files.
* As an implementation detail, we use {@link GroupedRandomAccessSource} functionality, but override to make determination of the underlying
* mapped page more efficient - and to close each page as another is opened
*/
class PagedChannelRandomAccessSource extends GroupedRandomAccessSource implements IRandomAccessSource {
// these values were selected based on parametric testing with extracting text content from a 2.3GB file. These settings resulted in the best improvement over
// the single size MRU case (24% speed improvement)
public static final int DEFAULT_TOTAL_BUFSIZE = 1 << 26;
public static final int DEFAULT_MAX_OPEN_BUFFERS = 16;
/**
* The size of each of the buffers to use when mapping files into memory. This must be greater than 0 and less than {@link Integer#MAX_VALUE}
*/
private final int bufferSize;
/**
* The channel this source is based on
*/
private final FileChannel channel;
/**
* Most recently used list used to hold a number of mapped pages open at a time
*/
private final MRU mru;
/**
* Constructs a new {@link PagedChannelRandomAccessSource} based on the specified FileChannel, with a default buffer configuration.
* The default buffer configuration is currently 2^26 total paged bytes, spread across a maximum of 16 active buffers. This arrangement
* resulted in a 24% speed improvement over the single buffer case in parametric tests extracting text from a 2.3 GB file.
* @param channel the channel to use as the backing store
* @throws java.io.IOException if the channel cannot be opened or mapped
*/
public PagedChannelRandomAccessSource(FileChannel channel) throws java.io.IOException {
this(channel, DEFAULT_TOTAL_BUFSIZE, DEFAULT_MAX_OPEN_BUFFERS);
}
/**
* Constructs a new {@link PagedChannelRandomAccessSource} based on the specified FileChannel, with a specific buffer size
* @param channel the channel to use as the backing store
* @param totalBufferSize total buffer size
* @param maxOpenBuffers open buffers
* @throws java.io.IOException if the channel cannot be opened or mapped
*/
public PagedChannelRandomAccessSource(FileChannel channel, int totalBufferSize, int maxOpenBuffers) throws java.io.IOException {
super(buildSources(channel, totalBufferSize/maxOpenBuffers));
this.channel = channel;
this.bufferSize = totalBufferSize/maxOpenBuffers;
this.mru = new MRU(maxOpenBuffers);
}
/**
* Constructs a set of {@link MappedChannelRandomAccessSource}s for each page (of size bufferSize) of the underlying channel
* @param channel the underlying channel
* @param bufferSize the size of each page (the last page may be shorter)
* @return a list of sources that represent the pages of the channel
* @throws java.io.IOException if IO fails for any reason
*/
private static IRandomAccessSource[] buildSources(FileChannel channel, int bufferSize) throws java.io.IOException{
long size = channel.size();
if (size <= 0)
throw new java.io.IOException("File size must be greater than zero");
int bufferCount = (int)(size/bufferSize) + (size % bufferSize == 0 ? 0 : 1);
MappedChannelRandomAccessSource[] sources = new MappedChannelRandomAccessSource[bufferCount];
for (int i = 0; i < bufferCount; i++){
long pageOffset = (long)i*bufferSize;
long pageLength = Math.min(size - pageOffset, bufferSize);
sources[i] = new MappedChannelRandomAccessSource(channel, pageOffset, pageLength);
}
return sources;
}
/**
* {@inheritDoc}
*/
@Override
protected int getStartingSourceIndex(long offset) {
return (int) (offset / bufferSize);
}
/**
* {@inheritDoc}
* For now, close the source that is no longer being used. In the future, we may implement an MRU that allows multiple pages to be opened at a time
*/
@Override
protected void sourceReleased(IRandomAccessSource source) throws java.io.IOException {
IRandomAccessSource old = mru.enqueue(source);
if (old != null)
old.close();
}
/**
* {@inheritDoc}
* Ensure that the source is mapped. In the future, we may implement an MRU that allows multiple pages to be opened at a time
*/
@Override
protected void sourceInUse(IRandomAccessSource source) throws java.io.IOException {
((MappedChannelRandomAccessSource)source).open();
}
/**
* {@inheritDoc}
* Cleans the mapped bytebuffers and closes the channel
*/
@Override
public void close() throws java.io.IOException {
try {
super.close();
} finally {
try {
channel.close();
} catch (Exception ex) {
Logger logger = LoggerFactory.getLogger(PagedChannelRandomAccessSource.class);
logger.error(IoLogMessageConstant.FILE_CHANNEL_CLOSING_FAILED, ex);
}
}
}
private static class MRU {
/**
* The maximum number of entries held by this MRU
*/
private final int limit;
/**
* Backing list for managing the MRU
*/
private LinkedList queue = new LinkedList<>();
/**
* Constructs an MRU with the specified size
* @param limit the limit
*/
public MRU(int limit) {
this.limit = limit;
}
/**
* Adds an element to the MRU. If the element is already in the MRU, it is moved to the top.
* @param newElement the element to add
* @return the element that was removed from the MRU to make room for the new element, or null if no element needed to be removed
*/
public E enqueue(E newElement){
// TODO: this check may not be an effective optimization - the GroupedRandomAccessSource already tracks the 'current' source, so it seems unlikely that we would ever hit this code branch
if (queue.size() > 0 && queue.getFirst() == newElement)
return null;
for(Iterator it = queue.iterator(); it.hasNext();){
E element = it.next();
if (newElement == element){
it.remove();
queue.addFirst(newElement);
return null;
}
}
queue.addFirst(newElement);
if (queue.size() > limit)
return queue.removeLast();
return null;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy