package org.infinispan.lucene.cacheloader;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.infinispan.commons.marshall.StreamingMarshaller;
import org.infinispan.lucene.ChunkCacheKey;
import org.infinispan.lucene.FileCacheKey;
import org.infinispan.lucene.FileListCacheKey;
import org.infinispan.lucene.FileMetadata;
import org.infinispan.lucene.FileReadLockKey;
import org.infinispan.lucene.IndexScopedKey;
import org.infinispan.lucene.KeyVisitor;
import org.infinispan.lucene.impl.FileListCacheValue;
import org.infinispan.lucene.logging.Log;
import org.infinispan.marshall.core.MarshalledEntry;
import org.infinispan.marshall.core.MarshalledEntryImpl;
import org.infinispan.util.logging.LogFactory;
/**
* Contains the low-level logic to map the cache structure to the "native"
* Lucene format for a single Directory instance.
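*
* <p>A minimal usage sketch (hypothetical caller: the path, index name and chunk
* size are illustrative only, and {@code -1} assumes key affinity is not in use):</p>
* <pre>{@code
* Directory dir = FSDirectory.open(Paths.get("/var/lucene/myindex"));
* DirectoryLoaderAdaptor adaptor = new DirectoryLoaderAdaptor(dir, "myindex", 16 * 1024 * 1024, -1);
* Object fileList = adaptor.load(new FileListCacheKey("myindex", -1)); // a FileListCacheValue
* adaptor.close();
* }</pre>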
*
* @author Sanne Grinovero
* @since 5.2
*/
final class DirectoryLoaderAdaptor {
private static final Log log = LogFactory.getLog(DirectoryLoaderAdaptor.class, Log.class);
private final Directory directory;
private final LoadVisitor loadVisitor = new LoadVisitor();
private final ContainsKeyVisitor containsKeyVisitor = new ContainsKeyVisitor();
private final String indexName;
private final int autoChunkSize;
private final int affinitySegmentId;
/**
* Create a new DirectoryLoaderAdaptor.
*
* @param directory The {@link org.apache.lucene.store.Directory} to which actual IO operations are delegated
* @param indexName the index name
* @param autoChunkSize index segments might be large; we'll split them into chunks of this many bytes
* @param affinitySegmentId the affinity segment id used when building keys, or -1 when key affinity is not in use
*/
protected DirectoryLoaderAdaptor(final Directory directory, String indexName, int autoChunkSize, int affinitySegmentId) {
this.directory = directory;
this.indexName = indexName;
this.autoChunkSize = autoChunkSize;
this.affinitySegmentId = affinitySegmentId;
}
/**
* Loads all "entries" from the CacheLoader; since this is actually a Lucene index,
* segments are transformed into entries in a specific order, simplest entries first.
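*
* <p>Hypothetical invocation (the marshaller would normally be provided by the
* cache's component registry; it is shown here only to illustrate the contract):</p>
* <pre>{@code
* HashSet<MarshalledEntry> entries = new HashSet<>();
* adaptor.loadAllEntries(entries, 1000, marshaller);
* // entries now holds up to 1000 MarshalledEntry instances for this index
* }</pre>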
*
* @param entriesCollector loaded entries are collected in this set
* @param maxEntries to limit amount of entries loaded
*/
protected void loadAllEntries(final HashSet<MarshalledEntry> entriesCollector, final int maxEntries, StreamingMarshaller marshaller) {
int existingElements = entriesCollector.size();
int toLoadElements = maxEntries - existingElements;
if (toLoadElements <= 0) {
return;
}
HashSet<IndexScopedKey> keysCollector = new HashSet<>();
loadSomeKeys(keysCollector, Collections.emptySet(), toLoadElements);
for (IndexScopedKey key : keysCollector) {
Object value = load(key);
if (value != null) {
MarshalledEntry cacheEntry = new MarshalledEntryImpl(key, value, null, marshaller);
entriesCollector.add(cacheEntry);
}
}
}
/**
* Load some keys into the collector, skipping excluded keys, up to a maximum number of
* collected (non-excluded) keys. The single FileListCacheKey is collected first, then a
* FileCacheKey per file, and finally the ChunkCacheKeys for each file's contents.
* @param keysCollector the set where to add loaded keys to
* @param keysToExclude which keys should not be loaded. Warning: can be null, meaning no keys are excluded
* @param maxElements upper limit for collection
*/
private void loadSomeKeys(final HashSet<IndexScopedKey> keysCollector, final Set<IndexScopedKey> keysToExclude, final int maxElements) {
if (maxElements <= 0) {
return;
}
int collectedKeys = 0;
try {
//First we collect the (single) FileListCacheKey
String[] listAll = directory.listAll();
if (listAll.length != 0) {
FileListCacheKey rootKey = new FileListCacheKey(indexName, affinitySegmentId);
if (keysToExclude == null || !keysToExclude.contains(rootKey)) { //unless it was excluded
if (keysCollector.add(rootKey)) { //unless it was already collected
collectedKeys++;
}
}
}
//Now we collect first all FileCacheKey (keys for file metadata)
for (String fileName : listAll) {
if (collectedKeys >= maxElements) return;
FileCacheKey key = new FileCacheKey(indexName, fileName, affinitySegmentId);
if (keysToExclude == null || !keysToExclude.contains(key)) {
if (keysCollector.add(key)) {
if (++collectedKeys >= maxElements) return;
}
}
}
//Finally we collect the ChunkCacheKeys (keys for file contents)
for (String fileName : listAll) {
int numChunksInt = figureChunksNumber(fileName);
for (int i = 0; i < numChunksInt; i++) {
//Inner loop: we actually have several Chunks per file name
ChunkCacheKey key = new ChunkCacheKey(indexName, fileName, i, autoChunkSize, affinitySegmentId);
if (keysToExclude == null || !keysToExclude.contains(key)) {
if (keysCollector.add(key)) {
if (++collectedKeys >= maxElements) return;
}
}
}
}
} catch (IOException e) {
throw log.exceptionInCacheLoader(e);
}
}
/**
* Determine into how many chunks this file should be split. Must return the same value
* consistently for the same file (segments are immutable), so that a full segment can be
* rebuilt by the upper layers without anyone having to specify the chunk numbers explicitly.
*/
private int figureChunksNumber(String fileName) throws IOException {
long fileLength = directory.fileLength(fileName);
return figureChunksNumber(fileName, fileLength, autoChunkSize);
}
/**
* Index segment files might be larger than 2GB, so with a small autoChunkSize the number
* of chunks might not fit in an int (and a single array could never hold that many bytes
* anyway). In that case we ramp up and retry the split with larger chunkSize values.
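* <p>For example (illustrative numbers only): a 5 GiB file with a 16 MiB chunk size
* yields {@code figureChunksNumber("_0.cfs", 5L << 30, 16 << 20) == 320}. A hypothetical
* 3 TiB file with a 1 KiB chunk size would overflow int, so the method retries with a
* 32 KiB chunk size instead.</p>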
*/
public static int figureChunksNumber(final String fileName, final long fileLength, int chunkSize) {
if (chunkSize < 0) {
throw new IllegalStateException("Overflow in rescaling chunkSize. File way too large?");
}
final long numChunks = (fileLength % chunkSize == 0) ? (fileLength / chunkSize) : (fileLength / chunkSize) + 1;
if (numChunks > Integer.MAX_VALUE) {
log.rescalingChunksize(fileName, fileLength, chunkSize);
chunkSize = 32 * chunkSize;
return figureChunksNumber(fileName, fileLength, chunkSize);
}
else {
return (int)numChunks;
}
}
/**
* Closes the underlying Directory. After it's closed, no further invocations are expected on
* this adaptor; we don't check for that explicitly, as the Directory instance takes care of it.
*/
protected void close() {
try {
directory.close();
}
catch (IOException e) {
//log but continue execution: we might want to try closing more instances
log.errorOnFSDirectoryClose(e);
}
}
/**
* Load the value for a specific key
*/
protected Object load(final IndexScopedKey key) {
try {
return key.accept(loadVisitor);
}
catch (Exception e) {
throw log.exceptionInCacheLoader(e);
}
}
/**
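* Checks a key against the contents of the underlying Directory.
* <p>For example (hypothetical values): {@code containsKey(new FileCacheKey("myindex", "_0.cfs", -1))}
* is {@code true} only if the Directory currently lists a file named {@code _0.cfs}.</p>
*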
* @param key {@link org.infinispan.lucene.IndexScopedKey}
* @return true if the indexKey matches a loadable entry
*/
protected boolean containsKey(final IndexScopedKey key) {
try {
return key.accept(containsKeyVisitor);
}
catch (Exception e) {
throw log.exceptionInCacheLoader(e);
}
}
/**
* Load implementation for FileListCacheKey; must return a
* FileListCacheValue containing the names of all files in this Directory.
*/
private Object loadIntern() throws IOException {
final String[] listAll = directory.listAll();
return new FileListCacheValue(listAll);
}
/**
* Load implementation for FileCacheKey: must return the metadata of the
* requested file.
*/
private FileMetadata loadIntern(final FileCacheKey key) throws IOException {
final String fileName = key.getFileName();
final long fileLength = directory.fileLength(fileName);
// The buffer size of a to-be-read segment is the full file size, capped at autoChunkSize:
final int bufferSize = (int) Math.min(fileLength, (long)autoChunkSize);
final FileMetadata meta = new FileMetadata(bufferSize);
meta.setSize(fileLength);
return meta;
}
/**
* Loads the actual byte array from a segment, in the range of a specific chunkSize.
* Note that while the chunkSize is specified explicitly in this case, it's likely
* derived from the invocations of other loading methods.
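* <p>For example (illustrative values): with a bufferSize of 1024, chunk id 3 reads
* bytes {@code [3072, 4096)} of the file, or fewer if the file ends earlier.</p>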
*/
private byte[] loadIntern(final ChunkCacheKey key) throws IOException {
final String fileName = key.getFileName();
final long chunkId = key.getChunkId(); //needs to be long to upcast following operations
int bufferSize = key.getBufferSize();
final long seekTo = chunkId * bufferSize;
final byte[] buffer;
final IndexInput input = directory.openInput(fileName, IOContext.READ);
final long length = input.length();
try {
if (seekTo != 0) {
input.seek(seekTo);
}
bufferSize = (int) Math.min(length - seekTo, (long)bufferSize);
buffer = new byte[bufferSize];
input.readBytes(buffer, 0, bufferSize);
}
finally {
input.close();
}
return buffer;
}
/**
* ContainsKey implementation for chunk elements
*/
private Boolean containsKeyIntern(final ChunkCacheKey chunkCacheKey) throws IOException {
try {
final long length = directory.fileLength(chunkCacheKey.getFileName());
final int bufferSize = chunkCacheKey.getBufferSize();
final int chunkId = chunkCacheKey.getChunkId();
//Upcast chunkId to long before multiplying, to avoid int overflow (see loadIntern(ChunkCacheKey)).
//Note the bound is length + bufferSize, i.e. one chunk of slack past the exact chunk count.
return Boolean.valueOf(((long) chunkId * bufferSize) < (length + bufferSize));
}
catch (NoSuchFileException nfne) {
//Ok, we might check for file existence first... but it's reasonable to be
//optimistic.
return Boolean.FALSE;
}
}
/**
* ContainsKey implementation for file metadata elements
*/
protected Boolean containsKeyIntern(final FileCacheKey fileCacheKey) throws IOException {
for (String file : directory.listAll()) {
if (file.equals(fileCacheKey.getFileName())) {
return true;
}
}
return false;
}
/**
* Routes invocations to type-safe load operations
*/
private final class LoadVisitor implements KeyVisitor<Object> {
@Override
public Object visit(final FileListCacheKey fileListCacheKey) throws IOException {
return DirectoryLoaderAdaptor.this.loadIntern();
}
@Override
public Object visit(final ChunkCacheKey chunkCacheKey) throws IOException {
return DirectoryLoaderAdaptor.this.loadIntern(chunkCacheKey);
}
@Override
public Object visit(final FileCacheKey fileCacheKey) throws IOException {
return DirectoryLoaderAdaptor.this.loadIntern(fileCacheKey);
}
@Override
public Object visit(final FileReadLockKey fileReadLockKey) {
//ReadLocks should not leak to the actual storage
return null;
}
}
/**
* Routes invocations to type-safe containsKey operations
*/
private final class ContainsKeyVisitor implements KeyVisitor<Boolean> {
@Override
public Boolean visit(final FileListCacheKey fileListCacheKey) throws IOException {
//We already know this Directory exists, as that's a pre-condition for the creation of this adaptor.
//Also, all existing directories are able to list contained files.
return Boolean.TRUE;
}
@Override
public Boolean visit(final ChunkCacheKey chunkCacheKey) throws IOException {
return DirectoryLoaderAdaptor.this.containsKeyIntern(chunkCacheKey);
}
@Override
public Boolean visit(final FileCacheKey fileCacheKey) throws IOException {
return DirectoryLoaderAdaptor.this.containsKeyIntern(fileCacheKey);
}
@Override
public Boolean visit(final FileReadLockKey fileReadLockKey) {
//ReadLocks should not leak to the actual storage
return Boolean.FALSE;
}
}
}