All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.trypticon.luceneupgrader.lucene3.internal.lucene.store.NRTCachingDirectory Maven / Gradle / Ivy

There is a newer version: 0.5.1
Show newest version
package org.trypticon.luceneupgrader.lucene3.internal.lucene.store;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.trypticon.luceneupgrader.lucene3.internal.lucene.index.ConcurrentMergeScheduler;
import org.trypticon.luceneupgrader.lucene3.internal.lucene.index.IndexFileNames;
import org.trypticon.luceneupgrader.lucene3.internal.lucene.index.IndexWriter;       // javadocs
import org.trypticon.luceneupgrader.lucene3.internal.lucene.index.MergePolicy;
import org.trypticon.luceneupgrader.lucene3.internal.lucene.index.MergeScheduler;
import org.trypticon.luceneupgrader.lucene3.internal.lucene.util.IOUtils;

// TODO
//   - let subclass dictate policy...?
//   - rename to MergeCacheingDir?  NRTCachingDir

/**
 * Wraps a {@link RAMDirectory}
 * around any provided delegate directory, to
 * be used during NRT search.  Make sure you pull the merge
 * scheduler using {@link #getMergeScheduler} and pass that to your
 * {@link IndexWriter}; this class uses that to keep track of which
 * merges are being done by which threads, to decide when to
 * cache each written file.
 *
 * 

This class is likely only useful in a near-real-time * context, where indexing rate is lowish but reopen * rate is highish, resulting in many tiny files being * written. This directory keeps such segments (as well as * the segments produced by merging them, as long as they * are small enough), in RAM.

* *

This is safe to use: when your app calls {IndexWriter#commit}, * all cached files will be flushed from the cached and sync'd.

* *

NOTE: this class is somewhat sneaky in its * approach for spying on merges to determine the size of a * merge: it records which threads are running which merges * by watching ConcurrentMergeScheduler's doMerge method. * While this works correctly, likely future versions of * this class will take a more general approach. * *

Here's a simple example usage: * *

 *   Directory fsDir = FSDirectory.open(new File("/path/to/index"));
 *   NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
 *   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
 *   conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
 *   IndexWriter writer = new IndexWriter(cachedFSDir, conf);
 * 
* *

This will cache all newly flushed segments, all merges * whose expected segment size is <= 5 MB, unless the net * cached bytes exceeds 60 MB at which point all writes will * not be cached (until the net bytes falls below 60 MB).

* * @lucene.experimental */ public class NRTCachingDirectory extends Directory { private final RAMDirectory cache = new RAMDirectory(); private final Directory delegate; private final long maxMergeSizeBytes; private final long maxCachedBytes; private static final boolean VERBOSE = false; /** * We will cache a newly created output if 1) it's a * flush or a merge and the estimated size of the merged segment is <= * maxMergeSizeMB, and 2) the total cached bytes is <= * maxCachedMB */ public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) { this.delegate = delegate; maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024); maxCachedBytes = (long) (maxCachedMB*1024*1024); } @Override public LockFactory getLockFactory() { return delegate.getLockFactory(); } @Override public void setLockFactory(LockFactory lf) throws IOException { delegate.setLockFactory(lf); } @Override public String getLockID() { return delegate.getLockID(); } @Override public Lock makeLock(String name) { return delegate.makeLock(name); } @Override public void clearLock(String name) throws IOException { delegate.clearLock(name); } @Override public String toString() { return "NRTCachingDirectory(" + delegate + "; maxCacheMB=" + (maxCachedBytes/1024/1024.) + " maxMergeSizeMB=" + (maxMergeSizeBytes/1024/1024.) + ")"; } @Override public synchronized String[] listAll() throws IOException { final Set files = new HashSet(); for(String f : cache.listAll()) { files.add(f); } // LUCENE-1468: our NRTCachingDirectory will actually exist (RAMDir!), // but if the underlying delegate is an FSDir and mkdirs() has not // yet been called, because so far everything is a cached write, // in this case, we don't want to throw a NoSuchDirectoryException try { for(String f : delegate.listAll()) { // Cannot do this -- if lucene calls createOutput but // file already exists then this falsely trips: //assert !files.contains(f): "file \"" + f + "\" is in both dirs"; files.add(f); } } catch (NoSuchDirectoryException ex) { // however, if there are no cached files, then the directory truly // does not "exist" if (files.isEmpty()) { throw ex; } } return files.toArray(new String[files.size()]); } /** Returns how many bytes are being used by the * RAMDirectory cache */ public long sizeInBytes() { return cache.sizeInBytes(); } @Override public synchronized boolean fileExists(String name) throws IOException { return cache.fileExists(name) || delegate.fileExists(name); } @Override public synchronized long fileModified(String name) throws IOException { if (cache.fileExists(name)) { return cache.fileModified(name); } else { return delegate.fileModified(name); } } @Override @Deprecated /* @deprecated Lucene never uses this API; it will be * removed in 4.0. */ public synchronized void touchFile(String name) throws IOException { if (cache.fileExists(name)) { cache.touchFile(name); } else { delegate.touchFile(name); } } @Override public synchronized void deleteFile(String name) throws IOException { if (VERBOSE) { System.out.println("nrtdir.deleteFile name=" + name); } if (cache.fileExists(name)) { assert !delegate.fileExists(name): "name=" + name; cache.deleteFile(name); } else { delegate.deleteFile(name); } } @Override public synchronized long fileLength(String name) throws IOException { if (cache.fileExists(name)) { return cache.fileLength(name); } else { return delegate.fileLength(name); } } public String[] listCachedFiles() { return cache.listAll(); } @Override public IndexOutput createOutput(String name) throws IOException { if (VERBOSE) { System.out.println("nrtdir.createOutput name=" + name); } if (doCacheWrite(name)) { if (VERBOSE) { System.out.println(" to cache"); } try { delegate.deleteFile(name); } catch (IOException ioe) { // This is fine: file may not exist } return cache.createOutput(name); } else { try { cache.deleteFile(name); } catch (IOException ioe) { // This is fine: file may not exist } return delegate.createOutput(name); } } @Override public void sync(Collection fileNames) throws IOException { if (VERBOSE) { System.out.println("nrtdir.sync files=" + fileNames); } for(String fileName : fileNames) { unCache(fileName); } delegate.sync(fileNames); } @Override public synchronized IndexInput openInput(String name) throws IOException { if (VERBOSE) { System.out.println("nrtdir.openInput name=" + name); } if (cache.fileExists(name)) { if (VERBOSE) { System.out.println(" from cache"); } return cache.openInput(name); } else { return delegate.openInput(name); } } @Override public synchronized IndexInput openInput(String name, int bufferSize) throws IOException { if (cache.fileExists(name)) { return cache.openInput(name, bufferSize); } else { return delegate.openInput(name, bufferSize); } } /** Close this directory, which flushes any cached files * to the delegate and then closes the delegate. */ @Override public void close() throws IOException { // NOTE: technically we shouldn't have to do this, ie, // IndexWriter should have sync'd all files, but we do // it for defensive reasons... or in case the app is // doing something custom (creating outputs directly w/o // using IndexWriter): for(String fileName : cache.listAll()) { unCache(fileName); } cache.close(); delegate.close(); } private final ConcurrentHashMap merges = new ConcurrentHashMap(); public MergeScheduler getMergeScheduler() { return new ConcurrentMergeScheduler() { @Override protected void doMerge(MergePolicy.OneMerge merge) throws IOException { try { merges.put(Thread.currentThread(), merge); super.doMerge(merge); } finally { merges.remove(Thread.currentThread()); } } }; } /** Subclass can override this to customize logic; return * true if this file should be written to the RAMDirectory. */ protected boolean doCacheWrite(String name) { final MergePolicy.OneMerge merge = merges.get(Thread.currentThread()); //System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes)); return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes; } private final Object uncacheLock = new Object(); private void unCache(String fileName) throws IOException { // Only let one thread uncache at a time; this only // happens during commit() or close(): synchronized(uncacheLock) { if (VERBOSE) { System.out.println("nrtdir.unCache name=" + fileName); } if (!cache.fileExists(fileName)) { // Another thread beat us... return; } if (delegate.fileExists(fileName)) { throw new IOException("cannot uncache file=\"" + fileName + "\": it was separately also created in the delegate directory"); } final IndexOutput out = delegate.createOutput(fileName); IndexInput in = null; try { in = cache.openInput(fileName); in.copyBytes(out, in.length()); } finally { IOUtils.close(in, out); } // Lock order: uncacheLock -> this synchronized(this) { // Must sync here because other sync methods have // if (cache.fileExists(name)) { ... } else { ... }: cache.deleteFile(fileName); } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy