// org.archive.util.ObjectIdentityBdbManualCache Maven / Gradle / Ivy
// Show all versions of heritrix-commons Show documentation
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.util;
import java.io.Closeable;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.bdb.KryoBinding;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
/**
 * A BDB JE backed object cache.
 *
 * <p>Soft references to previously-instantiated objects are held so that
 * unless/until an object is garbage collected, subsequent get()s will
 * return the exact same object (avoiding redundant creation or disagreement
 * about canonical object state).
 *
 * <p>Values are only written back to disk when they have been reported as
 * changed through {@link #dirtyKey(String)}. Dirty values are flushed when
 * they are evicted from the bounded dirty-item map (size or age limit) or
 * when {@link #sync()} is called. The backing disk is therefore only
 * guaranteed to be up-to-date after a flush of all dirty values to disk,
 * as can be forced by sync().
 *
 * @param <V> type of the cached values
 *
 * @author John Erik Halse
 * @author stack
 * @author gojomo
 * @author paul baclace (conversion to ConcurrentMap)
 */
@SuppressWarnings("ALL")
public class ObjectIdentityBdbManualCache<V extends IdentityCacheable>
        implements ObjectIdentityCache<V>, Closeable, Serializable {
    private static final long serialVersionUID = 1L;
    private static final Logger logger =
            Logger.getLogger(ObjectIdentityBdbManualCache.class.getName());

    /** Most dirty items held before the oldest are flushed to disk. */
    private static final long MAX_DIRTY_ITEMS = 10000;
    /** Minutes a dirty item may stay unflushed after being marked dirty. */
    private static final long DIRTY_EXPIRY_MINUTES = 5;
    /** A cache summary is logged once per this many gets. */
    private static final long SUMMARY_LOG_INTERVAL = 10000;

    /** The BDB JE database used for this instance. */
    protected transient Database db;
    /** in-memory map of new/recent/still-referenced-elsewhere instances */
    protected transient ConcurrentMap<String, V> memMap;
    /** The Collection view of the BDB JE database used for this instance. */
    protected transient StoredSortedMap<String, V> diskMap;
    /** Values reported dirty via {@link #dirtyKey(String)} and not yet flushed. */
    protected transient ConcurrentMap<String, V> dirtyItems;
    /** Number of keys in the disk map; set up by initialize(). */
    protected AtomicLong count;

    //
    // USAGE STATS
    //
    /** Count of times we got an object from in-memory cache */
    private AtomicLong cacheHit = new AtomicLong(0);
    /** Count of times the {@link ObjectIdentityBdbManualCache#get} method was called. */
    private AtomicLong countOfGets = new AtomicLong(0);
    /** Count of every time disk-based map provided non-null object */
    private AtomicLong diskHit = new AtomicLong(0);
    /** Count of times Supplier was used for new object */
    private AtomicLong supplierUsed = new AtomicLong(0);
    /** count of {@link #sync()} use */
    transient private AtomicLong useStatsSyncUsed = new AtomicLong(0);
    /** Count of dirty items flushed to disk because the dirty map evicted them */
    private AtomicLong evictions = new AtomicLong(0);

    /**
     * Constructor. You must call
     * {@link #initialize(Environment, String, Class, StoredClassCatalog)}
     * to finish construction. Construction is two-stepped to support
     * reconnecting a deserialized CachedBdbMap with its backing bdbje
     * database.
     */
    public ObjectIdentityBdbManualCache() {
        super();
        dirtyItems = newDirtyItemsMap();
    }

    /**
     * Builds the bounded map that tracks values needing persistence. An
     * entry is auto-persisted if it is unchanged after
     * {@link #DIRTY_EXPIRY_MINUTES} minutes, or when more than
     * {@link #MAX_DIRTY_ITEMS} entries would collect.
     */
    private ConcurrentMap<String, V> newDirtyItemsMap() {
        return CacheBuilder.newBuilder()
                .maximumSize(MAX_DIRTY_ITEMS)
                .expireAfterWrite(DIRTY_EXPIRY_MINUTES, TimeUnit.MINUTES)
                .removalListener(new RemovalListener<String, V>() {
                    @Override
                    public void onRemoval(RemovalNotification<String, V> notification) {
                        // Explicit removals come from sync(), which writes the
                        // entry itself, and a replaced entry is still tracked
                        // under the same key; only a true eviction would
                        // otherwise lose the pending write.
                        if (!notification.wasEvicted()) {
                            return;
                        }
                        evictions.incrementAndGet();
                        diskMap.put(notification.getKey(), notification.getValue());
                    }
                })
                .<String, V>build()
                .asMap();
    }

    /**
     * Call this method when you have an instance when you used the
     * default constructor or when you have a deserialized instance that you
     * want to reconnect with an extant bdbje environment.
     *
     * @param env environment in which the backing database is opened
     * @param dbName name of the backing database; created if absent
     * @param valueClass class of the cached values, used to pick a binding
     * @param classCatalog passed through to
     *        {@link #createDiskMap(Database, StoredClassCatalog, Class)}
     * @throws DatabaseException if the database cannot be opened
     */
    public void initialize(final Environment env, String dbName,
            final Class valueClass, final StoredClassCatalog classCatalog)
            throws DatabaseException {
        // Transient state is only set by the constructor / field initializers,
        // neither of which runs for a deserialized instance.
        if (this.dirtyItems == null) {
            this.dirtyItems = newDirtyItemsMap();
        }
        if (this.useStatsSyncUsed == null) {
            this.useStatsSyncUsed = new AtomicLong(0);
        }
        // TODO: tune capacity for actual threads, expected size of key caches?
        this.memMap = CacheBuilder.newBuilder()
                .concurrencyLevel(64)
                .initialCapacity(8192)
                .softValues()
                .<String, V>build()
                .asMap();
        this.db = openDatabase(env, dbName);
        this.diskMap = createDiskMap(this.db, classCatalog, valueClass);
        this.count = new AtomicLong(diskMap.size());
    }

    /**
     * Creates the map view over the given database. Keys use the primitive
     * String binding; values use a primitive binding when one exists for
     * valueClass, otherwise a {@link KryoBinding}.
     *
     * @param database database to wrap
     * @param classCatalog not used by this implementation
     * @param valueClass class of the stored values
     * @return a writable map view of the database
     */
    @SuppressWarnings("unchecked")
    protected StoredSortedMap<String, V> createDiskMap(Database database,
            StoredClassCatalog classCatalog, Class valueClass) {
        EntryBinding keyBinding = TupleBinding.getPrimitiveBinding(String.class);
        EntryBinding valueBinding = TupleBinding.getPrimitiveBinding(valueClass);
        if (valueBinding == null) {
            valueBinding = new KryoBinding(valueClass);
        }
        return new StoredSortedMap<String, V>(database, keyBinding, valueBinding, true);
    }

    /**
     * Opens (creating if necessary) the named database as a
     * non-transactional, deferred-write database.
     *
     * @param environment environment to open the database in
     * @param dbName name of the database
     * @return the opened database
     * @throws DatabaseException if the database cannot be opened
     */
    protected Database openDatabase(final Environment environment,
            final String dbName) throws DatabaseException {
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setTransactional(false);
        dbConfig.setAllowCreate(true);
        dbConfig.setDeferredWrite(true);
        return environment.openDatabase(null, dbName, dbConfig);
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#close()
     */
    public synchronized void close() {
        if (this.db == null) {
            // never initialized, or already closed
            return;
        }
        try {
            try {
                sync();
                this.db.sync();
            } finally {
                // sync() reports failures as RuntimeException; release the
                // database handle even when the final flush fails.
                this.db.close();
            }
        } catch (DatabaseException e) {
            logger.log(Level.WARNING, "problem closing ObjectIdentityBdbCache", e);
        } finally {
            this.db = null;
        }
    }

    /** Closes the cache if the owner never did. */
    @Override
    protected void finalize() throws Throwable {
        try {
            close();
        } finally {
            // chain to the superclass even if close() fails
            super.finalize();
        }
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#get(java.lang.String)
     */
    public V get(final String key) {
        return getOrUse(key, null);
    }

    /**
     * Returns the canonical instance for the key, looking first in memory,
     * then on disk, and finally asking the supplier (if any) for a new one.
     *
     * @param key key to look up
     * @param supplierOrNull source of a new value when none exists yet, or
     *        null to get null back for unknown keys
     * @return the canonical value, or null if absent and no supplier given
     * @see org.archive.util.ObjectIdentityCache#getOrUse(String, Supplier)
     */
    public V getOrUse(final String key, Supplier<V> supplierOrNull) {
        // test the value returned by the increment itself, so exactly one
        // caller sees each multiple of the interval
        if (countOfGets.incrementAndGet() % SUMMARY_LOG_INTERVAL == 0) {
            logCacheSummary();
        }

        // check mem cache
        V val = memMap.get(key);
        if (val != null) {
            // the concurrent garden path: in memory and valid
            cacheHit.incrementAndGet();
            val.setIdentityCache(this);
            return val;
        }

        val = diskMap.get(key);
        V prevVal;
        if (val == null) {
            // never yet created, consider creating
            if (supplierOrNull == null) {
                return null;
            }
            val = supplierOrNull.get();
            supplierUsed.incrementAndGet();
            // putting initial value directly into diskMap
            // (rather than just the memMap until page-out)
            // ensures diskMap.keySet() provides complete view
            prevVal = diskMap.putIfAbsent(key, val);
            if (prevVal != null) {
                // we lost a race; discard our local creation in favor of disk version
                diskHit.incrementAndGet();
                val = prevVal;
            } else {
                // we uniquely added a new key
                count.incrementAndGet();
            }
        } else {
            diskHit.incrementAndGet();
        }

        prevVal = memMap.putIfAbsent(key, val); // fill memMap or lose race gracefully
        if (prevVal != null) {
            val = prevVal;
        }
        val.setIdentityCache(this);
        return val;
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#keySet()
     */
    public Set<String> keySet() {
        return diskMap.keySet();
    }

    /**
     * Summary to log, if at FINE level
     */
    private void logCacheSummary() {
        if (logger.isLoggable(Level.FINE)) {
            logger.fine(composeCacheSummary());
        }
    }

    /**
     * Composes a one-line report of the usage counters.
     *
     * @return the report, or an empty string while there have been no
     *         memory or disk hits
     */
    protected String composeCacheSummary() {
        long totalHits = cacheHit.get() + diskHit.get();
        if (totalHits < 1) {
            return "";
        }
        long cacheHitPercent = (cacheHit.get() * 100) / totalHits;
        StringBuilder sb = new StringBuilder(120);
        sb.append("DB name:")
                .append(getDatabaseName())
                .append(", ")
                .append(" hit%: ")
                .append(cacheHitPercent)
                .append("%, gets=")
                .append(countOfGets.get())
                .append(" memHits=")
                .append(cacheHit.get())
                .append(" diskHits=")
                .append(diskHit.get())
                .append(" supplieds=")
                .append(supplierUsed.get())
                .append(" inMemItems=")
                .append(memMap.size())
                .append(" dirtyItems=")
                .append(dirtyItems.size())
                .append(" evictions=")
                .append(evictions.get())
                .append(" syncs=")
                .append(useStatsSyncUsed.get());
        return sb.toString();
    }

    /**
     * Returns the tracked key count, or 0 when no database is open.
     *
     * @see org.archive.util.ObjectIdentityCache#size()
     */
    public int size() {
        if (db == null) {
            return 0;
        }
        // clamp rather than let the narrowing cast wrap around
        return (int) Math.min(count.get(), Integer.MAX_VALUE);
    }

    /**
     * @return the name of the open database, or a placeholder when no
     *         database is open or the lookup fails
     */
    protected String getDatabaseName() {
        String name = "DbName-Lookup-Failed";
        try {
            if (this.db != null) {
                name = this.db.getDatabaseName();
            }
        } catch (DatabaseException ignored) {
            // Ignore: the name is only used in log output, so the
            // placeholder is good enough.
        }
        return name;
    }

    /**
     * Sync all dirty in-memory entries to backing disk store, then sync the
     * database itself.
     *
     * @throws RuntimeException wrapping any DatabaseException raised while
     *         syncing the database
     */
    public synchronized void sync() {
        useStatsSyncUsed.incrementAndGet();

        // decide once, so the start and finish messages always pair up
        final boolean logFine = logger.isLoggable(Level.FINE);
        String dbName = null;
        long startTime = 0;
        if (logFine) {
            dbName = getDatabaseName();
            startTime = System.currentTimeMillis();
            logger.fine(dbName + " start sizes: disk " + this.diskMap.size() +
                    ", mem " + this.memMap.size());
        }

        Iterator<Entry<String, V>> iter = dirtyItems.entrySet().iterator();
        while (iter.hasNext()) {
            Entry<String, V> entry = iter.next();
            // remove before writing: a concurrent dirtyKey() for the same
            // key then re-registers it instead of being discarded
            iter.remove();
            diskMap.put(entry.getKey(), entry.getValue());
        }

        try {
            this.db.sync();
        } catch (DatabaseException e) {
            throw new RuntimeException(e);
        }

        if (logFine) {
            logger.fine(dbName + " sync took " +
                    (System.currentTimeMillis() - startTime) + "ms. " +
                    "Finish sizes: disk " +
                    this.diskMap.size() + ", mem " + this.memMap.size());
        }
    }

    /**
     * Records that the in-memory value for the key has changed and must be
     * written to disk at the next flush.
     *
     * @param key key of the changed value
     */
    @Override
    public void dirtyKey(String key) {
        V val = memMap.get(key);
        if (val == null) {
            logger.severe("dirty key not in memory should be impossible");
            // there is nothing to persist, and the dirty map rejects nulls
            return;
        }
        dirtyItems.put(key, val);
    }
}