com.unister.semweb.drums.api.DRUMS
/* Copyright (C) 2012-2013 Unister GmbH
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
package com.unister.semweb.drums.api;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.carrotsearch.hppc.IntObjectOpenHashMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.unister.semweb.drums.DRUMSParameterSet;
import com.unister.semweb.drums.bucket.Bucket;
import com.unister.semweb.drums.bucket.BucketContainer;
import com.unister.semweb.drums.bucket.BucketContainerException;
import com.unister.semweb.drums.bucket.DynamicMemoryAllocater;
import com.unister.semweb.drums.bucket.hashfunction.AbstractHashFunction;
import com.unister.semweb.drums.file.FileLockException;
import com.unister.semweb.drums.file.HeaderIndexFile;
import com.unister.semweb.drums.file.IndexForHeaderIndexFile;
import com.unister.semweb.drums.storable.AbstractKVStorable;
import com.unister.semweb.drums.storable.GeneralStorable;
import com.unister.semweb.drums.sync.SyncManager;
import com.unister.semweb.drums.sync.synchronizer.ISynchronizerFactory;
import com.unister.semweb.drums.sync.synchronizer.SynchronizerFactory;
import com.unister.semweb.drums.sync.synchronizer.UpdateOnlySynchronizer;
import com.unister.semweb.drums.util.AbstractKVStorableComparator;
import com.unister.semweb.drums.util.ByteArrayComparator;
import com.unister.semweb.drums.util.KeyUtils;

/**
 * An instance of this class provides access to a DRUMS-table. The instance allows managing the storage of
 * {@link AbstractKVStorable}s. Use the static methods in {@link DRUMSInstantiator} to get an instance of {@link DRUMS}.
 * <p>
 * Use the method {@link #insertOrMerge(AbstractKVStorable...)} to insert or merge records.
 * To update records use the method {@link #update(AbstractKVStorable...)}.
 * Single selects can be performed by the method {@link #select(byte[]...)}.
 * To perform range selects, a {@link DRUMSReader} should be instantiated ({@link #getReader()}).
 * The whole table is best scanned using a {@link DRUMSIterator} ({@link #getIterator()}).
 *
 * @author Nils Thieme, Martin Nettling
 *
 * @param <Data>
 *            an implementation of {@link AbstractKVStorable}, e.g. {@link GeneralStorable}
 */
public class DRUMS<Data extends AbstractKVStorable> {
    private static final Logger logger = LoggerFactory.getLogger(DRUMS.class);

    /** the access mode for DRUMS */
    public enum AccessMode {
        READ_ONLY, READ_WRITE;
    }

    /** the hash function; decides where to search for an element, or where to store it */
    private AbstractHashFunction hashFunction;

    /** an array containing all used buckets */
    private Bucket<Data>[] buckets;

    /** the container for all buckets */
    private BucketContainer<Data> bucketContainer;

    /** the Synchronizer factory; needed to decide how to insert/update elements */
    private ISynchronizerFactory<Data> synchronizerFactory;

    /** the buffer that manages the different synchronize-processes */
    private SyncManager<Data> syncManager;

    /** a prototype of the elements to store */
    private Data prototype;

    /** A pointer to the {@link DRUMSParameterSet} used by this {@link DRUMS} */
    protected DRUMSParameterSet<Data> gp;

    protected DRUMSReader<Data> reader_instance;

    /**
     * This constructor should only be called by factory methods from this package.
     *
     * @param hashFunction
     *            a consistent hash-function
     * @param accessMode
     *            can be read or read/write
     * @param gp
     *            contains all needed settings
     * @throws IOException
     */
    protected DRUMS(AbstractHashFunction hashFunction, AccessMode accessMode, DRUMSParameterSet<Data> gp)
            throws IOException {
        this.prototype = gp.getPrototype();
        this.hashFunction = hashFunction;
        this.gp = gp;
        DynamicMemoryAllocater.instantiate(gp);
        gp.MIN_ELEMENT_IN_BUCKET_BEFORE_SYNC = (int) ((gp.BUCKET_MEMORY - gp.BUCKET_MEMORY % gp.MEMORY_CHUNK)
                / hashFunction.getNumberOfBuckets() / prototype.getSize() / 2);
        logger.info("Set MIN_ELEMENT_IN_BUCKET_BEFORE_SYNC to {}", gp.MIN_ELEMENT_IN_BUCKET_BEFORE_SYNC);
        if (accessMode == AccessMode.READ_WRITE) {
            @SuppressWarnings("unchecked")
            Bucket<Data>[] tmp = new Bucket[hashFunction.getNumberOfBuckets()];
            buckets = tmp;
            for (int i = 0; i < hashFunction.getNumberOfBuckets(); i++) {
                buckets[i] = new Bucket<Data>(i, gp);
                String tmpFileName = gp.DATABASE_DIRECTORY + "/" + hashFunction.getFilename(i);
                if (!new File(tmpFileName).exists()) {
                    HeaderIndexFile<Data> tmpFile;
                    try {
                        tmpFile = new HeaderIndexFile<Data>(tmpFileName, HeaderIndexFile.AccessMode.READ_WRITE, 1, gp);
                        tmpFile.close();
                    } catch (FileLockException e) {
                        logger.error("Can't create file {}, because file is locked by another process.", tmpFileName);
                    } catch (IOException e) {
                        logger.error("Can't create file {}. {}", tmpFileName, e);
                        throw e;
                    }
                }
            }
            bucketContainer = new BucketContainer<Data>(buckets, hashFunction);
            synchronizerFactory = new SynchronizerFactory<Data>();
            syncManager = new SyncManager<Data>(bucketContainer, synchronizerFactory, gp);
            syncManager.start();
        }
    }

    /**
     * Sets the {@link SynchronizerFactory}.
     *
     * @param factory
     *            the factory to use for creating synchronizers
     */
    public void setSynchronizerFactory(ISynchronizerFactory<Data> factory) {
        this.synchronizerFactory = factory;
        this.syncManager.setSynchronizer(factory);
    }

    /** @return a pointer to the local {@link BucketContainer} */
    public BucketContainer<Data> getBucketContainer() {
        return this.bucketContainer;
    }

    /** @return a pointer to the local {@link SyncManager} */
    public SyncManager<Data> getSyncManager() {
        return this.syncManager;
    }

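    /*
     * Usage sketch (illustrative): obtaining a table handle. DRUMS instances come from the
     * static factory methods in DRUMSInstantiator; the method name "createTable" below is an
     * assumed example name, not taken from this file.
     *
     *   DRUMSParameterSet<GeneralStorable> gp = ...;   // configured parameter set
     *   AbstractHashFunction hashFunction = ...;       // consistent hash-function
     *   DRUMS<GeneralStorable> drums = DRUMSInstantiator.createTable(hashFunction, gp);
     */
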
    /**
     * Adds or merges the given data. If all memory buckets are full, this method blocks the calling thread.
     * <p>
     * A merge calls the method {@link Data#merge(AbstractKVStorable)}.
     *
     * @param toPersist
     *            data to insert or update
     * @throws DRUMSException
     *             if an unexpected error occurs
     * @throws InterruptedException
     *             if the call blocks and the current thread is interrupted
     */
    public void insertOrMerge(Data... toPersist) throws DRUMSException, InterruptedException {
        try {
            bucketContainer.addToCache(toPersist);
        } catch (BucketContainerException ex) {
            // This exception should never be thrown because the hash function should map all keys to a bucket.
            throw new DRUMSException(ex);
        }
    }

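    /*
     * Usage sketch (illustrative): records whose keys are already present are merged via the
     * record's merge-function, new keys are inserted. "record1" and "record2" stand for any
     * records of the table's Data type, e.g. GeneralStorable instances.
     *
     *   drums.insertOrMerge(record1, record2); // may block while all memory buckets are full
     */
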
    /**
     * Updates the given data. Be careful: ONLY updates are performed. If the given array contains elements that are
     * not already stored in the underlying DRUMS-table, they will not be taken into account during synchronization.
     * <p>
     * This method uses the {@link UpdateOnlySynchronizer}, which by itself uses the Data's implemented update-function
     * ({@link Data#update(AbstractKVStorable)}) to update elements. If you want to merge objects, use
     * {@link #insertOrMerge(AbstractKVStorable...)} instead.
     *
     * @param records
     *            the data to update
     * @throws IOException
     */
    public void update(Data... records) throws IOException {
        // ############ reorder data
        IntObjectOpenHashMap<ArrayList<Data>> bucketDataMapping = new IntObjectOpenHashMap<ArrayList<Data>>();
        int bucketId;
        for (Data d : records) {
            bucketId = hashFunction.getBucketId(d.getKey());
            if (!bucketDataMapping.containsKey(bucketId)) {
                bucketDataMapping.put(bucketId, new ArrayList<Data>());
            }
            bucketDataMapping.get(bucketId).add(d);
        }
        for (IntObjectCursor<ArrayList<Data>> entry : bucketDataMapping) {
            UpdateOnlySynchronizer<Data> synchronizer = new UpdateOnlySynchronizer<Data>(gp.DATABASE_DIRECTORY + "/"
                    + hashFunction.getFilename(entry.key), gp);
            @SuppressWarnings("unchecked")
            Data[] toUpdate = (Data[]) entry.value.toArray(new AbstractKVStorable[entry.value.size()]);
            Arrays.sort(toUpdate, new AbstractKVStorableComparator());
            synchronizer.upsert(toUpdate);
        }
    }

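    /*
     * Usage sketch (illustrative): update existing records in place. Records with unknown keys
     * are silently skipped, so update(...) never creates new entries.
     *
     *   GeneralStorable changed = ...; // a record whose key already exists in the table
     *   drums.update(changed);
     */
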
    /**
     * Selects all existing records for the keys in the given array.
     *
     * @param keys
     *            the keys to look for
     * @return a list of all found elements
     * @throws DRUMSException
     */
    public List<Data> select(byte[]... keys) throws DRUMSException {
        List<Data> result = new ArrayList<Data>();
        IntObjectOpenHashMap<ArrayList<byte[]>> bucketKeyMapping = getBucketKeyMapping(keys);
        String filename;
        for (IntObjectCursor<ArrayList<byte[]>> entry : bucketKeyMapping) {
            filename = gp.DATABASE_DIRECTORY + "/" + hashFunction.getFilename(entry.key);
            HeaderIndexFile<Data> indexFile = null;
            try {
                indexFile = new HeaderIndexFile<Data>(filename, HeaderIndexFile.AccessMode.READ_ONLY,
                        gp.HEADER_FILE_LOCK_RETRY, gp);
                ArrayList<byte[]> keyList = entry.value;
                result.addAll(searchForData(indexFile, keyList.toArray(new byte[keyList.size()][])));
            } catch (FileLockException ex) {
                logger.error("Could not access the file {} within {} retries. The file seems to be locked.", filename,
                        gp.HEADER_FILE_LOCK_RETRY);
                throw new DRUMSException(ex);
            } catch (IOException ex) {
                logger.error("An exception occurred while trying to get objects from the file {}.", filename, ex);
                throw new DRUMSException(ex);
            } finally {
                if (indexFile != null) {
                    indexFile.close();
                }
            }
        }
        return result;
    }

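    /*
     * Usage sketch (illustrative): keys are raw byte arrays of the table's configured key size
     * (see getElementKeySize()); the 8-byte keys below are an assumed example.
     *
     *   byte[] k1 = new byte[] { 0, 0, 0, 0, 0, 0, 0, 1 };
     *   byte[] k2 = new byte[] { 0, 0, 0, 0, 0, 0, 0, 2 };
     *   List<GeneralStorable> found = drums.select(k1, k2); // may contain fewer elements than keys
     */
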
    /**
     * Reads <code>numberToRead</code> elements (or less, if there are not enough elements) from the bucket with the
     * given <code>bucketId</code>, beginning at the given element offset.
     *
     * @param bucketId
     *            the id of the bucket to read the elements from
     * @param elementOffset
     *            the offset in elements where to start reading
     * @param numberToRead
     *            the number of elements to read
     * @return an ArrayList containing the read data-objects
     * @throws FileLockException
     * @throws IOException
     */
    public List<Data> read(int bucketId, int elementOffset, int numberToRead) throws FileLockException, IOException {
        String filename = gp.DATABASE_DIRECTORY + "/" + hashFunction.getFilename(bucketId);
        HeaderIndexFile<Data> indexFile = new HeaderIndexFile<Data>(filename, HeaderIndexFile.AccessMode.READ_ONLY,
                gp.HEADER_FILE_LOCK_RETRY, gp);
        List<Data> result = new ArrayList<Data>();
        // where to start reading (in bytes)
        long actualOffset = elementOffset * gp.getElementSize();
        // read the complete buffer
        ByteBuffer dataBuffer = ByteBuffer.allocate(numberToRead * gp.getElementSize());
        indexFile.read(actualOffset, dataBuffer);
        dataBuffer.flip();
        byte[] dataArray = new byte[gp.getElementSize()];
        while (dataBuffer.position() < dataBuffer.limit()) {
            dataBuffer.get(dataArray);
            @SuppressWarnings("unchecked")
            Data copy = (Data) prototype.fromByteBuffer(ByteBuffer.wrap(dataArray));
            result.add(copy);
        }
        indexFile.close();
        return result;
    }

    /**
     * This method maps all keys in the given array to their corresponding buckets and returns the determined mapping.
     *
     * @param keys
     *            the keys to search for
     * @return a mapping from bucket-id to a list of keys
     */
    protected IntObjectOpenHashMap<ArrayList<byte[]>> getBucketKeyMapping(byte[]... keys) {
        IntObjectOpenHashMap<ArrayList<byte[]>> bucketKeyMapping = new IntObjectOpenHashMap<ArrayList<byte[]>>();
        int bucketId;
        for (byte[] key : keys) {
            bucketId = hashFunction.getBucketId(key);
            if (!bucketKeyMapping.containsKey(bucketId)) {
                bucketKeyMapping.put(bucketId, new ArrayList<byte[]>());
            }
            bucketKeyMapping.get(bucketId).add(key);
        }
        return bucketKeyMapping;
    }

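    /*
     * Usage sketch (illustrative) for read(...): page sequentially through one bucket; offsets
     * are counted in elements, not bytes. Bucket id 0 and the page size of 1000 are assumed values.
     *
     *   List<GeneralStorable> page = drums.read(0, 0, 1000); // first 1000 elements of bucket 0
     */
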
    /**
     * Searches for the {@link AbstractKVStorable}-records corresponding to the given keys within the given indexFile.
     * This is done by using the {@link IndexForHeaderIndexFile} from the given {@link HeaderIndexFile}. If you want to
     * do this in a more sequential way, try the method {@link #read(int, int, int)} or use a {@link DRUMSIterator}
     * ({@link #getIterator()}).
     *
     * @param indexFile
     *            the {@link HeaderIndexFile} to search for the keys in
     * @param keys
     *            the keys to search for
     * @return an {@link ArrayList} which contains the found records. Can contain fewer records than requested keys.
     * @throws IOException
     */
    public List<Data> searchForData(HeaderIndexFile<Data> indexFile, byte[]... keys) throws IOException {
        Arrays.sort(keys, new ByteArrayComparator());
        List<Data> result = new ArrayList<Data>();
        IndexForHeaderIndexFile<Data> index = indexFile.getIndex(); // pointer to the index
        int actualChunkIdx = 0, lastChunkIdx = -1;
        long actualChunkOffset = 0, oldChunkOffset = -1;
        int indexInChunk = 0;
        ByteBuffer workingBuffer = ByteBuffer.allocate((int) indexFile.getChunkSize());
        byte[] tmpB = new byte[gp.getElementSize()]; // temporarily stores the byte stream of one object
        for (byte[] key : keys) {
            // determine the chunk that may contain the key
            actualChunkIdx = index.getChunkId(key);
            actualChunkOffset = index.getStartOffsetOfChunk(actualChunkIdx);
            // if it is the same chunk as in the last step, reuse the old read buffer
            if (actualChunkIdx != lastChunkIdx) {
                // if we have already read a chunk
                if (oldChunkOffset > -1) {
                    // indexFile.write(oldChunkOffset, workingBuffer);
                    indexFile.read(oldChunkOffset, workingBuffer);
                    indexInChunk = 0;
                }
                // read a new chunk into the working buffer
                indexFile.read(actualChunkOffset, workingBuffer);
            }
            // find the offset of the key within the working buffer
            indexInChunk = findElementInReadBuffer(workingBuffer, key, indexInChunk);
            if (indexInChunk == -1) {
                logger.warn("Element with key {} was not found.", key);
                indexInChunk = 0;
                continue;
            }
            // read the element from the working buffer
            workingBuffer.position(indexInChunk);
            workingBuffer.get(tmpB);
            @SuppressWarnings("unchecked")
            Data copy = (Data) prototype.fromByteBuffer(ByteBuffer.wrap(tmpB));
            result.add(copy);
            lastChunkIdx = actualChunkIdx; // remember the last chunk
            oldChunkOffset = actualChunkOffset; // remember the offset of the last chunk
        }
        return result;
    }

    /**
     * @return the number of elements in the database
     * @throws IOException
     * @throws FileLockException
     */
    public long size() throws FileLockException, IOException {
        long size = 0L;
        for (int bucketId = 0; bucketId < hashFunction.getNumberOfBuckets(); bucketId++) {
            HeaderIndexFile<Data> headerIndexFile = new HeaderIndexFile<Data>(gp.DATABASE_DIRECTORY + "/"
                    + hashFunction.getFilename(bucketId), gp.HEADER_FILE_LOCK_RETRY, gp);
            size += headerIndexFile.getFilledUpFromContentStart() / gp.getElementSize();
            headerIndexFile.close();
        }
        return size;
    }

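    /*
     * Usage sketch (illustrative): size() derives the element count from the file lengths, so it
     * only reflects records that have already been synchronized to disk.
     *
     *   long elementsOnDisk = drums.size();
     */
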
    /**
     * Searches for the given key in the workingBuffer, beginning at the given index. Remember: the records in the
     * given workingBuffer have to be ordered ascending.
     *
     * @param workingBuffer
     *            the ByteBuffer to work on
     * @param key
     *            the key to find
     * @param indexInChunk
     *            the position in the workingBuffer to start reading from
     * @return the byte offset where the key was found, or -1 if the key wasn't found
     */
    public int findElementInReadBuffer(ByteBuffer workingBuffer, byte[] key, int indexInChunk) {
        workingBuffer.position(indexInChunk);
        int minElement = indexInChunk / gp.getElementSize();
        int numberOfEntries = workingBuffer.limit() / gp.getElementSize();
        // binary search over the fixed-size records in the buffer
        int maxElement = numberOfEntries - 1;
        int midElement;
        int comp;
        byte[] tempKey = new byte[gp.getKeySize()];
        while (minElement <= maxElement) {
            midElement = minElement + (maxElement - minElement) / 2;
            indexInChunk = midElement * gp.getElementSize();
            workingBuffer.position(indexInChunk);
            workingBuffer.get(tempKey);
            comp = KeyUtils.compareKey(key, tempKey, gp.getKeySize());
            if (comp == 0) {
                return indexInChunk;
            } else if (comp < 0) {
                maxElement = midElement - 1;
            } else {
                minElement = midElement + 1;
            }
        }
        return -1;
    }

    /**
     * Instantiates a new {@link DRUMSIterator} and returns it.
     *
     * @return a new {@link DRUMSIterator}
     */
    public DRUMSIterator<Data> getIterator() {
        return new DRUMSIterator<Data>(hashFunction, gp);
    }

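    /*
     * Usage sketch (illustrative): full scan over all buckets. DRUMSIterator is assumed here to
     * follow the java.util.Iterator contract; see its own documentation for the exact API.
     *
     *   DRUMSIterator<GeneralStorable> it = drums.getIterator();
     *   while (it.hasNext()) {
     *       GeneralStorable record = it.next();
     *       // process record
     *   }
     */
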
    /**
     * Returns a {@link DRUMSReader}. If the {@link DRUMSReader} was not instantiated yet, it will be instantiated. If
     * an instance exists, but its files were already closed, the files will be reopened.
     * <p>
     * Remember: while a Reader is open, buckets can't be synchronized.
     *
     * @return a pointer to the internal {@link DRUMSReader}-instance
     * @throws IOException
     * @throws FileLockException
     */
    public DRUMSReader<Data> getReader() throws FileLockException, IOException {
        if (reader_instance != null && !reader_instance.filesAreOpened) {
            reader_instance.openFiles();
        } else {
            reader_instance = new DRUMSReader<Data>(this);
        }
        return reader_instance;
    }

    /**
     * Joins the synchronization-thread ({@link SyncManager}) of this DRUMS-table.
     *
     * @throws InterruptedException
     */
    public void join() throws InterruptedException {
        syncManager.join();
    }

    /**
     * Closes this DRUMS-table.
     *
     * @throws InterruptedException
     */
    public void close() throws InterruptedException {
        if (reader_instance != null) {
            reader_instance.closeFiles();
        }
        reader_instance = null;
        // the SyncManager can only be closed when DRUMS was opened for READ_WRITE
        if (syncManager != null) {
            syncManager.shutdown();
            syncManager.join();
        }
    }

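    /*
     * Usage sketch (illustrative): range selects go through a DRUMSReader. The concrete
     * range-select method lives on DRUMSReader and is not named in this file; remember that
     * buckets can't be synchronized while the reader is open.
     *
     *   DRUMSReader<GeneralStorable> reader = drums.getReader();
     *   // ... perform range selects on the reader ...
     *   drums.close(); // closes the reader's files and shuts down the SyncManager
     */
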
    /**
     * Enables the force mode of DRUMS. All buckets will be synchronized independent of their fill level and last
     * sync-time.
     * <p>
     * Be careful: all buckets will be affected. Even if a bucket contains only one element, the synchronizer is going
     * to synchronize this bucket.
     * <p>
     * This method doesn't block the insert-methods.
     */
    public void enableForceMode() {
        syncManager.startForceMode();
    }

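    /*
     * Usage sketch (illustrative): temporarily force all buckets to disk, e.g. before reading
     * back recently inserted records.
     *
     *   drums.enableForceMode();
     *   // ... buckets are synchronized regardless of fill level and last sync-time ...
     *   drums.disableForceMode();
     */
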
    /**
     * Disables the force mode of DRUMS. Buckets will be synchronized only if constraints like fill level and last
     * sync-time are fulfilled.
     */
    public void disableForceMode() {
        syncManager.stopForceMode();
    }

    /** @return the size in bytes of one record to store */
    public int getElementSize() {
        return gp.getElementSize();
    }

    /** @return the size of the key of one record */
    public int getElementKeySize() {
        return gp.getKeySize();
    }

    /** @return the underlying hash-function */
    public AbstractHashFunction getHashFunction() {
        return this.hashFunction;
    }

    /**
     * Sets a new HashFunction. Be careful with this method: overwriting the hash function when elements have already
     * been inserted may mean that those elements can no longer be found by select.
     *
     * @param hashfunction
     *            the hash-function to set
     */
    public void setHashFunction(AbstractHashFunction hashfunction) {
        this.hashFunction = hashfunction;
    }

    /** @return the database-directory */
    public String getDatabaseDirectory() {
        return gp.DATABASE_DIRECTORY;
    }

    /** @return a pointer to the prototype. This is not a clone. */
    public Data getPrototype() {
        return prototype;
    }

    /** @return the {@link DRUMSParameterSet} that is used within this {@link DRUMS} */
    public DRUMSParameterSet<Data> getGlobalParameters() {
        return gp;
    }
}



