convex.etch.Etch (source listing)

package convex.etch;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.channels.FileLock;
import java.util.ArrayList;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import convex.core.Constants;
import convex.core.data.AArrayBlob;
import convex.core.data.ACell;
import convex.core.data.Blob;
import convex.core.data.Hash;
import convex.core.data.Ref;
import convex.core.data.RefSoft;
import convex.core.exceptions.BadFormatException;
import convex.core.util.Counters;
import convex.core.util.Shutdown;
import convex.core.util.Utils;

/**
 * A stupid, fast database for immutable data you want carved in stone.
 *
 * We solve the cache invalidation problem, quite effectively, by never changing anything. Once a value
 * is written for a given key, it cannot be changed. Etch is indifferent to the exact meaning of keys,
 * but they must have a fixed length of 32 bytes (256 bits).
 *
 * It is intended that keys are pseudo-random hash values, which will result in desirable distributions
 * of data for the radix tree structure.
 *
 * Radix tree index blocks are arrays of 8 byte pointers.
 *
 * To avoid creating too many index blocks when collisions occur, a chained entry list is created
 * in unused space within index blocks. Once there is no more space, chains are collapsed into a new index block.
 *
 * Header of file is 44 bytes as follows:
 * - Magic number 0xe7c6 (2 bytes)
 * - Etch version (2 bytes)
 * - Database length in bytes (8 bytes)
 * - Root hash (32 bytes)
 * 
 * Pointers in index blocks are of 4 possible types, determined by the two high bits (MSBs):
 * - 00 high bits: pointer to data
 * - 01 high bits: pointer to next index node
 * - 10 high bits: start of chained entry list
 * - 11 high bits: continuation of chained entry list
 *
 * Data is stored as:
 * - 32 bytes key
 * - 9 bytes monotonic label, consisting of:
 *    - 1 byte status
 *    - 8 bytes Memory Size 
 * - 2 bytes data length N (a short)
 * - N bytes actual data
 */
public class Etch {
	// structural constants for data block
	static final int KEY_SIZE=32;
	static final int MAX_LEVEL=60; // 2 bytes + 1 byte + 58 hex digits for 29 remaining bytes

	static final int LABEL_SIZE=1+8; // Flags (byte) plus Memory Size (long)
	static final int LENGTH_SIZE=2;
	static final int POINTER_SIZE=8;

	// constants for memory mapping buffers into manageable regions
	static final long MAX_REGION_SIZE=1<<30; // 1GB seems reasonable, note JVM 2GB limit :-/
	static final long REGION_MARGIN=65536; // 64k margin for writes past end of current buffer

	/**
	 * Magic number for Etch files, must be first 2 bytes
	 */
	static final byte[] MAGIC_NUMBER=Utils.hexToBytes("e7c6");
	
	/**
	 * Version number
	 */
	static final short ETCH_VERSION=1;

	static final int SIZE_HEADER_MAGIC=2;
	static final int SIZE_HEADER_VERSION=2;
	static final int SIZE_HEADER_FILESIZE=8;
	static final int SIZE_HEADER_ROOT=32;
	
	static final int ZLEN=16384;
	static final byte[] ZERO_ARRAY=new byte[ZLEN];

	/**
	 * Length of Etch header. Used to be 42 until we added the version number
	 *
	 * "The Ultimate Answer to Life, The Universe and Everything is... 42!"
     * - Douglas Adams, The Hitchhiker's Guide to the Galaxy
	 */
	static final int SIZE_HEADER=SIZE_HEADER_MAGIC+SIZE_HEADER_VERSION+SIZE_HEADER_FILESIZE+SIZE_HEADER_ROOT;

	protected static final long OFFSET_VERSION = SIZE_HEADER_MAGIC; // Skip past magic number
	protected static final long OFFSET_FILE_SIZE = OFFSET_VERSION+SIZE_HEADER_VERSION; // Skip past version
	protected static final long OFFSET_ROOT_HASH = OFFSET_FILE_SIZE+SIZE_HEADER_FILESIZE; // Skip past file size

	/**
	 * Start position of first index block
	 * This is immediately after the fixed-size file header
	 */
	static final long INDEX_START=SIZE_HEADER;

	static final long TYPE_MASK = 0xC000000000000000L;
	static final long PTR_PLAIN = 0x0000000000000000L; // direct pointer to data
	static final long PTR_INDEX = 0x4000000000000000L; // pointer to index block
	static final long PTR_START = 0x8000000000000000L; // start of chained entries
	static final long PTR_CHAIN = 0xC000000000000000L; // chained entries after start
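
	// Illustrative sketch (not part of the original Etch source): a slot value packs a
	// file offset in the low bits with one of the PTR_* type codes above in the two MSBs,
	// as described in the class comment. These helpers mirror that masking; the class
	// itself uses its own private helpers (e.g. slotType / rawPointer) for this purpose.
	static long exampleTagPointer(long rawOffset, long ptrType) {
		return (rawOffset&~TYPE_MASK)|ptrType; // e.g. exampleTagPointer(dataPos, PTR_INDEX)
	}

	static long exampleUntagPointer(long slotValue) {
		return slotValue&~TYPE_MASK; // recover the plain file offset
	}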

	private static final Logger log=LoggerFactory.getLogger(Etch.class.getName());

	/**
	 * Temporary byte array on a thread local basis.
	 */
	private final ThreadLocal<byte[]> tempArray=new ThreadLocal<>() {
		@Override
		public byte[] initialValue() {
			return new byte[2048];
		}
	};

	/**
	 * Counter used to generate unique temporary Etch file names
	 */
	private static long tempIndex=0;

	private final File file;
	private final String fileName;
	private final RandomAccessFile data;

	/**
	 * List of MappedByteBuffers for each region of the database file.
	 */
	private final ArrayList<MappedByteBuffer> regionMap=new ArrayList<>();

	/**
	 * Internal pointer to the end of the database (the position at which new data is appended)
	 */
	private long dataLength=0;

	private boolean BUILD_CHAINS=true;
	private EtchStore store;

	private Etch(File dataFile) throws IOException {
		// Ensure we have a RandomAccessFile that exists
		this.file=dataFile;
		if (!dataFile.exists()) dataFile.createNewFile();
		this.data=new RandomAccessFile(dataFile,"rw");

		this.fileName = dataFile.getName();

		// Try to exclusively lock the Etch database file
		FileChannel fileChannel=this.data.getChannel();
		FileLock lock=fileChannel.tryLock();
		if (lock==null) {
			log.error("Unable to obtain lock on file: {}",dataFile);
			throw new IOException("File lock failed");
		}

		// at this point, we have an exclusive lock on the database file.

		if (dataFile.length()==0) {
			// Need to populate a new file with header and initial index block
			MappedByteBuffer mbb=seekMap(0);

			// write Header
			byte[] temp=new byte[SIZE_HEADER];
			System.arraycopy(MAGIC_NUMBER, 0, temp, 0, SIZE_HEADER_MAGIC);
			Utils.writeShort(temp, (int)OFFSET_VERSION, ETCH_VERSION);
			mbb.put(temp);
			
			dataLength=SIZE_HEADER; // advance past the file header

			// add an index block
			mbb=seekMap(SIZE_HEADER);
			long indexStart=appendNewIndexBlock(0);
			assert(indexStart==INDEX_START);

			// ensure data length is initially correct
			writeDataLength();
		} else {
			// existing file, so need to read the length pointer
			MappedByteBuffer mbb=seekMap(0);
			byte[] check=new byte[2];
			mbb.get(check);
			if(!Arrays.equals(MAGIC_NUMBER, check)) {
				throw new IOException("Bad magic number! Probably not an Etch file: "+dataFile);
			}
			short version=mbb.getShort();
			if (version!=ETCH_VERSION) throw new IOException("Bad Etch version: expected "+ETCH_VERSION+" but was "+version+ " in "+dataFile);

			long length = mbb.getLong();
			dataLength=length;
		}

		// shutdown hook to close file / release lock
		convex.core.util.Shutdown.addHook(Shutdown.ETCH,this::close);
	}

	/**
	 * Create an Etch instance using a temporary file.
	 * @return The new Etch instance
	 * @throws IOException If an IO error occurs
	 */
	public static Etch createTempEtch() throws IOException {
		Etch newEtch =  createTempEtch("etch-"+tempIndex);
		tempIndex++;
		return newEtch;
	}

	/**
	 * Create an Etch instance using a temporary file with a specific file prefix.
	 * @param prefix temporary file prefix to use
	 * @return The new Etch instance
	 * @throws IOException If an IO error occurs
	 */
	public static Etch createTempEtch(String prefix) throws IOException {
		File data = File.createTempFile(prefix+"-", null);
		if (Constants.ETCH_DELETE_TEMP_ON_EXIT) data.deleteOnExit();
		return new Etch(data);
	}

	/**
	 * Create an Etch instance using the specified file
	 * @param file File with which to create Etch instance
	 * @return The new Etch instance
	 * @throws IOException If an IO error occurs
	 */
	public static Etch create(File file) throws IOException {
		Etch etch= new Etch(file);
		log.debug("Etch created on file: {} with data length: {}", file, etch.dataLength);
		return etch;
	}
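
	// Illustrative usage sketch (not part of the original source): open a temporary Etch
	// database, read its root hash and flush. Only methods visible in this listing are used;
	// a fresh file has an all-zero root hash since the header is initialised with zeros.
	static Hash exampleOpenTempAndReadRoot() throws IOException {
		Etch etch=Etch.createTempEtch(); // backed by a temporary file
		Hash root=etch.getRootHash();    // all-zero hash in a freshly created file
		etch.flush();                    // force any pending writes to disk
		return root;
	}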

	/**
	 * Gets a MappedByteBuffer for a given position, seeking to the specified location.
	 * Type flags are ignored if included in the position pointer.
	 *
	 * @param position Target position for the MappedByteBuffer
	 * @return MappedByteBuffer instance with correct position.
	 * @throws IOException
	 */
	private MappedByteBuffer seekMap(long position) throws IOException {
		position=rawPointer(position); // ensure we don't have any pesky type bits

		if ((position<0)||(position>dataLength)) {
			throw new EtchCorruptionError("Seek out of range in Etch file: position="+Utils.toHexString(position)+ " dataLength="+Utils.toHexString(dataLength)+" file="+file.getName());
		}

		MappedByteBuffer mbb=(MappedByteBuffer)((ByteBuffer)getInternalBuffer(position)).duplicate();

		mbb.position(Utils.checkedInt(position%MAX_REGION_SIZE));
		return mbb;
	}
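
	// Illustrative sketch (not part of the original source): how a file position maps to a
	// 1GB region index plus an offset within that region, matching the arithmetic used by
	// seekMap above and getInternalBuffer below.
	static long[] exampleRegionCoordinates(long position) {
		long regionIndex=position/MAX_REGION_SIZE;  // which mapped region
		long regionOffset=position%MAX_REGION_SIZE; // offset within that region's buffer
		return new long[] {regionIndex,regionOffset};
	}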

	/**
	 * Gets the internal mapped byte buffer for the specified region of the Etch database
	 * 
	 * @param position Position for which to get buffer
	 * @return Mapped Byte Buffer for specified region
	 * @throws IOException
	 */
	private MappedByteBuffer getInternalBuffer(long position) throws IOException {
		int regionIndex=Utils.checkedInt(position/MAX_REGION_SIZE); // 1GB chunks

		// Get current mapped region, or null if out of range
		int regionMapSize=regionMap.size();
		MappedByteBuffer mbb=(regionIndex<regionMapSize)?regionMap.get(regionIndex):null;

		// Map (or re-map) the region if no buffer exists yet, or if the existing buffer is
		// too small to cover the requested position plus the write margin.
		// (Reconstructed: the original region-mapping code was lost from this listing.)
		if ((mbb==null)||(mbb.capacity()<(position%MAX_REGION_SIZE)+REGION_MARGIN)) {
			long regionStart=regionIndex*MAX_REGION_SIZE;
			long mapLength=Math.min(MAX_REGION_SIZE,(dataLength+REGION_MARGIN)-regionStart);
			mbb=data.getChannel().map(MapMode.READ_WRITE,regionStart,mapLength);
			while (regionMap.size()<=regionIndex) regionMap.add(null);
			regionMap.set(regionIndex,mbb);
		}

		return mbb;
	}

	/**
	 * Writes a key / value pair to the store.
	 *
	 * @param key A key Blob of length KEY_SIZE (32 bytes)
	 * @param value Ref value to associate with the key
	 * @return The Ref written
	 * @throws IOException If an IO error occurs
	 */
	public <T extends ACell> Ref<T> write(AArrayBlob key, Ref<T> value) throws IOException {
		return write(key,0,value,INDEX_START);
	}

	private <T extends ACell> Ref<T> write(AArrayBlob key, int level, Ref<T> ref, long indexPosition) throws IOException {
		if (level>=MAX_LEVEL) {
			throw new Error("Max Level exceeded for key: "+key);
		}

		int isize=indexSize(level);
		int mask=isize-1;
		final int digit=getDigit(key,level);
		
		long slotValue=readSlot(indexPosition,digit);
		long type=slotType(slotValue);

		if (slotValue==0L) {
			// empty location, so simply write new value
			return writeNewData(indexPosition,digit,key,ref,PTR_PLAIN);

		} else if (type==PTR_INDEX) {
			// recursively check next level of index
			long newIndexPosition=rawPointer(slotValue); // clear high bits
			return write(key,level+1,ref,newIndexPosition);

		} else if (type==PTR_PLAIN) {
			// existing data pointer (non-zero)
			// check if we have the same value first, otherwise need to resolve conflict
			// This should have the current (potential collision) key in tempArray
			if (checkMatchingKey(key,slotValue)) {
				return updateInPlace(slotValue,ref);
			}

			// we need to check the next slot position to see if we can extend to a chain
			int nextDigit=(digit+1)%isize;
			long nextSlotValue=readSlot(indexPosition,nextDigit);

			// if next slot is empty, we can make a chain!
			if (BUILD_CHAINS&&(nextSlotValue==0L)) {
				// update current slot to be the start of a chain
				writeSlot(indexPosition,digit,slotValue|PTR_START);

				// write new data pointer to next slot
				long newDataPointer=appendData(key,ref);
				writeSlot(indexPosition,nextDigit,newDataPointer|PTR_CHAIN);

				return ref;
			}
			
			// have collision, so create new index node including the existing pointer
			int nextLevel=level+1;
			// Note: temp should contain key from checkMatchingKey!
			byte[] temp=tempArray.get();
			int nextDigitOfCollided=getDigit(Blob.wrap(temp,0,KEY_SIZE),nextLevel);
			long newIndexPosition=appendLeafIndex(nextLevel,nextDigitOfCollided,slotValue);

			// put index pointer into this index block, setting flags for index node
			writeSlot(indexPosition,digit,newIndexPosition|PTR_INDEX);

			// recursively write this key
			return write(key,nextLevel,ref,newIndexPosition);
		} else if (type==PTR_START) {
			// first check if the start pointer is the right value. if so, just update in place
			if (checkMatchingKey(key, slotValue)) {
				return updateInPlace(slotValue,ref);
			}

			// now scan slots, looking for either the right value or an empty space
			int i=1;
			while (i<isize) {
				// ... (lost from this listing: each chained slot after the start entry is
				// checked for a matching key or a free continuation slot; a full chain is
				// collapsed into a new index block) ...
				i++;
			}
		}
		// ... (lost from this listing: the remainder of this method, including handling
		// for PTR_CHAIN slots and the final return) ...
	}

	/**
	 * Appends a leaf index block containing a single data pointer at the specified
	 * digit position. All other slots in the new block are zero.
	 *
	 * @param level Level of the new index block (must be greater than zero)
	 * @param digit Digit position for the data pointer within the new block
	 * @param dataPointer Pointer value to store in the new block
	 * @return Position of the newly appended index block
	 * @throws IOException
	 */
	private long appendLeafIndex(int level, int digit, long dataPointer) throws IOException {
		assert(level>0);
		int isize=indexSize(level);
		int mask=isize-1;
		int indexBlockLength=POINTER_SIZE*isize;
		digit=digit&mask;
		
		long position=dataLength;
		byte[] temp=tempArray.get();
		Arrays.fill(temp, 0,indexBlockLength,(byte)0x00);
		
		int ix=POINTER_SIZE*digit; // compute position in block. note: should be already masked above
		Utils.writeLong(temp, ix,dataPointer); // single node
		MappedByteBuffer mbb=seekMap(position);
		mbb.put(temp,0,indexBlockLength); // write index block
		
		// set the datalength to the last available byte in the file after adding index block
		setDataLength(position+indexBlockLength);
		return position;
	}

	/**
	 * Reads a Ref from the database for the given key, returning null if not found
	 * @param key Key to read from the store
	 * @return RefSoft containing the stored value, or null if not found
	 * @throws IOException If an IO error occurs
	 */
	public <T extends ACell> RefSoft<T> read(AArrayBlob key) throws IOException {
		Counters.etchRead++;

		long pointer=seekPosition(key);
		if (pointer<0) {
			Counters.etchMiss++;
			return null; // not found
		}
		
		return read(key,pointer);
	}
	
	/**
	 * Reads a Cell from the specified location in an Etch file. WARNING: does not perform any validation
	 * @param <T> Type of Cell expected
	 * @param ptr Pointer offset into Etch file. Type flags are ignored.
	 * @return Cell value (may be null)
	 * @throws IOException In event of IO Error
	 */
	@SuppressWarnings("unchecked")
	public <T extends ACell> T readCell(long ptr) throws IOException {
		ptr=rawPointer(ptr);
		return (T)(read(null,ptr).getValue());
	}
		
	public <T extends ACell> RefSoft<T> read(AArrayBlob key, long pointer) throws IOException {
		MappedByteBuffer mbb;
		if (key==null) {
			mbb=seekMap(pointer);
			byte[] bs=new byte[KEY_SIZE];
			mbb.get(bs);
			key=Hash.wrap(bs);
		} else {
			// seek to correct position, skipping over key
			mbb=seekMap(pointer+KEY_SIZE);
		}
		
		// get flags byte
		byte flagByte=mbb.get();

		// Get memory size
		long memorySize=mbb.getLong();

		// get Data length
		short length=mbb.getShort();
		byte[] bs=new byte[length];
		mbb.get(bs);
		Blob encoding= Blob.wrap(bs);
		try {
			Hash hash=Hash.wrap(key);
			T cell=store.decode(encoding);
			cell.getEncoding().attachContentHash(hash);

			if (memorySize>0) {
				// need to attach memory size for cell
				cell.attachMemorySize(memorySize);
			}

			RefSoft<T> ref=RefSoft.create(store,cell, (int)flagByte);
			cell.attachRef(ref);

			return ref;
		} catch (BadFormatException e) {
			throw new Error("Failed to read data in etch store: "+encoding.toHexString()+" flags = "+Utils.toHexString(flagByte)+" length ="+length+" pointer = "+Utils.toHexString(pointer)+ " memorySize="+memorySize,e);
		}
	}

	/**
	 * Flushes any changes to persistent storage.
	 * @throws IOException If an IO error occurs
	 */
	public synchronized void flush() throws IOException {
		for (MappedByteBuffer mbb: regionMap) {
			if (mbb!=null) mbb.force();
		}
		data.getChannel().force(false);
	}
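
	// Illustrative usage sketch (not part of the original source): a typical "commit" of a
	// new root hash is to set it and then flush, so the mapped regions holding the header
	// are forced to storage.
	static void exampleCommitRoot(Etch etch, Hash newRoot) throws IOException {
		etch.setRootHash(newRoot); // update the 32-byte root hash field in the header
		etch.flush();              // force mapped buffers and channel content to disk
	}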

	/**
	 * Gets the position of a value in the data file from the index
	 * @param key Key value
	 * @return data file offset or -1 if not found
	 * @throws IOException
	 */
	private long seekPosition(AArrayBlob key) throws IOException {
		return seekPosition(key,0,INDEX_START);
	}

	/**
	 * Gets the slot value at the specified digit position in an index block. Doesn't affect temp array.
	 * @param indexPosition Position of index block
	 * @param digit Position of slot within index block
	 * @return Pointer value (including type bits in MSBs)
	 * @throws IOException In case of IO Error
	 */
	public long readSlot(long indexPosition, int digit) throws IOException {
		long pointerIndex=indexPosition+POINTER_SIZE*digit;
		MappedByteBuffer mbb=seekMap(pointerIndex);
		long pointer=mbb.getLong();
		return pointer;
	}

	/**
	 * Creates and writes a new data pointer at the specified position, storing the key/value
	 * and applying the specified type to the pointer stored in the slot
	 *
	 * @param indexPosition Position of the index block containing the slot
	 * @param digit Digit position of the slot within the index block
	 * @param key Key for the data
	 * @param value Value of the data
	 * @param type Pointer type to apply to the stored slot value
	 * @return The Ref written
	 * @throws IOException
	 */
	private <T extends ACell> Ref<T> writeNewData(long indexPosition, int digit, AArrayBlob key, Ref<T> value, long type) throws IOException {
		long newDataPointer=appendData(key,value)|type;
		writeSlot(indexPosition, digit, newDataPointer);
		return value;
	}

	/**
	 * Updates a Ref in place at the specified position. Assumes the stored data itself is unchanged.
	 * @param position Slot value containing the position in the storage file
	 * @param ref Ref with the new status/flags to merge
	 * @return Updated Ref reflecting the merged flags
	 * @throws IOException
	 */
	private <T extends ACell> Ref<T> updateInPlace(long position, Ref<T> ref) throws IOException {
		// ensure we have a raw position
		position=rawPointer(position);
		
		// Seek to status location
		MappedByteBuffer mbb=seekMap(position+KEY_SIZE);

		// Get current stored values
		int currentFlags=mbb.get();
		int newFlags=Ref.mergeFlags(currentFlags,ref.getFlags()); // idempotent flag merge

		long currentSize=mbb.getLong();

		if (currentFlags==newFlags) return ref;

		// We have a status change, need to increase status of store
		mbb=seekMap(position+KEY_SIZE);
		mbb.put((byte)newFlags);

		// maybe update size, if not already persisted
		if ((currentSize==0L)&&((newFlags&Ref.STATUS_MASK)>=Ref.PERSISTED)) {
			mbb.putLong(ref.getValue().getMemorySize());
		}

		return ref.withFlags(newFlags);	// reflect merged flags
	}

	/**
	 * Writes a slot value to an index block.
	 *
	 * @param indexPosition Position of the index block
	 * @param digit Digit radix position in index block
	 * @param slotValue Slot value to write (may include type bits)
	 * @throws IOException
	 */
	private void writeSlot(long indexPosition, int digit, long slotValue) throws IOException {
		long position=indexPosition+digit*POINTER_SIZE;
		MappedByteBuffer mbb=seekMap(position);
		mbb.putLong(slotValue);
	}
	
	public void visitIndex(IEtchIndexVisitor v) throws IOException {
		int[] bs=new int[32];
		visitIndex(v,bs,0,INDEX_START);
	}
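
	// Illustrative sketch (not part of the original source): a minimal visitor that counts
	// visited index blocks. This assumes IEtchIndexVisitor is a functional interface with
	// the single method visit(Etch, int, int[], long) used in visitIndex above.
	static long exampleCountIndexBlocks(Etch etch) throws IOException {
		long[] count= {0L};
		etch.visitIndex((e,level,digits,indexPointer)->count[0]++);
		return count[0];
	}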

	private void visitIndex(IEtchIndexVisitor v, int[] digits, int level, long indexPointer) throws IOException {
		v.visit(this, level, digits, indexPointer);
		int n=indexSize(level);
		for (int i=0; i<n; i++) {
			long slot=readSlot(indexPointer,i);
			// Recurse into any child index blocks (reconstructed from context)
			if (extractType(slot)==PTR_INDEX) {
				digits[level]=i;
				visitIndex(v,digits,level+1,rawPointer(slot));
			}
		}
	}

	/**
	 * Gets the position of a value in the data file, searching from the given index block.
	 * @param key Key value
	 * @param level Current level in the index
	 * @param indexPosition Position of the index block to search
	 * @return Data file offset, or -1 if not found
	 * @throws IOException
	 */
	private long seekPosition(AArrayBlob key, int level, long indexPosition) throws IOException {
		if (level>=MAX_LEVEL) {
			throw new Error("Etch index level exceeded for key: "+key);
		}
		int isize=indexSize(level);
		int mask=isize-1;
		int digit=getDigit(key,level);
		long slotValue=readSlot(indexPosition,digit);
		
		long type=(slotValue&TYPE_MASK);
		if (slotValue==0) {
			// Empty slot i.e. not found
			return -1;
		} else if (type==PTR_INDEX) {
			// recursively check next index node
			long newIndexPosition=rawPointer(slotValue);
			return seekPosition(key,level+1,newIndexPosition);
		} else if (type==PTR_PLAIN) {
			if (checkMatchingKey(key,slotValue)) return slotValue;
			return -1;
		} else if (type==PTR_CHAIN) {
			// continuation of chain from some previous index, therefore key can't be present
			return -1;
		} else if (type==PTR_START) {
			synchronized (this) {
				// start of chain, so scan chain of entries
				int i=0;
				while (i<isize) {
					// ... (lost from this listing: each chained slot is checked against the
					// key; a match returns its data position, and the scan stops at an
					// empty or non-chain slot) ...
					i++;
				}
			}
			return -1; // not found in chain
		}
		return -1; // unknown slot type: treat as not found (reconstructed)
	}

	// ... (also lost from this listing: several private helpers referenced above, including
	// checkMatchingKey, slotType, rawPointer, writeDataLength and close, plus the
	// AArrayBlob variant of getDigit used on the write path) ...
	
	/**
	 * Gets the radix index digit for the specified level
	 * @param dp Data pointer into store
	 * @param level Level of Etch store index to get digit for
	 * @return Radix digit value for the given level
	 * @throws IOException 
	 */
	private int getDigit(long dp, int level) throws IOException {
		if (level==0) {
			MappedByteBuffer mbb=seekMap(dp);
			return mbb.getShort()&0xffff;
		} 
		if (level==1) {
			MappedByteBuffer mbb=seekMap(dp+(level+1));
			return mbb.get()&0xFF;
		}
		int bi=(level+4)/2;      // level 2,3 maps to 3 etc.
		boolean hi=(level&1)==0; // we want high byte if even
		MappedByteBuffer mbb=seekMap(dp+bi);
		byte v= mbb.get();
		return (hi?(v>>4):v)&0xf;		
	}

	/**
	 * Gets the index block size for a given level
	 * @param level Level of index block in Etch store
	 * @return Index block size as number of entries
	 */
	public int indexSize(int level) {
		if (level==0) return 65536;
		if (level==1) return 256;
		return 16;
	}
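
	// Illustrative sketch (not part of the original source): how a 32-byte key is split into
	// radix digits consistent with indexSize above and getDigit below: a 16-bit digit at
	// level 0, an 8-bit digit at level 1, then one hex digit (4 bits) per further level.
	static int exampleKeyDigit(byte[] key, int level) {
		if (level==0) return ((key[0]&0xFF)<<8)|(key[1]&0xFF); // 0..65535
		if (level==1) return key[2]&0xFF;                      // 0..255
		int bi=(level+4)/2;        // byte index: levels 2,3 use byte 3; levels 4,5 use byte 4; ...
		boolean hi=(level&1)==0;   // even levels take the high nibble
		int v=key[bi]&0xFF;
		return hi?(v>>4):(v&0xF);  // 0..15
	}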

	/**
	 * Append a new index block to the store file. The new Index block will be initially empty,
	 * i.e. filled completely with zeros.
	 * WARNING: Overwrites temp array!
	 * @return The location of the newly added index block.
	 * @throws IOException
	 */
	private long appendNewIndexBlock(int level) throws IOException {
		if (level>=MAX_LEVEL) {
			// Invalid level! Prepare to output error
			throw new Error("Overflowing key size - key collision?");
		}
		
		int isize=indexSize(level);
		int sizeBytes=isize*POINTER_SIZE;
		
		long position=dataLength;
		MappedByteBuffer mbb=null;
		
		// set the datalength to the last available byte in the file
		setDataLength(position+sizeBytes);
		
		// Use temporary zero array to fill new index block
		for (int ix=0; ix<sizeBytes; ix+=ZLEN) {
			// zero-fill the new index block in chunks of at most ZLEN bytes
			// (reconstructed from context)
			int n=Math.min(ZLEN,sizeBytes-ix);
			mbb=seekMap(position+ix);
			mbb.put(ZERO_ARRAY,0,n);
		}

		return position;
	}

	/**
	 * Appends a new data record (key, label and encoding) at the end of the store file.
	 *
	 * @param key Key for the data
	 * @param ref Ref containing the cell value to append
	 * @return Position of the appended data record
	 * @throws IOException
	 */
	private long appendData(AArrayBlob key, Ref<?> ref) throws IOException {
		assert(key.count()==KEY_SIZE);
		Counters.etchWrite++;

		// Get relevant values for writing
		// probably need to call these first, might move mbb position?
		ACell cell=ref.getValue();
		Blob encoding=cell.getEncoding();
		int status=ref.getStatus();

		long memorySize=0L;
		if (status>=Ref.PERSISTED) {
			memorySize=cell.getMemorySize();
		}

		// position ready for append
		final long position=dataLength;
		MappedByteBuffer mbb=seekMap(position);

		// append key
		mbb.put(key.getInternalArray(),key.getInternalOffset(),KEY_SIZE);

		// append flags (1 byte)
		int flags=ref.flagsWithStatus(Math.max(ref.getStatus(),Ref.STORED));
		mbb.put((byte)(flags)); // currently all flags fit in one byte

		// append Memory Size (8 bytes). Initialised to 0L if STORED only.
		mbb.putLong(memorySize);

		// append blob length
		short length=Utils.checkedShort(encoding.count());
		if (length==0) {
			// Blob b=cell.createEncoding();
			throw new Error("Etch trying to write zero length encoding for: "+Utils.getClassName(cell));
		}
		mbb.putShort(length);

		// append blob value
		mbb.put(encoding.getInternalArray(),encoding.getInternalOffset(),length);

		// set the datalength to the last available byte in the file
		setDataLength(position+KEY_SIZE+LABEL_SIZE+LENGTH_SIZE+length);

		// return file position for added data
		return position;
	}
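
	// Illustrative sketch (not part of the original source): the on-disk size of a record
	// written by appendData above, following the layout in the class comment: 32-byte key,
	// 1+8 byte label (flags + memory size), 2-byte length, then the encoding itself.
	static long exampleRecordSize(int encodingLength) {
		return KEY_SIZE+LABEL_SIZE+LENGTH_SIZE+(long)encodingLength;
	}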

	/**
	 * Sets the total db dataLength in memory. This is the position in the database
	 * at which new data will next be written.
	 *
	 * @param value The new data length to be set
	 *
	 */
	private void setDataLength(long value) {
		// we can never go back! If we do then we will be corrupting the database
		if (value < dataLength) {
			throw new Error("PANIC! New data length is less than the old data length");
		}
		dataLength = value;
	}

	public File getFile() {
		return file;
	}

	public String getFileName() {
		return fileName;
	}

	public synchronized Hash getRootHash() throws IOException {
		MappedByteBuffer mbb=seekMap(OFFSET_ROOT_HASH);
		byte[] bs=new byte[Hash.LENGTH];
		mbb.get(bs);
		return Hash.wrap(bs);
	}

	/**
	 * Writes the root data hash to the Store
	 * @param h Hash value to write
	 * @throws IOException If IO Error occurs
	 */
	public synchronized void setRootHash(Hash h) throws IOException {
		MappedByteBuffer mbb=seekMap(OFFSET_ROOT_HASH);
		byte[] bs=h.getBytes();
		assert(bs.length==Hash.LENGTH);
		mbb.put(bs);
	}

	public void setStore(EtchStore etchStore) {
		this.store=etchStore;
	}

	/**
	 * Gets the type code for an index slot value
	 * @param slot Raw slot value
	 * @return Type code
	 */
	public long extractType(long slot) {
		return slot&TYPE_MASK;
	}




}



