// All downloads are free. Search and download functionality uses the official Maven repository.
//
// aQute.libg.cafs.CAFS Maven / Gradle / Ivy
//
// The newest version!
package aQute.libg.cafs;

import static aQute.lib.io.IO.copy;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

import aQute.lib.index.Index;
import aQute.lib.io.IO;
import aQute.libg.cryptography.SHA1;

/**
 * CAFS implements a SHA-1 based Content Addressable File Store. The basic idea
 * is that every file in the universe has a unique SHA-1. Hard to believe but
 * people smarter than me have come to that conclusion. This class maintains a
 * compressed store of SHA-1 identified files. So if you have the SHA-1, you can
 * get the contents. This makes it easy to store a SHA-1 instead of the whole
 * file or maintain a naming scheme. An added advantage is that it is always
 * easy to verify you get the right stuff. The SHA-1 Content Addressable File
 * Store is the core underlying idea in Git.
 * <p>
 * On disk the store consists of two files in the home directory: the store
 * file ({@code store.cafs}) and an index file ({@code index.idx}) that maps a
 * SHA-1 to the offset of its record in the store file. The store file starts
 * with a 256 byte preamble that begins with the signature {@code CAFS}; records
 * are appended after it. Each record is a {@link #HEADERLENGTH} byte header
 * ({@code CAFE} signature, flags, deflated length, inflated length, SHA-1 and
 * a 16 bit header checksum) followed by the deflated content.
 * <p>
 * All accesses to the store file and the index are serialized by synchronizing
 * on the store file object.
 */
public class CAFS implements Closeable, Iterable<SHA1> {
	final static byte[]	CAFS			= "CAFS".getBytes(UTF_8);
	final static byte[]	CAFE			= "CAFE".getBytes(UTF_8);
	final static String	INDEXFILE		= "index.idx";
	final static String	STOREFILE		= "store.cafs";
	final static String	ALGORITHM		= "SHA-1";
	final static int	KEYLENGTH		= 20;
	final static int	HEADERLENGTH	= 4				// CAFE
		+ 4												// flags
		+ 4												// compressed length
		+ 4												// uncompressed length
		+ KEYLENGTH										// key
		+ 2												// header checksum
	;
	/** Offset of the first record, i.e. the size of the store preamble. */
	final static int	FIRSTRECORD		= 0x100;

	final File			home;
	Index				index;
	RandomAccessFile	store;
	FileChannel			channel;

	/**
	 * Constructor for a Content Addressable File Store.
	 *
	 * @param home the directory that holds the index and the store file
	 * @param create when {@code true}, a missing directory and/or a store
	 *            file shorter than the preamble are (re)initialized; when
	 *            {@code false} these conditions are errors
	 * @throws IllegalArgumentException if {@code home} is not a directory or
	 *             the store file is too short while {@code create} is
	 *             {@code false}, or if the store file does not start with
	 *             the {@code CAFS} signature
	 * @throws Exception if the index or store cannot be opened
	 */
	public CAFS(File home, boolean create) throws Exception {
		this.home = home;
		if (!home.isDirectory()) {
			if (!create)
				throw new IllegalArgumentException("CAFS requires a directory with create=false");
			IO.mkdirs(home);
		}

		boolean opened = false;
		try {
			index = new Index(new File(home, INDEXFILE), KEYLENGTH);
			store = new RandomAccessFile(new File(home, STOREFILE), "rw");
			channel = store.getChannel();
			if (store.length() < FIRSTRECORD) {
				if (!create)
					throw new IllegalArgumentException("Invalid store file, length is too short " + store);

				// Signature followed by zeros up to the first record offset
				store.write(CAFS);
				store.write(new byte[FIRSTRECORD - CAFS.length]);
				channel.force(true);
			}
			store.seek(0);
			if (!verifySignature(store, CAFS))
				throw new IllegalArgumentException("Not a valid signature: CAFS at start of file");
			opened = true;
		} finally {
			// Do not leave file handles behind when construction fails
			if (!opened)
				closeQuietly();
		}
	}

	/**
	 * Close whatever was opened so far, ignoring secondary failures. Only
	 * used on the failure path of the constructor, where the original
	 * exception is the one that must reach the caller.
	 */
	private void closeQuietly() {
		if (store != null) {
			try {
				store.close();
			} catch (IOException ignored) {
				// already failing, the primary exception is reported
			}
		}
		if (index != null) {
			try {
				index.close();
			} catch (Exception ignored) {
				// already failing, the primary exception is reported
			}
		}
	}

	/**
	 * Store an input stream in the CAFS while calculating and returning the
	 * SHA-1 code. The content is deflated in memory first; it is only
	 * appended to the store when the index does not already know the SHA-1.
	 *
	 * @param in The input stream to store.
	 * @return The SHA-1 code.
	 * @throws Exception if anything goes wrong
	 */
	public SHA1 write(InputStream in) throws Exception {
		Deflater deflater = new Deflater();
		try {
			MessageDigest md = MessageDigest.getInstance(ALGORITHM);
			DigestInputStream din = new DigestInputStream(in, md);
			ByteArrayOutputStream bout = new ByteArrayOutputStream();
			DeflaterOutputStream dout = new DeflaterOutputStream(bout, deflater);
			try {
				copy(din, dout);
			} finally {
				// Closing finishes the deflate stream so that the buffer
				// holds the complete compressed content. Closing an
				// already closed DeflaterOutputStream has no effect.
				dout.close();
			}

			// Must be read before the deflater is ended
			int totalLength = deflater.getTotalIn();
			SHA1 sha1 = new SHA1(md.digest());

			synchronized (store) {
				// First check if it already exists
				long search = index.search(sha1.digest());
				if (search > 0)
					return sha1;

				byte[] compressed = bout.toByteArray();

				// we need to append this file to our store,
				// which requires a lock. However, we are in a race
				// so others can get the lock between us getting
				// the length and someone else getting the lock.
				// So we must verify after we get the lock that the
				// length was unchanged.
				FileLock lock = null;
				try {
					long insertPoint;
					int recordLength = compressed.length + HEADERLENGTH;

					while (true) {
						insertPoint = store.length();
						lock = channel.lock(insertPoint, recordLength, false);

						if (store.length() == insertPoint)
							break;

						// We got the wrong lock, someone else
						// got in between reading the length
						// and locking
						lock.release();
						lock = null;
					}
					store.seek(insertPoint);
					update(sha1.digest(), compressed, totalLength);
					index.insert(sha1.digest(), insertPoint);
					return sha1;
				} finally {
					if (lock != null)
						lock.release();
				}
			}
		} finally {
			// Release the native resources held by the deflater
			deflater.end();
		}
	}

	/**
	 * Read the contents of a SHA-1 key. The record header (signature, key and
	 * header checksum) is verified before the stream is returned; the content
	 * itself is verified by the returned stream when it reaches its end or is
	 * closed.
	 *
	 * @param sha1 The key
	 * @return An Input Stream on the content or null if the key is not found
	 * @throws IllegalArgumentException if the record has no valid signature
	 *             or its header checksum does not match
	 * @throws IOException if the key stored in the record differs from the
	 *             requested key
	 * @throws Exception if the index or store cannot be read
	 */
	public InputStream read(final SHA1 sha1) throws Exception {
		synchronized (store) {
			long offset = index.search(sha1.digest());
			if (offset < 0)
				return null;

			store.seek(offset);
			if (!verifySignature(store, CAFE))
				throw new IllegalArgumentException("No signature");

			int flags = store.readInt();
			int compressedLength = store.readInt();
			int uncompressedLength = store.readInt();
			byte[] readSha1 = new byte[KEYLENGTH];
			// readFully: a plain read() may return fewer bytes than asked
			store.readFully(readSha1);
			SHA1 rsha1 = new SHA1(readSha1);

			if (!sha1.equals(rsha1))
				throw new IOException("SHA1 read and asked mismatch: " + sha1 + " " + rsha1);

			short crc = store.readShort(); // Read CRC
			if (crc != checksum(flags, compressedLength, uncompressedLength, readSha1))
				throw new IllegalArgumentException("Invalid header checksum: " + sha1);

			byte[] buffer = new byte[compressedLength];
			store.readFully(buffer);
			return getSha1Stream(sha1, buffer, uncompressedLength);
		}
	}

	/**
	 * Answer whether the index knows the given key.
	 *
	 * @param sha1 the raw SHA-1 digest to look up
	 * @return {@code true} if the index search yields a non-negative offset
	 * @throws Exception if the index cannot be searched
	 */
	public boolean exists(byte[] sha1) throws Exception {
		synchronized (store) {
			return index.search(sha1) >= 0;
		}
	}

	/**
	 * Rebuild the index from the store file. Every record up to the store
	 * length observed at the start is verified and inserted in a fresh index
	 * file ({@code index.new}), which then replaces the current index file.
	 * <p>
	 * NOTE(review): records appended by {@link #write(InputStream)} while the
	 * rebuild is scanning are not part of the new index; confirm callers do
	 * not write concurrently with a reindex.
	 *
	 * @throws IllegalArgumentException if the store is shorter than the
	 *             preamble, does not start with the {@code CAFS} signature
	 *             or contains a corrupted record
	 * @throws Exception if the store or index files cannot be accessed
	 */
	public void reindex() throws Exception {
		long length;
		synchronized (store) {
			length = store.length();
			if (length < FIRSTRECORD)
				throw new IllegalArgumentException("Store file is too small, need to be at least 256 bytes: " + store);
		}

		try (RandomAccessFile in = new RandomAccessFile(new File(home, STOREFILE), "r")) {
			if (!verifySignature(in, CAFS))
				throw new IllegalArgumentException("Store file does not start with CAFS: " + in);

			in.seek(FIRSTRECORD);
			File ixf = new File(home, "index.new");
			Index rebuilt = new Index(ixf, KEYLENGTH);
			try {
				while (in.getFilePointer() < length) {
					long entry = in.getFilePointer();
					SHA1 sha1 = verifyEntry(in);
					rebuilt.insert(sha1.digest(), entry);
				}
			} finally {
				// Always close, also when a record turns out to be corrupt
				rebuilt.close();
			}

			synchronized (store) {
				File indexFile = new File(home, INDEXFILE);
				// Release the handle on the file we are about to replace
				index.close();
				try {
					IO.rename(ixf, indexFile);
				} finally {
					// Reopen in any case so this instance keeps a usable
					// index, even when the rename failed
					index = new Index(indexFile, KEYLENGTH);
				}
			}
		}
	}

	/**
	 * Close the store file and the index. The index is closed even when
	 * closing the store fails.
	 *
	 * @throws IOException if closing the store or the index fails
	 */
	@Override
	public void close() throws IOException {
		synchronized (store) {
			try {
				store.close();
			} finally {
				index.close();
			}
		}
	}

	/**
	 * Read and verify one complete record at the current position of the
	 * given file: signature, header checksum and, by inflating the content,
	 * the length and SHA-1 of the content. On return the file is positioned
	 * at the start of the next record.
	 *
	 * @param in the store file, positioned at the start of a record
	 * @return the key of the verified record
	 * @throws IllegalArgumentException if the signature or the header
	 *             checksum is wrong
	 * @throws IOException if the record cannot be read or its content does
	 *             not match the length or key in the header
	 * @throws NoSuchAlgorithmException if SHA-1 is not available
	 */
	private SHA1 verifyEntry(RandomAccessFile in) throws IOException, NoSuchAlgorithmException {
		if (!verifySignature(in, CAFE))
			throw new IllegalArgumentException("File is corrupted: " + in);

		int flags = in.readInt();
		int compressedSize = in.readInt();
		int uncompressedSize = in.readInt();
		byte[] key = new byte[KEYLENGTH];
		in.readFully(key);
		SHA1 sha1 = new SHA1(key);

		// The header ends with a 16 bit checksum (see update()). It must be
		// consumed here or the content would be read two bytes too early.
		short crc = in.readShort();
		if (crc != checksum(flags, compressedSize, uncompressedSize, key))
			throw new IllegalArgumentException("Invalid header checksum: " + sha1);

		byte[] buffer = new byte[compressedSize];
		in.readFully(buffer);

		// Reading the stream to its end triggers the length and SHA-1 checks
		try (InputStream xin = getSha1Stream(sha1, buffer, uncompressedSize)) {
			byte[] scratch = new byte[8192];
			while (xin.read(scratch) >= 0) {
				// drain
			}
		}
		return sha1;
	}

	/**
	 * Read {@code org.length} bytes and compare them with {@code org}.
	 *
	 * @param din the input to read from
	 * @param org the expected signature bytes
	 * @return {@code true} if the bytes read equal {@code org}
	 * @throws IOException if not enough bytes can be read
	 */
	private boolean verifySignature(DataInput din, byte[] org) throws IOException {
		byte[] read = new byte[org.length];
		din.readFully(read);
		return Arrays.equals(read, org);
	}

	/**
	 * Create a stream that inflates the given buffer and verifies, when the
	 * end of the data is reached or the stream is closed, that the number of
	 * inflated bytes equals {@code total} and that their SHA-1 equals
	 * {@code sha1}.
	 *
	 * @param sha1 the expected SHA-1 of the inflated content
	 * @param buffer the deflated content
	 * @param total the expected number of inflated bytes
	 * @return the verifying stream
	 * @throws NoSuchAlgorithmException if SHA-1 is not available
	 */
	private InputStream getSha1Stream(final SHA1 sha1, byte[] buffer, final int total) throws NoSuchAlgorithmException {
		ByteArrayInputStream in = new ByteArrayInputStream(buffer);
		InflaterInputStream iin = new InflaterInputStream(in) {
			int					count		= 0;
			final MessageDigest	digestx		= MessageDigest.getInstance(ALGORITHM);
			final AtomicBoolean	calculated	= new AtomicBoolean();

			@Override
			public int read(byte[] data, int offset, int length) throws IOException {
				int size = super.read(data, offset, length);
				// Only a negative size signals the end; a zero length
				// request legitimately returns 0
				if (size < 0)
					eof();
				else {
					count += size;
					this.digestx.update(data, offset, size);
				}
				return size;
			}

			@Override
			public int read() throws IOException {
				int c = super.read();
				if (c < 0)
					eof();
				else {
					count++;
					this.digestx.update((byte) c);
				}
				return c;
			}

			// Verify length and digest, at most once
			void eof() throws IOException {
				if (calculated.getAndSet(true))
					return;

				if (count != total)
					throw new IOException(
						"Counts do not match. Expected to read: " + total + " Actually read: " + count);

				SHA1 calculatedSha1 = new SHA1(digestx.digest());
				if (!sha1.equals(calculatedSha1))
					throw (new IOException(
						"SHA1 caclulated and asked mismatch, asked: " + sha1 + ", \nfound: " + calculatedSha1));
			}

			@Override
			public void close() throws IOException {
				try {
					eof();
				} finally {
					// Release the inflater even when verification fails
					super.close();
				}
			}
		};
		return iin;
	}

	/**
	 * Write a record to the store, assuming the store is at the right
	 * position, and force the content to disk.
	 *
	 * @param sha1 The key of the content
	 * @param compressed The deflated content
	 * @param totalLength The uncompressed length
	 * @throws IOException if writing fails
	 */
	private void update(byte[] sha1, byte[] compressed, int totalLength) throws IOException {
		store.write(CAFE); // 00-03 Signature
		store.writeInt(0); // 04-07 Flags for the future
		store.writeInt(compressed.length); // 08-11 Length deflated data
		store.writeInt(totalLength); // 12-15 Length
		store.write(sha1); // 16-35
		store.writeShort(checksum(0, compressed.length, totalLength, sha1)); // 36-37
		store.write(compressed);
		channel.force(false);
	}

	/**
	 * Calculate the header checksum: the low 16 bits of a CRC-32 over the
	 * bytes of the flags, the compressed length and the total length (each
	 * least significant byte first) followed by the key.
	 *
	 * @param flags the record flags
	 * @param compressedLength the deflated length
	 * @param totalLength the inflated length
	 * @param sha1 the key
	 * @return the 16 bit header checksum
	 */
	short checksum(int flags, int compressedLength, int totalLength, byte[] sha1) {
		CRC32 crc = new CRC32();
		int[] fields = {
			flags, compressedLength, totalLength
		};
		for (int field : fields) {
			// CRC32.update(int) only consumes the low 8 bits of its argument
			for (int shift = 0; shift < 32; shift += 8)
				crc.update(field >> shift);
		}
		crc.update(sha1);
		return (short) crc.getValue();
	}

	/**
	 * Iterate over the keys of all records in the store file, in the order
	 * in which they are stored. The header of each record is verified; the
	 * content is skipped.
	 *
	 * @return an iterator over the keys; it does not support removal
	 */
	@Override
	public Iterator<SHA1> iterator() {

		return new Iterator<SHA1>() {
			long position = FIRSTRECORD;

			@Override
			public boolean hasNext() {
				synchronized (store) {
					try {
						return position < store.length();
					} catch (IOException e) {
						throw new RuntimeException(e);
					}
				}
			}

			@Override
			public SHA1 next() {
				synchronized (store) {
					try {
						if (position >= store.length())
							throw new NoSuchElementException("No more entries in the store");

						store.seek(position);
						if (!verifySignature(store, CAFE))
							throw new IllegalArgumentException("No signature");

						int flags = store.readInt();
						int compressedLength = store.readInt();
						int totalLength = store.readInt();
						byte[] sha1 = new byte[KEYLENGTH];
						store.readFully(sha1);
						short crc = store.readShort();
						if (crc != checksum(flags, compressedLength, totalLength, sha1))
							throw new IllegalArgumentException("Header checksum fails");

						// Skip the content, on to the next header
						position += HEADERLENGTH + compressedLength;
						return new SHA1(sha1);
					} catch (IOException e) {
						throw new RuntimeException(e);
					}
				}
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException("Remvoe not supported, CAFS is write once");
			}
		};
	}

	/**
	 * Answer whether the store holds no records, i.e. the store file does
	 * not extend beyond its preamble.
	 *
	 * @return {@code true} if there are no records in the store
	 * @throws IOException if the length of the store cannot be determined
	 */
	public boolean isEmpty() throws IOException {
		synchronized (store) {
			// The length, not the file pointer: the pointer only reflects
			// where the last read or write happened to end
			return store.length() <= FIRSTRECORD;
		}
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy