org.zoodb.jdo.internal.server.index.PagedOidIndex Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of parent Show documentation
ZooDB Java JDO Object Database.
The newest version!
/*
 * Copyright 2009-2013 Tilmann Zaeschke. All rights reserved.
 * 
 * This file is part of ZooDB.
 * 
 * ZooDB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * ZooDB is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with ZooDB.  If not, see .
 * 
 * See the README and COPYING files for further information. 
 */
package org.zoodb.jdo.internal.server.index;

import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

import javax.jdo.JDOFatalDataStoreException;

import org.zoodb.jdo.internal.server.DiskIO.DATA_TYPE;
import org.zoodb.jdo.internal.server.StorageChannel;
import org.zoodb.jdo.internal.server.index.PagedUniqueLongLong.LLEntry;

/**
 * B-Tree like index structure.
 * There are two types of nodes, leaf nodes and inner nodes. Leaf nodes contain key-value pairs.
 * Inner nodes do not contain values. The contain n keys and (n||n+1) page references.
 * 
 * This paged OidIndex can be optimized towards the following properties:
 * - Unique entries
 * - (almost) ordered insertion.
 * - Hopefully (almost) consecutive insertion (unused values only after delete).
 * 
 * Deletion:
 * - Normal BTree deletion: 
 *   if (nEntry < min) then copy entries from prev/next pages
 *   -> if (nEntry < min then) two reads + two writes for every committed update
 *   -> pages are at least half filled -> Reasonable use of space
 *   Improvement: Distribute to prev&next page as soon as possible
 *   -> better use of space
 *   -> always 3 reads
 * - TZ deletion: 
 *   if (prev+this <= nEntries) then merge pages
 *   -> perfectly fine for leaf pages, could be improved to prev+this+next
 *   -> 2(3) reads, 1 writes (2 during merge).
 *   -> can lead to bad trees if used on inner pages and significant deletion in a deep tree;
 *      but still, badness is unlikely to be very bad, no unnecessary leafpages will ever be 
 *      created. TODO: check properly.
 *   -> Improvement: Store leaf size in inner page -> avoids reading neighbouring pages
 *      1 read per delete (1/2 during merge) But: inner pages get smaller. -> short values! 
 *      -> short values can be compressed (cutting of leading 0), because max value depends on
 *         page size, which is fixed. For OID pages: 64/1KB -> 6bit; 4KB->8bit; 16KB->10bit;  
 * - Naive deletion:
 *   Delete leaves only when page is empty
 *   -> Can completely prevent tree shrinkage.
 *   -> 1 read, 1 write
 *    
 * -> So far we go with the naive delete TODO!!!!!!
 * -> Always keep in mind: read access is most likely much more frequent than write access (insert,
 *    update/delete). 
 * -> Also keep in mind: especially inner index nodes are likely to be cached anyway, so they
 *    do not require a re-read. Caching is likely to occur until the index gets much bigger that
 *    memory.
 * -> To support previous statement, and in general: Make garbage collection of leaves easier than
 *    for inner nodes. E.g. reuse (gc-able) page buffers? TODO   
 * 
 * Pages as separate Objects vs hardlinked pages (current implementation).
 * Treating pages as objects is appealing, because most updates require only a single rewrite of the
 * local page and an update of the OID entry. But for OID-indices this is not advisable, because
 * any OID update triggers another OID update until an update fall on a page that is already dirty.
 * Another disadvantage is that values are 64bit, rather than 32bit page IDs. This could be helped
 * by always using page IDs and then again checking all objects on that page (reduced page-read
 * vs. increased CPU usage).  
 * 
 * @author Tilmann Zaeschke
 *
 */
public class PagedOidIndex {

	private static final long MIN_OID = 100;
	
	public static final class FilePos {
		final int page;
		final int offs;
		final long oid;
		private FilePos(LLEntry e) {
			this.oid = e.getKey();
//			this.page = (int)(pos >> 32);
//			this.offs = (int)(pos & 0x000000007FFFFFFF);
			this.page = BitTools.getPage(e.getValue());
			this.offs = BitTools.getOffs(e.getValue());
		}
		public int getPage() {
			return page;
		}
		public int getOffs() {
			return offs;
		}
		public long getOID() {
			return oid;
		}
		@Override
		public String toString() {
			return "FilePos::page=" + page + " ofs=" + offs + " oid=" + oid;
		}
	}
	

	static class OidIterator implements Iterator {

		private final LLIterator iter;
		
		public OidIterator(PagedUniqueLongLong root, long minKey, long maxKey) {
			iter = (LLIterator) root.iterator(minKey, maxKey);
		}

		@Override
		public boolean hasNext() {
			return iter.hasNextULL();
		}

		@Override
		public FilePos next() {
			LLEntry e = iter.nextULL();
			return new FilePos(e);
		}

		@Override
		public void remove() {
			iter.remove();
		}
	}
	
	/**
	 * Not really needed for OIDS, but used for testing indices.
	 */
	static class DescendingOidIterator implements Iterator {

		private final Iterator iter;
		
		public DescendingOidIterator(PagedUniqueLongLong root, long maxKey, long minKey) {
			iter = root.descendingIterator(maxKey, minKey);
		}

		@Override
		public boolean hasNext() {
			return iter.hasNext();
		}

		@Override
		public FilePos next() {
			LLEntry e = iter.next();
			return new FilePos(e);
		}

		@Override
		public void remove() {
			iter.remove();
		}
	}
	
	
	private transient long lastAllocatedInMemory = MIN_OID;
	private transient PagedUniqueLongLong idx;
	
	/**
	 * Constructor for creating new index. 
	 * @param file
	 */
	public PagedOidIndex(StorageChannel file) {
		idx = new PagedUniqueLongLong(DATA_TYPE.OID_INDEX, file);
	}

	/**
	 * Constructor for reading index from disk.
	 * @param lastUsedOid This parameter indicated the last used OID. It can be derived from 
	 * index.getMaxValue(), because this would allow reuse of OIDs if the latest objects are 
	 * deleted. This might cause a problem if references to the deleted objects still exist.
	 */
	public PagedOidIndex(StorageChannel file, int pageId, long lastUsedOid) {
		idx = new PagedUniqueLongLong(DATA_TYPE.OID_INDEX, file, pageId);
		lastAllocatedInMemory = lastUsedOid;
		if (lastAllocatedInMemory < MIN_OID) {
			lastAllocatedInMemory = MIN_OID;
		}
	}

	public void insertLong(long oid, int schPage, int schOffs) {
		long newVal = (((long)schPage) << 32) | (long)schOffs;
		idx.insertLong(oid, newVal);
		if (oid > lastAllocatedInMemory) {
			lastAllocatedInMemory = oid;
		}
	}

	/**
	 * @param oid key
	 * @return the previous value
	 * @throws NoSuchElementException if key is not found
	 */
	public long removeOid(long oid) {
		return idx.removeLong(oid);
	}

	/**
	 * @param oid key
	 * @param failValue The value to return in case the key has no entry.
	 * @return the previous value
	 */
	public long removeOidNoFail(long oid, long failValue) {
		return idx.removeLongNoFail(oid, failValue);
	}

	/**
	 * 
	 * @param oid
	 * @return FilePos instance or null, if the OID is not known.
	 */
	public FilePos findOid(long oid) {
		LLEntry e = idx.findValue(oid);
		return e == null ? null : new FilePos(e);
	}

	public LLEntry findOidGetLong(long oid) {
		return idx.findValue(oid);
	}

	public long[] allocateOids(int oidAllocSize) {
		long l1 = lastAllocatedInMemory;
		long l2 = l1 + oidAllocSize;

		long[] ret = new long[(int) (l2-l1)];
		for (int i = 0; i < l2-l1; i++ ) {
			ret[i] = l1 + i + 1;
		}
		
		lastAllocatedInMemory += oidAllocSize;
		if (lastAllocatedInMemory < 0) {
			throw new JDOFatalDataStoreException("OID overflow after alloc: " + oidAllocSize + 
					" / " + lastAllocatedInMemory);
		}
		//do not set dirty here!
		return ret;
	}

	public OidIterator iterator() {
		return new OidIterator(idx, 0, Long.MAX_VALUE);
	}

	public void print() {
		idx.print();
	}

	public long getMaxValue() {
		long m = idx.getMaxValue();
		return m > 0 ? m : MIN_OID; 
	}

	public int statsGetLeavesN() {
		return idx.statsGetLeavesN();
	}

	public int statsGetInnerN() {
		return idx.statsGetInnerN();
	}

	public int write() {
		return idx.write();
	}

	public Iterator descendingIterator() {
		return new DescendingOidIterator(idx, Long.MAX_VALUE, 0);
	}

	public long getLastUsedOid() {
		return lastAllocatedInMemory;
	}
	
	public List debugPageIds() {
	    return idx.debugPageIds();
	}

	public void revert(int pageId) {
		idx = new PagedUniqueLongLong(idx.getDataType(), idx.file, pageId);
	}
}