All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.mdi.MetadataIndex Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.mdi;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.UUID;


import com.bigdata.btree.BTree;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.btree.view.FusedView;
import com.bigdata.io.LongPacker;
import com.bigdata.journal.ICommitter;
import com.bigdata.journal.IResourceManager;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.service.MetadataService;

/**
 * A metadata index for the partitions of a distributed index. There is one
 * metadata index for each distributed index. The keys of the metadata index are
 * the first key that would be directed into the corresponding index segment,
 * e.g., a separator key (this is just the standard btree semantics).
 * The values are serialized {@link PartitionLocator} objects.
 * 

* Note: At this time the recommended scale-out approach for the metadata index * is to place the metadata indices on a {@link MetadataService} (the same * {@link MetadataService} may be used for an arbitrary #of scale-out indices) * and to replicate the state for the {@link MetadataService} onto * failover {@link MetadataService}s. Since the {@link MetadataIndex} may grow * without bound, you simply need to have enough disk on hand for it (the size * requirements are quite modest). Further, the {@link MetadataService} MUST NOT * be used to hold the data for the scale-out indices themselves since the * {@link MetadataIndex} can not undergo {@link IResourceManager#overflow()}. *

* One advantage of this approach is that the {@link MetadataIndex} is * guaranteed to hold all historical states of the partition definitions for * each index - effectively it is an immortal store for the partition metadata. * On the other hand it is not possible to compact the metadata index without * taking the database offline. * * @author Bryan Thompson * @version $Id$ * * @todo The {@link MetadataIndex} does NOT support either overflow (it may NOT * be a {@link FusedView}) NOR key-range splits. There are several issues * involved: *

* (a) How to track the next partition identifier to be assigned to an * index partition for the managed index. Currently this value is written * in the {@link MetadataIndexCheckpoint} record and is propagated to the * new backing store on overflow. However, if the metadata index is split * into partitions then additional care MUST be taken to use only the * value of that field on the 'meta-meta' index. *

* (b) how to locate the partitions of the metadata index itself. * * @todo one way to locate the metadata-index partitions is to hash partition * the metadata index and range queries can be flooded to all partitions. * the #of metadata service nodes can be changed by a suitable broadcast * event in which clients have to change to the new hash basis. this * feature can be generalized to provide hash partitioned indices as well * as key-range partitioned indices. * * @todo A metadata index can be recovered by a distributed process running over * the data services. Each data service reports all index partitions. The * reports are collected and the index is rebuilt from the reports. Much * like a map/reduce job. */ public class MetadataIndex extends BTree implements IMetadataIndex { /** * Used to implement find(byte[] key). */ private transient final MetadataIndexView view; public MetadataIndexMetadata getIndexMetadata() { return (MetadataIndexMetadata) super.getIndexMetadata(); } public IndexMetadata getScaleOutIndexMetadata() { return getIndexMetadata().getManagedIndexMetadata(); } /** * Returns the value to be assigned to the next partition created on this * {@link MetadataIndex} and then increments the counter. The counter will * be made restart-safe iff the index is dirty, the index is registered as * an {@link ICommitter}, and the store on which the index is stored is * committed. *

* Note: The metadata index uses a 32-bit partition identifier rather than * the {@link #getCounter()}. The reason is that the {@link Counter} uses * the partition identifier in the high word and a partition local counter * in the low word. Therefore we have to centralize the assignment of the * partition identifier, even when the metadata index is itself split into * partitions. Requests for partition identifiers need to be directed to the * root partition (L0) for the {@link MetadataIndex}. */ public int incrementAndGetNextPartitionId() { final int tmp = nextPartitionId++; /* * Notify listener that the index is dirty. */ fireDirtyEvent(); return tmp; } private int nextPartitionId; /** * Create a new {@link MetadataIndex}. * * @param store * The backing store. * @param indexUUID * The unique identifier for the metadata index. * @param managedIndexMetadata * The metadata template for the managed scale-out index. */ public static MetadataIndex create(IRawStore store, UUID indexUUID, IndexMetadata managedIndexMetadata) { final MetadataIndexMetadata metadata = new MetadataIndexMetadata( managedIndexMetadata.getName(), indexUUID, managedIndexMetadata); /* * @todo the metadata index should use probably delete markers so that * we can do compacting merges on it, but that is really moot until we * support overflow of the metadataService and partitioning of the * metadata index. */ metadata.setDeleteMarkers(true); /* * Override the implementation class. */ metadata.setBTreeClassName(MetadataIndex.class.getName()); /* * Override the checkpoint record implementation class. */ metadata.setCheckpointClassName(MetadataIndexCheckpoint.class.getName()); /* * Tuple serializer. */ metadata.setTupleSerializer(PartitionLocatorTupleSerializer.newInstance()); return (MetadataIndex) BTree.create(store, metadata); } /** * Required ctor. 
* * @param store * @param checkpoint * @param metadata */ public MetadataIndex(IRawStore store, Checkpoint checkpoint, IndexMetadata metadata, boolean readOnly) { super(store, checkpoint, metadata, readOnly); /* * copy the initial value from the checkpoint record. */ nextPartitionId = ((MetadataIndexCheckpoint)checkpoint).getNextPartitionId(); view = new MetadataIndexView(this); } /** * Extended to require a checkpoint if {@link #incrementAndGetNextPartitionId()} has been * invoked. */ public boolean needsCheckpoint() { if(nextPartitionId != ((MetadataIndexCheckpoint)getCheckpoint()).getNextPartitionId()) { return true; } return super.needsCheckpoint(); } /** * Extends the {@link Checkpoint} record to store the next partition * identifier to be assigned by the metadata index. * * @author Bryan Thompson * @version $Id$ */ public static class MetadataIndexCheckpoint extends Checkpoint { /** * */ private static final long serialVersionUID = 6482587101150014793L; private int nextPartitionId; /** * The immutable value of the nextPartitionId counter * stored in the metadata record. */ public int getNextPartitionId() { return nextPartitionId; } /** * De-serialization constructor. */ public MetadataIndexCheckpoint() { } /** * @param btree */ public MetadataIndexCheckpoint(BTree btree) { super(btree); nextPartitionId = ((MetadataIndex)btree).nextPartitionId; } /** * Create the initial checkpoint record for the initial metadata index. * * @param metadata */ public MetadataIndexCheckpoint(IndexMetadata metadata) { super(metadata); // The first partitionId is zero(0). nextPartitionId = 0; } /** * Create the initial checkpoint record when the metadata index * overflows onto a new backing store. * * @param metadata */ public MetadataIndexCheckpoint(IndexMetadata metadata, Checkpoint oldCheckpoint) { super(metadata, oldCheckpoint); // propagate the value of this field onto the new backing store. 
nextPartitionId = ((MetadataIndexCheckpoint)oldCheckpoint).nextPartitionId; } final transient private static int VERSION0 = 0x0; @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { super.readExternal(in); final int version = (int) LongPacker.unpackLong(in); if (version != 0) throw new IOException("Unknown version: " + version); nextPartitionId = in.readInt(); } @Override public void writeExternal(ObjectOutput out) throws IOException { super.writeExternal(out); LongPacker.packLong(out, VERSION0); out.writeInt(nextPartitionId); } } /** * Extends the {@link IndexMetadata} record to hold the metadata template * for the managed scale-out index. * * @author Bryan Thompson * @version $Id$ */ public static class MetadataIndexMetadata extends IndexMetadata implements Externalizable { private static final long serialVersionUID = -7309267778881420043L; private IndexMetadata scaleOutIndexMetadata; /** * The managed index metadata */ public final IndexMetadata getManagedIndexMetadata() { return scaleOutIndexMetadata; } /** * De-serialization constructor. */ public MetadataIndexMetadata() { } /** * First time constructor. * * @param name * The name of the managed index. The name of the metadata * index is given by * {@link MetadataService#getMetadataIndexName(String)} * @param indexUUID * The UUID of the metadata index. * @param managedIndexMetadata * The metadata template for the managed index. 
*/ public MetadataIndexMetadata(String managedIndexName, UUID indexUUID, IndexMetadata managedIndexMetadata) { super(MetadataService.getMetadataIndexName(managedIndexName), indexUUID); if(managedIndexMetadata == null) { throw new IllegalArgumentException(); } this.scaleOutIndexMetadata = managedIndexMetadata; } private static final transient int VERSION0 = 0x0; public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { super.readExternal(in); final int version = (int) LongPacker.unpackLong(in); if (version != VERSION0) { throw new IOException("Unknown version: version=" + version); } scaleOutIndexMetadata = (IndexMetadata) in.readObject(); } public void writeExternal(ObjectOutput out) throws IOException { super.writeExternal(out); LongPacker.packLong(out, VERSION0); out.writeObject(scaleOutIndexMetadata); } } public PartitionLocator get(final byte[] key) { // return (PartitionLocator)SerializerUtil.deserialize(lookup(key)); // automatic de-serialization using the ITupleSerializer. return (PartitionLocator)super.lookup((Object)key); } public PartitionLocator find(final byte[] key) { return view.find(key); } /** * Used to (de-)serialize {@link PartitionLocator}s in the * {@link MetadataIndex}. * * @author Bryan Thompson * @version $Id$ */ public static class PartitionLocatorTupleSerializer extends DefaultTupleSerializer { /** * */ private static final long serialVersionUID = -4178430896409893596L; public static PartitionLocatorTupleSerializer newInstance() { return new PartitionLocatorTupleSerializer(getDefaultKeyBuilderFactory()); } /** * De-serialization ctor only. */ public PartitionLocatorTupleSerializer() { } /** * @param keyBuilderFactory */ public PartitionLocatorTupleSerializer( IKeyBuilderFactory keyBuilderFactory) { super(keyBuilderFactory); } /** * The initial version (no additional persistent state). */ private final static transient byte VERSION0 = 0; /** * The current version. 
*/ private final static transient byte VERSION = VERSION0; public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException { super.readExternal(in); final byte version = in.readByte(); switch (version) { case VERSION0: break; default: throw new UnsupportedOperationException("Unknown version: " + version); } } public void writeExternal(final ObjectOutput out) throws IOException { super.writeExternal(out); out.writeByte(VERSION); } } // PartitionLocatorTupleSerializer /** * Passes the notice along to the {@link #view}. It caches de-serialized * locators and needs to drop them from its cache if they become stale. */ public void staleLocator(PartitionLocator locator) { view.staleLocator(locator); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy