All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.counters.store.CounterSetBTree Maven / Gradle / Ivy

/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Mar 22, 2009
 */

package com.bigdata.counters.store;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.Vector;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.bigdata.btree.BTree;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.btree.keys.KVO;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.DefaultInstrumentFactory;
import com.bigdata.counters.History;
import com.bigdata.counters.HistoryInstrument;
import com.bigdata.counters.ICounter;
import com.bigdata.counters.ICounterNode;
import com.bigdata.counters.ICounterSet;
import com.bigdata.counters.IHistoryEntry;
import com.bigdata.counters.IInstrument;
import com.bigdata.counters.PeriodEnum;
import com.bigdata.counters.History.SampleIterator;
import com.bigdata.counters.ICounterSet.IInstrumentFactory;
import com.bigdata.io.SerializerUtil;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.sparse.SparseRowStore;
import com.bigdata.util.Bytes;

/**
 * An API encapsulating for writing and querying counter sets. The data are
 * written onto an {@link IIndex}. The {@link IIndex} may be local or remote.
 * 

* The multipart key is used. The first component is the milliseconds of the * associated timestamp value rounded down to an even number of minutes and * represented a long. The second component is the fully qualified path of the * counter. The last component is the exact timestamp (in milliseconds) of the * sampled counter value, represented as a long. These are formatted into an * unsigned byte[] following the standard practice. *

* The value stored under the key is the counter value. Normally counter values * are doubles or longs, but you can store any of the counter value types which * are supported by the {@link SparseRowStore}. *

* Using this approach, writes of the same counter value with different * timestamps will be recorded as different tuples in the {@link IIndex} and you * can store counter values sampled at rates of once per second while retaining * good compression for the keys in the index. * * @author Bryan Thompson * @version $Id$ * * FIXME Reading through per-minute counters from a CounterSetBTree grows slow * very quickly. * *

 * There are 21750988 counter values covering Fri Apr 03 15:51:57 EDT 2009 to
 * Sat Apr 04 08:45:05 EDT 2009. Took 60 seconds to record each hour of data on
 * the disk.  1.2G of XML data expanded to 2.6G on the journal
 * 
* * In order to improve performance, put the counter paths in a separate * dictionary and apply the regex there. Once we have the set of matched paths * we can scatter range queries against the BTree and drag back the data for * those counters (this would also make Unicode counter names viable). If the * key was then [pathId,timestamp] we could do ordered reads of just the * necessary key range for each desired counter. Prefix compression would still * be efficent for this representation. While the data arrive in history blocks, * we would still need to buffer them for ordered writes since otherwise the * writes would be scattered by the first key component (pathId). *

* I would have to encapsulate the counters as a counter for this to work, much * like the RDF DB. There would be two relations: the dictionary and the * timestamped values. *

* Space efficient encoding of the counter values would also help quite a bit - * it is Java default serialization, but we only store Long, Double or String. * All values for a given counter should have the same data type (it is required * by how we allocate the History) so the data type can be part of the * dictionary and that can be used to decode the value. (If values tend to be * close then a delta encoding would help.) */ public class CounterSetBTree extends BTree { protected static transient final Logger log = Logger .getLogger(CounterSetBTree.class); /** * @param store * @param checkpoint * @param metadata */ public CounterSetBTree(IRawStore store, Checkpoint checkpoint, IndexMetadata metadata, boolean readOnly) { super(store, checkpoint, metadata, readOnly); } static private final transient int INITIAL_CAPACITY = Bytes.kilobyte32; /** * Create a new instance. * * @param store * The backing store. * * @return The new instance. */ static public CounterSetBTree create(final IRawStore store) { final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID()); metadata.setBTreeClassName(CounterSetBTree.class.getName()); metadata.setTupleSerializer(new CounterSetBTreeTupleSerializer( new ASCIIKeyBuilderFactory(INITIAL_CAPACITY))); return (CounterSetBTree) BTree.create(store, metadata); } static public CounterSetBTree createTransient() { final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID()); metadata.setBTreeClassName(CounterSetBTree.class.getName()); metadata.setTupleSerializer(new CounterSetBTreeTupleSerializer( new ASCIIKeyBuilderFactory(INITIAL_CAPACITY))); return (CounterSetBTree) BTree.createTransient(metadata); } /** * A representation of a timestamped performance counter value as stored in * the {@link CounterSetBTree}. The minutes, path, and timestamp fields are * recovered from the key. The counter value is recovered from the value. 
* * @author Bryan Thompson * @version $Id$ */ static public class Entry { // key public final String path; public final long timestamp; // value public final Object value; public Entry(final long timestamp, final String path, final Object value) { this.timestamp = timestamp; this.path = path; this.value = value; } public String toString() { return getClass().getName()+// "{ path="+path+// ", value="+value+// ", timestamp="+timestamp+// "}"; } /** * Return the depth of the path in the performance counter hierarchy * (counts the #of '/' characters in the path). * * @return The depth. */ public int getDepth() { int depth = 0; final int len = path.length(); for (int i = 0; i < len; i++) { if (path.charAt(i) == '/') { depth++; } } return depth; } } /** * Encapsulates key and value formation. The key is formed from the minutes, * the path, and the timestamp. The value is the performance counter value * for a specific timestamp. * * @author Bryan Thompson * @version $Id$ */ static protected class CounterSetBTreeTupleSerializer extends DefaultTupleSerializer { /** * */ private static final long serialVersionUID = -887369151228567134L; /** * De-serialization ctor. */ public CounterSetBTreeTupleSerializer() { super(); } /** * Ctor when creating a new instance. * * @param keyBuilderFactory */ public CounterSetBTreeTupleSerializer( final IKeyBuilderFactory keyBuilderFactory) { super(keyBuilderFactory); } /** * Return the unsigned byte[] key. * * @param obj * An {@link ICounter} or {@link Entry}. 
*/ @Override public byte[] serializeKey(final Object obj) { if (obj == null) throw new IllegalArgumentException(); if(obj instanceof ICounter) { return serializeKey((ICounter)obj); } else if(obj instanceof Entry) { return serializeKey((Entry)obj); } else { throw new UnsupportedOperationException(obj.getClass().getName()); } } public byte[] serializeKey(final ICounter c) { final long timestamp = c.lastModified(); return getKeyBuilder().reset()// .append(TimeUnit.MILLISECONDS.toMinutes(timestamp))// .appendASCII(c.getPath())// .append(timestamp)// .getKey(); } public byte[] serializeKey(final Entry e) { return getKeyBuilder().reset()// .append(TimeUnit.MILLISECONDS.toMinutes(e.timestamp))// .appendASCII(e.path)// .append(e.timestamp)// .getKey(); } /** * Overridden to serialize just {@link Entry#value} as the value * component of the B+Tree tuple. */ @Override public byte[] serializeVal(final Entry value) { return SerializerUtil.serialize(value.value); } public Entry deserialize(final ITuple tuple) { final byte[] key = tuple.getKey(); // final long minutes = KeyBuilder.decodeLong(key, 0/* off */); final String path = KeyBuilder.decodeASCII(key, Bytes.SIZEOF_LONG/* off */, key.length - (2 * Bytes.SIZEOF_LONG)/* len */); final long timestamp = KeyBuilder.decodeLong(key, key.length - Bytes.SIZEOF_LONG/* off */); // @todo tuple.getValueStream() final Object value = SerializerUtil.deserialize(tuple.getValue()); return new Entry(timestamp, path, value); } } /** * Handles efficient writes of counters with {@link History} data. The shape * of the data is changed so that the resulting writes on the BTree will be * ordered. This is both faster and also results in a smaller size on the * size (since leaves are not updated once they are written to the store). * For a counter without history, the current value of the counter will be * written on the BTree. 
*/ public void writeHistory(final Iterator src) { final long begin = System.currentTimeMillis(); final Vector> v = new Vector>(); final CounterSetBTreeTupleSerializer tupleSer = (CounterSetBTreeTupleSerializer) getIndexMetadata() .getTupleSerializer(); while (src.hasNext()) { final ICounter c = src.next(); final String path = c.getPath(); if (c.getInstrument() instanceof HistoryInstrument) { final History h = ((HistoryInstrument) (c.getInstrument())) .getHistory(); final SampleIterator sitr = h.iterator(); while (sitr.hasNext()) { final IHistoryEntry e = sitr.next(); final Entry entry = new Entry(e.lastModified(), path, e .getValue()); final byte[] key = tupleSer.serializeKey(entry); final byte[] val = tupleSer.serializeVal(entry);//.value); v.add(new KVO(key, val, entry)); } } else { final Entry entry = new Entry(c.lastModified(), path, c .getValue()); final byte[] key = tupleSer.serializeKey(entry); final byte[] val = tupleSer.serializeVal(entry); v.add(new KVO(key, val, entry)); } } // to array final KVO[] a = v.toArray(new KVO[v.size()]); // order by the key. Arrays.sort(a); long nwritten = 0; // ordered write on the BTree. for (KVO t : a) { /* * Note: Don't overwrite if we already have the timestamped counter * value in the store. */ if (!super.contains(t.key)) { super.insert(t.key, t.val); nwritten++; } } final long elapsed = System.currentTimeMillis()-begin; if(log.isInfoEnabled()) { log.info("Wrote " + nwritten + " of " + a.length + " tuples in " + elapsed + "ms"); } } /** * Writes the current value of each visited * {@link ICounter} on the store. *

* Note: This presumes that the counters are associated with scalar values * (rather than {@link History}s). *

* Note that the counter set iterator will normally be in alpha order * already and all samples should be close to the same minute, so this is * already efficient for local operations. * * @todo More efficient storage for Double, Long, Integer and String values * (this is using Java default serialization)? */ public void writeCurrent(final Iterator src) { while (src.hasNext()) { final ICounter c = src.next(); if(log.isDebugEnabled()) { log.debug(c); } // if (c.getInstrument() instanceof HistoryInstrument) { // // /* // * This handles a history counter. However, loading a set // * history counters will cause writes to be scattered across the // * index since the counters are processed in alpha (path) order // * but each counter has a history (ascending minutes). In order // * to be efficient, the histories need to be converted into a // * KVO[] and then sorted before doing a bulk insert. // */ // // final String path = c.getPath(); // // final History h = ((HistoryInstrument)c.getInstrument()).getHistory(); // // final SampleIterator sitr = h.iterator(); // // while(sitr.hasNext()) { // // final IHistoryEntry hentry = sitr.next(); // // // entry reporting the average value for the history slot. // final Entry entry = new Entry(hentry.lastModified(), path, // hentry.getValue()); // // insert(entry, entry.value); // // } // // } else { // just the current value of the counter. insert(c, c.getValue()); // } } } /** * The toTime needs to be ONE (1) unit beyond the time of * interest since the minutes come first in the key. If you do not follow * this rule then you can miss out on the last unit worth of data. * * @param fromTime * The first time whose counters will be visited (in * milliseconds). * @param toTime * The first time whose counters WILL NOT be visited (in * milliseconds). * @param unit * The unit of aggregation for the reported counters. * @param filter * Only paths matched by the filter will be accepted (optional). 
* @param depth * When non-zero, only counters whose depth is LTE to the * specified depth will be returned. * * @return A collection of the selected performance counters together with * their ordered timestamped values for the specified time period. * * @todo In an act of cowardice, this assumes that the counter paths are * ASCII and encodes them as such. This allows us to decode the * counter path since it is not a compressed sort key. If we don't * take this "hack" then we need a 2nd index to resolve the Unicode * path from the sort key (once we hack off the leading minutes * component). *

* The other problem is that tacking the milliseconds onto the end of * the key might break the natural order of the counter paths in the * index. *

* The two index approach is not so bad. The main drawback is that it * can't be encapsulated as easily. */ public CounterSet rangeIterator(long fromTime, long toTime, final TimeUnit unit, final Pattern filter, final int depth) { if (fromTime < 0) throw new IllegalArgumentException(); if (toTime < 0) throw new IllegalArgumentException(); if (unit == null) throw new IllegalArgumentException(); if (fromTime == 0L) { /* * Default is the first available timestamp. */ fromTime = getFirstTimestamp(); } if (toTime == 0L || toTime == Long.MAX_VALUE) { /* * Default is the last available timestamp. */ toTime = getLastTimestamp(); } /* * Convert the covered time span into the caller's unit of aggregation. * * Note: The +1 is required to allocate enough slots in the History. * Without it the History class can overwrite the first slot, which will * cause the data to be underreported for the first time period. */ final long nslots = unit.convert(toTime, TimeUnit.MILLISECONDS) - unit.convert(fromTime, TimeUnit.MILLISECONDS) + 1; if (nslots > Integer.MAX_VALUE) throw new IllegalArgumentException("too many samples"); final CounterSetBTreeTupleSerializer tupleSer = (CounterSetBTreeTupleSerializer) getIndexMetadata() .getTupleSerializer(); final IKeyBuilder keyBuilder = getIndexMetadata().getTupleSerializer() .getKeyBuilder(); /* * Note: The first field in the key is the counter timestamp converted * to minutes since the epoch. Therefore we need to take the fromTime * milliseconds and convert it to minutes. Since that conversion * truncates the value, we will always have a fromKey that is EQ to the * minute in which the counters with a [fromTime] timestamp would be * found. */ final long fromMinutes = TimeUnit.MILLISECONDS.toMinutes(fromTime); final byte[] fromKey = keyBuilder.reset().append(fromMinutes).getKey(); /* * Note: The [toKey] needs to be strictly GT the minute in which the * [toTime] would be found. This may overscan, but that is better than * failing to scan enough. 
Any overscan is filtered out below. */ final long toMinutes = TimeUnit.MILLISECONDS.toMinutes(toTime + TimeUnit.MINUTES.toMillis(1)); final byte[] toKey = keyBuilder.reset().append(toMinutes).getKey(); if(log.isInfoEnabled()) { log.info("fromTime=" + fromTime + "ms (" + fromMinutes + "m), toTime=" + toTime + "ms (" + toMinutes + "m), units=" + unit + ", nslots=" + nslots); } // iterator scanning the counters. final ITupleIterator itr = rangeIterator(fromKey, toKey); // #of distinct counter paths selected by the query. int nselected = 0; // #of timestamp counter values accepted. long nvalues = 0; // #of tuples (aka timestamped counter values) visited. long nvisited = 0; // counters are inserted into this collection. final CounterSet counters = new CounterSet(); // factory for history counters. final IInstrumentFactory instrumentFactory = new DefaultInstrumentFactory( (int) nslots, PeriodEnum.getValue(unit), false/* overwrite */); while (itr.hasNext()) { final ITuple tuple = itr.next(); nvisited++; final Entry entry = tupleSer.deserialize(tuple); if (fromTime < entry.timestamp || toTime >= entry.timestamp) { /* * Due to the leading [minutes] field in the key there can be * some underscan and overscan of the index. Therefore we filter * to ensure that only timestamps which are strictly within the * specified milliseconds are extracted. 
*/ if (log.isTraceEnabled()) { log.trace("Rejected: minutes=" + TimeUnit.MILLISECONDS.toMinutes(entry.timestamp) + " : " + entry.path); } } if (depth != 0 && depth > entry.getDepth()) { if (log.isTraceEnabled()) { log.trace("Rejected: minutes=" + TimeUnit.MILLISECONDS.toMinutes(entry.timestamp) + " : " + entry.path); } } if (filter != null && !filter.matcher(entry.path).matches()) { if (log.isTraceEnabled()) { log.trace("Rejected: minutes=" + TimeUnit.MILLISECONDS.toMinutes(entry.timestamp) + " : " + entry.path); } continue; } ICounterNode c = counters.getPath(entry.path); final IInstrument inst; if (c == null) { // log first time matched for each path. if (log.isDebugEnabled()) { log.debug("Matched: ndistinct=" + nselected + ", " + entry.path); } nselected++; inst = instrumentFactory.newInstance(entry.value.getClass()); c = counters.addCounter(entry.path, inst); } else if (c instanceof ICounterSet) { log.error("CounterSet exists for counter path: " + entry.path); continue; } else { inst = ((ICounter) c).getInstrument(); } inst.setValue(entry.value, entry.timestamp); nvalues++; } if (log.isInfoEnabled()) log.info("nselected=" + nselected + ", nvalues=" + nvalues + ", nvisited=" + nvisited); return counters; } /** * Return the timestamp associated with the first performance counter value. * * @return The timestamp -or- 0L if there are no performance counter values. */ public long getFirstTimestamp() { if (getEntryCount() == 0) return 0L; return ((Entry) rangeIterator(null, null, 1/* capacity */, IRangeQuery.DEFAULT, null/* filter */).next().getObject()).timestamp; } /** * Return the timestamp associated with the last performance counter value. * * @return The timestamp -or- 0L if there are no performance counter values. */ public long getLastTimestamp() { if (getEntryCount() == 0) return 0L; return ((Entry) rangeIterator(null, null, 1/* capacity */, IRangeQuery.DEFAULT | IRangeQuery.REVERSE, null/* filter */) .next().getObject()).timestamp; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy