All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.cloudgraph.hbase.scan.FuzzyRowKeyScanAssembler Maven / Gradle / Ivy

Go to download

CloudGraph(tm) is a suite of Service Data Object (SDO) 2.1 services designed for relational and big-table style "cloud" databases, such as HBase and others.

There is a newer version: 2.0.4
Show newest version
/**
 *        CloudGraph Community Edition (CE) License
 * 
 * This is a community release of CloudGraph, a dual-license suite of
 * Service Data Object (SDO) 2.1 services designed for relational and 
 * big-table style "cloud" databases, such as HBase and others. 
 * This particular copy of the software is released under the 
 * version 2 of the GNU General Public License. CloudGraph was developed by 
 * TerraMeta Software, Inc.
 * 
 * Copyright (c) 2013, TerraMeta Software, Inc. All rights reserved.
 * 
 * General License information can be found below.
 * 
 * This distribution may include materials developed by third
 * parties. For license and attribution notices for these
 * materials, please refer to the documentation that accompanies
 * this distribution (see the "Licenses for Third-Party Components"
 * appendix) or view the online documentation at 
 * <http://cloudgraph.org/licenses/>. 
 */
package org.cloudgraph.hbase.scan;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.xml.namespace.QName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FuzzyRowFilter;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.util.Pair;
import org.cloudgraph.config.CloudGraphConfig;
import org.cloudgraph.config.DataGraphConfig;
import org.cloudgraph.config.KeyFieldConfig;
import org.cloudgraph.config.PreDefinedKeyFieldConfig;
import org.cloudgraph.config.TableConfig;
import org.cloudgraph.config.UserDefinedRowKeyFieldConfig;
import org.cloudgraph.hbase.filter.HBaseFilterAssembler;
import org.cloudgraph.hbase.key.Hashing;
import org.cloudgraph.hbase.key.KeySupport;
import org.cloudgraph.hbase.key.Padding;
import org.plasma.query.Wildcard;
import org.plasma.query.model.Where;
import org.plasma.sdo.DataFlavor;
import org.plasma.sdo.PlasmaType;


/**
 * Assembles a composite fuzzy row key bytes / fuzzy info bytes pair 
 * where each field within the composite row keys are constructed 
 * based a set of query predicates.  
 * 
 * @see org.cloudgraph.config.DataGraphConfig
 * @see org.cloudgraph.config.TableConfig
 * @see org.cloudgraph.config.UserDefinedField 
 * @see org.cloudgraph.config.PredefinedField 
 * @see org.cloudgraph.config.PreDefinedFieldName 
 * @author Scott Cinnamond
 * @since 0.5.3
 */
public class FuzzyRowKeyScanAssembler  
    implements RowKeyScanAssembler, FuzzyRowKey, HBaseFilterAssembler
{
    private static Log log = LogFactory.getLog(FuzzyRowKeyScanAssembler.class);
	protected int bufsize = 4000;
	protected ByteBuffer keyBytes = ByteBuffer.allocate(bufsize);
	protected ByteBuffer infoBytes = ByteBuffer.allocate(bufsize);
	protected PlasmaType rootType;
	protected DataGraphConfig graph;
	protected TableConfig table;
	protected KeySupport keySupport = new KeySupport();
	protected Charset charset;
	protected ScanLiterals scanLiterals;
	protected int fieldCount;
    protected String rootUUID;
    protected byte fixedMaskByte = 0;
    protected byte variableMaskByte = 1;
    protected byte[] delimMask;
	protected Hashing hashing;
	protected Padding padding;
	
	@SuppressWarnings("unused")
	private FuzzyRowKeyScanAssembler() {}

	/**
	 * Constructor
	 * @param rootType the root type
	 */
	public FuzzyRowKeyScanAssembler(PlasmaType rootType)
	{
    	this.rootType = rootType;
		QName rootTypeQname = this.rootType.getQualifiedName();
		this.graph = CloudGraphConfig.getInstance().getDataGraph(
				rootTypeQname);
		this.table = CloudGraphConfig.getInstance().getTable(rootTypeQname);
		this.charset = CloudGraphConfig.getInstance().getCharset();
		this.delimMask = new byte[graph.getRowKeyFieldDelimiterBytes().length];
	    for (int i = 0; i < delimMask.length; i++)
	    	delimMask[i] = fixedMaskByte;
		Hash hash = this.keySupport.getHashAlgorithm(this.table);
		this.hashing = new Hashing(hash, this.charset);
		this.padding = new Padding(this.charset);

	}
	
	/**
	 * Constructor which enables the use of data object UUID as
	 * a pre-defined row key field. 
	 * @param rootType the root type
	 * @param rootUUID the root UUID.
	 */
	public FuzzyRowKeyScanAssembler(PlasmaType rootType, String rootUUID)
	{
		this(rootType);
		this.rootUUID = rootUUID;
	}
	
    /**
     * Assemble row key scan information based only on any
     * pre-defined row-key fields such as the
     * data graph root type or URI.
     * @see org.cloudgraph.config.PredefinedField 
     * @see org.cloudgraph.config.PreDefinedFieldName 
     */
	@Override
	public void assemble() {    	
    	assemblePredefinedFields();
	}
	
	/**
	 * Assemble row key scan information based on the given
	 * scan literals as well as pre-defined row-key fields such as the
     * data graph root type or URI.
	 * @param literalList the scan literals
     * @see org.cloudgraph.config.PredefinedField 
     * @see org.cloudgraph.config.PreDefinedFieldName 
	 */
	@Override
	public void assemble(ScanLiterals literals) {
		this.scanLiterals = literals;
    	assembleLiterals();
	}
	
    /**
     * Assemble row key scan information based on one or more
     * given query predicates.
     * @param where the row predicate hierarchy
     * @param contextType the context type which may be the root type or another
     * type linked by one or more relations to the root
     */
	@Override
	public void assemble(Where where, PlasmaType contextType) {
		
		if (log.isDebugEnabled())
    		log.debug("begin traverse");
    	
		ScanLiteralAssembler literalAssembler = 
				new ScanLiteralAssembler(this.rootType);
    	where.accept(literalAssembler); // traverse
    	
    	this.scanLiterals = literalAssembler.getPartialKeyScanResult();
    	
    	if (log.isDebugEnabled())
    		log.debug("end traverse");      	

    	assembleLiterals();
    }
	
	private void assemblePredefinedFields()
	{
    	List preDefinedFields = this.graph.getPreDefinedRowKeyFields();
        int fieldCount = preDefinedFields.size();
    	for (int i = 0; i < fieldCount; i++) {
        	PreDefinedKeyFieldConfig preDefinedField = preDefinedFields.get(i);
    		if (fieldCount > 0) {
        	    this.keyBytes.put(graph.getRowKeyFieldDelimiterBytes());
    			this.infoBytes.put(delimMask);
    		}
    		byte[] tokenValue = this.keySupport.getPredefinedFieldValueStartBytes(this.rootType, 
       	    		hashing, preDefinedField);
    		
       	    byte[] paddedTokenValue = null;
       	    if (preDefinedField.isHash()) {
       	    	paddedTokenValue = this.padding.pad(tokenValue, preDefinedField.getMaxLength(), 
    				DataFlavor.integral);
       	    }
       	    else {
       	    	paddedTokenValue = this.padding.pad(tokenValue, preDefinedField.getMaxLength(), 
       	    			preDefinedField.getDataFlavor());
       	    }
    		
    		this.keyBytes.put(paddedTokenValue);
			byte[] tokenMask = new byte[tokenValue.length];
    	    for (int j = 0; j < tokenMask.length; j++)
    	    	tokenMask[j] = fixedMaskByte;
			this.infoBytes.put(tokenMask);
    		
    		fieldCount++;
        }				
	}
	
	private void assembleLiterals()
	{		
	    for (KeyFieldConfig fieldConfig : this.graph.getRowKeyFields()) {
    		if (fieldCount > 0) {
        	    this.keyBytes.put(graph.getRowKeyFieldDelimiterBytes());
    			this.infoBytes.put(delimMask);
    		}
			
    		if (fieldConfig instanceof PreDefinedKeyFieldConfig) {
    			PreDefinedKeyFieldConfig predefinedConfig = (PreDefinedKeyFieldConfig)fieldConfig;        		
        		byte[] tokenValue = getPredefinedToken(predefinedConfig);
        		
           	    byte[] paddedTokenValue = null;
           	    if (predefinedConfig.isHash()) {
           	    	paddedTokenValue = this.padding.pad(tokenValue, 
           	    		predefinedConfig.getMaxLength(), 
        				DataFlavor.integral);
           	    }
           	    else {
           	    	paddedTokenValue = this.padding.pad(tokenValue, 
           	    		predefinedConfig.getMaxLength(), 
           	    		predefinedConfig.getDataFlavor());
           	    }
           	    this.keyBytes.put(paddedTokenValue);
    			byte[] tokenMask = getPredefinedTokenMask(predefinedConfig, paddedTokenValue.length);
    			this.infoBytes.put(tokenMask);
           	    
           	    this.fieldCount++;
    		}
    		else if (fieldConfig instanceof UserDefinedRowKeyFieldConfig) {
    			UserDefinedRowKeyFieldConfig userFieldConfig = (UserDefinedRowKeyFieldConfig)fieldConfig;

    			List scanLiterals = this.scanLiterals.getLiterals(userFieldConfig);    				 
    			// if no literal present, create a fuzzy wildcard
    			// as per the field length
    			if (scanLiterals == null || scanLiterals.size() == 0) {
    				byte[] wildcard = new byte[userFieldConfig.getMaxLength()];
    				Arrays.fill(wildcard, (byte)Character.valueOf('*').charValue());   				
    				this.keyBytes.put(wildcard); // no need for padding, we are at max len
        			byte[] fieldMask = new byte[userFieldConfig.getMaxLength()];
            	    for (int i = 0; i < fieldMask.length; i++)
            	    	fieldMask[i] = this.variableMaskByte;
        			this.infoBytes.put(fieldMask);
    			}
    			else {	 
    				if (scanLiterals.size() > 1)
    					log.warn("expected single literal for user defined field - ignoring");
    				FuzzyRowKeyLiteral fuzzyLiteral = (FuzzyRowKeyLiteral)scanLiterals.get(0);
    				if (WildcardStringLiteral.class.isAssignableFrom(fuzzyLiteral.getClass())) {
    					WildcardStringLiteral wc = (WildcardStringLiteral)fuzzyLiteral;
    					String literal = wc.getContent();
    					if (literal.length() != wc.getFieldConfig().getMaxLength()) {
    						if (literal.startsWith(Wildcard.WILDCARD_CHAR) && literal.endsWith(Wildcard.WILDCARD_CHAR)) {
    							throw new InvalidFuzzyRowKeyWildcardException(
    								"wildcard expression length does not equal field max length ("+wc.getFieldConfig().getMaxLength()+") for field '"
    							    +wc.getFieldConfig().getEndpointProperty().getName()+"' - fuzzy row key scan wildcard expressions, starting"
    								+ " and ending with a wildcard character, must exactly match (typically by padding with wildcards) the length of the target (composite)"
    								+ " row key field, as HBase fuzzy row key filters use fixed length masking.");
    						}
    					}
    				}
    				// note; already padded
    				this.keyBytes.put(fuzzyLiteral.getFuzzyKeyBytes());
    				this.infoBytes.put(fuzzyLiteral.getFuzzyInfoBytes());
    			}
    			
           	    this.fieldCount++;
    		}
		}			
	}
	
	private byte[] getPredefinedToken(PreDefinedKeyFieldConfig predefinedConfig) {
		byte[] tokenValue = null;
		switch (predefinedConfig.getName()) {
		case UUID:
			if (this.rootUUID != null) {
				tokenValue = this.rootUUID.getBytes(this.charset);
			}
			else {
				// no UUID available in this context, so return zero len array
				// which will get padded with default wildcard bytes and associated
				// with a variable info-mask bytes array
				byte wc = (byte)Wildcard.WILDCARD_CHAR.charAt(0);
				tokenValue = new byte[predefinedConfig.getMaxLength()];
				for (int i = 0; i < tokenValue.length; i++)
					tokenValue[i] = wc;
			}
			break;
		default:	
		    tokenValue = predefinedConfig.getKeyBytes(this.rootType);
			break;
		}        		
		
		if (predefinedConfig.isHash()) {
			tokenValue = hashing.toStringBytes(tokenValue);
		}    			
		return tokenValue;
	}

	private byte[] getPredefinedTokenMask(PreDefinedKeyFieldConfig predefinedConfig, int paddedTokenValueLength) {
		byte[] tokenMask = new byte[paddedTokenValueLength];
		switch (predefinedConfig.getName()) {
		case UUID:
			if (this.rootUUID != null) {
				Arrays.fill(tokenMask, fixedMaskByte); 
			}
			else {
				Arrays.fill(tokenMask, variableMaskByte); 
			}
			break;
		default:	
			Arrays.fill(tokenMask, fixedMaskByte); 
			break;
		}        		
		return tokenMask;
	}
	
	@Override
	public byte[] getFuzzyKeyBytes() {
		if (this.keyBytes == null)
			throw new IllegalStateException("row keys not assembled - first call assemble(...)");
	    byte [] result = new byte[this.keyBytes.position()];
	    System.arraycopy(this.keyBytes.array(), this.keyBytes.arrayOffset(), result, 0, this.keyBytes.position()); 
		return result;
	}

	@Override
	public byte[] getFuzzyInfoBytes() {
		if (this.infoBytes == null)
			throw new IllegalStateException("row keys not assembled - first call assemble(...)");
	    byte [] result = new byte[this.infoBytes.position()];
	    System.arraycopy(this.infoBytes.array(), this.infoBytes.arrayOffset(), result, 0, this.infoBytes.position()); 
		return result;
	}

	@Override
	public void clear() {
		this.scanLiterals = null;	
	}

	@Override
	public Filter getFilter() {
		Pair pair = new Pair(this.getFuzzyKeyBytes(), getFuzzyInfoBytes());
		List> list = new ArrayList>();
		list.add(pair);
		return new FuzzyRowFilter(list);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy