// com.bazaarvoice.jolt.shiftr.spec.ShiftrSpec Maven / Gradle / Ivy
/*
* Copyright 2013 Bazaarvoice, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bazaarvoice.jolt.shiftr.spec;
import com.bazaarvoice.jolt.common.pathelement.*;
import com.bazaarvoice.jolt.exception.SpecException;
import com.bazaarvoice.jolt.common.WalkedPath;
import com.bazaarvoice.jolt.utils.StringTools;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.List;
import java.util.LinkedList;
import java.util.Map;
/**
* A Spec Object represents a single line from the JSON Shiftr Spec.
*
* At a minimum a single Spec has :
* Raw LHS spec value
* Some kind of PathElement (based off that raw LHS value)
*
* Additionally there are 2 distinct subclasses of the base Spec
* LeafSpec : where the RHS is a String or Array of Strings, that specify a write path for the data from this level in the tree
* CompositeSpec : where the RHS is a map of children Specs
*
* Mapping of JSON Shiftr Spec to Spec objects :
* {
* "rating-*" : { // CompositeSpec with one child and a Star PathElement
* "&(1)" : { // CompositeSpec with one child and a Reference PathElement
* "foo" : { // CompositeSpec with one child and a Literal PathElement
* "value" : "Rating-&1.value" // LeafSpec with a Literal PathElement and one write path
* }
* }
* }
* }
*
* The tree structure formed by the CompositeSpecs is what is used during Shiftr transforms
* to do the parallel tree walk with the input data tree.
*
* During the parallel tree walk a stack of data (a WalkedPath) is maintained, and used when
* a tree walk encounters an Outputting spec to evaluate the wildcards in the write DotNotationPath.
*/
public abstract class ShiftrSpec {

    /** The processed (parsed) form of the LHS key from the JSON config. */
    protected final MatchablePathElement pathElement;

    /**
     * Parses the raw LHS key and keeps the resulting path element.
     *
     * @param rawJsonKey the raw LHS key of this spec line; must not be null
     * @throws SpecException if the key does not parse to a {@code MatchablePathElement}
     */
    public ShiftrSpec( String rawJsonKey ) {
        PathElement pe = parseSingleKeyLHS( rawJsonKey );
        if ( ! ( pe instanceof MatchablePathElement ) ) {
            throw new SpecException( "Spec LHS key=" + rawJsonKey + " is not a valid LHS key." );
        }
        this.pathElement = (MatchablePathElement) pe;
    }

    /**
     * Visible for Testing.
     *
     * Inspects the key in a particular order to determine the correct subclass of
     * PathElement to create. The checks are order-sensitive: exact "@" and "*" first,
     * then keys containing '@', '$', '[', '&amp;', '*', '#', and finally a literal.
     *
     * @param key String that should represent a single PathElement; must not be null
     * @return a concrete implementation of PathElement
     * @throws SpecException if the key has an '@' that is neither leading nor part of "@(",
     *                       does not have exactly one '[' and one ']' when it contains '[',
     *                       or mixes '*' with '&amp;'
     */
    public static PathElement parseSingleKeyLHS( String key ) {

        //// LHS single values
        if ( "@".equals( key ) ) {
            return new AtPathElement( key );
        }
        if ( "*".equals( key ) ) {
            return new StarAllPathElement( key );
        }

        //// LHS multiple values
        if ( key.startsWith( "@" ) || key.contains( "@(" ) ) {
            return TransposePathElement.parse( key );
        }
        if ( key.contains( "@" ) ) {
            throw new SpecException( "Invalid key:" + key + " can not have an @ other than at the front." );
        }
        if ( key.contains( "$" ) ) {
            return new DollarPathElement( key );
        }
        if ( key.contains( "[" ) ) {
            if ( StringTools.countMatches( key, "[" ) != 1 || StringTools.countMatches( key, "]" ) != 1 ) {
                throw new SpecException( "Invalid key:" + key + " has too many [] references." );
            }
            return new ArrayPathElement( key );
        }
        if ( key.contains( "&" ) ) {
            if ( key.contains( "*" ) ) {
                throw new SpecException( "Can't mix * with & ) " );
            }
            return new AmpPathElement( key );
        }
        if ( key.contains( "*" ) ) {
            // The number of stars selects the star matcher implementation.
            int numOfStars = StringTools.countMatches( key, "*" );
            switch ( numOfStars ) {
                case 1:
                    return new StarSinglePathElement( key );
                case 2:
                    return new StarDoublePathElement( key );
                default:
                    return new StarRegexPathElement( key );
            }
        }
        if ( key.contains( "#" ) ) {
            return new HashPathElement( key );
        }
        return new LiteralPathElement( key );
    }

    /**
     * Helper method to turn a String into an Iterator over its characters.
     *
     * @param string the String to iterate over
     * @return an Iterator that yields each char of the String in order; remove() is unsupported
     * @throws NullPointerException if string is null
     */
    public static Iterator<Character> stringIterator( final String string ) {
        // Ensure the error is found as soon as possible.
        if ( string == null ) {
            throw new NullPointerException();
        }

        return new Iterator<Character>() {
            private int index = 0;

            @Override
            public boolean hasNext() {
                return index < string.length();
            }

            @Override
            public Character next() {
                // Throw NoSuchElementException as defined by the Iterator contract,
                // not IndexOutOfBoundsException.
                if ( ! hasNext() ) {
                    throw new NoSuchElementException();
                }
                return string.charAt( index++ );
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Given a dotNotation style outputPath like "data[2].&amp;(1,1)", this method fixes the syntactic sugar
     * of "data[2]" --> "data.[2]"
     *
     * This makes all the rest of the String processing easier once we know that we can always
     * split on the '.' character.
     *
     * A '.' is inserted before every '[' that is not the first character and is not
     * already preceded by '@' or '.'.
     *
     * @param dotNotation Output path dot notation
     * @return the de-sugared path, or "" if the input is null or empty
     */
    // TODO Unit Test this
    private static String fixLeadingBracketSugar( String dotNotation ) {

        if ( dotNotation == null || dotNotation.length() == 0 ) {
            return "";
        }

        char prev = dotNotation.charAt( 0 );
        StringBuilder sb = new StringBuilder();
        sb.append( prev );

        for ( int index = 1; index < dotNotation.length(); index++ ) {
            char curr = dotNotation.charAt( index );

            // Only add the separator when one is not already implied by the previous char.
            if ( curr == '[' && prev != '@' && prev != '.' ) {
                sb.append( '.' );
            }

            sb.append( curr );
            prev = curr;
        }

        return sb.toString();
    }

    /**
     * Parse RHS Transpose @ logic.
     * "@(a.b)" --> pulls "(a.b)" off the iterator
     * "@a.b"   --> pulls "a." off the iterator and returns the canonical "(a)"
     * "@a"     --> pulls "a" off the iterator and returns it as-is
     *
     * This method expects that the '@' character has already been seen.
     *
     * @param iter iterator to pull data from
     * @param dotNotationRef the original dotNotation string used for error messages
     * @return the text that follows the '@' ("" if the iterator is already exhausted)
     * @throws SpecException if the '@' is directly followed by '.', if a second '(' appears
     *                       inside "@(...)", or if "@(" is never closed
     */
    // TODO Unit Test this
    private static String parseAtPathElement( Iterator<Character> iter, String dotNotationRef ) {

        if ( ! iter.hasNext() ) {
            return "";
        }

        StringBuilder sb = new StringBuilder();

        char c = iter.next();
        // Nested parens are rejected below, so a flag is enough: the first ')' closes the "@(".
        boolean isParensAt = ( c == '(' );
        if ( c == '.' ) {
            throw new SpecException( "Unable to parse dotNotation, invalid TransposePathElement : " + dotNotationRef );
        }
        sb.append( c );

        while ( iter.hasNext() ) {
            c = iter.next();
            sb.append( c );

            // Parsing "@(a.b.[&2])"
            if ( isParensAt ) {
                if ( c == '(' ) {
                    throw new SpecException( "Unable to parse dotNotation, too many open parens '(' : " + dotNotationRef );
                }
                if ( c == ')' ) {
                    return sb.toString();
                }
            }
            // Parsing "@abc.def", return a canonical form of "(abc)" and leave the "def" in the iterator
            else if ( c == '.' ) {
                return "(" + sb.substring( 0, sb.length() - 1 ) + ")";
            }
        }

        // if we got to the end of the String without seeing the closing ')' throw an exception.
        if ( isParensAt ) {
            throw new SpecException( "Invalid @() pathElement from : " + dotNotationRef );
        }

        // Parsing "@abc"
        return sb.toString();
    }

    /**
     * Method that parses a dotNotation String, character by character, from an iterator.
     *
     * Segments are split on unescaped '.' characters; empty segments are skipped.
     * A '@' hands the iterator to parseAtPathElement, and the '@' plus whatever that returns
     * (together with anything already accumulated for the current segment) is stored as one entry.
     *
     * Escaping, as implemented here: a single backslash before a '.' keeps the '.' in the
     * current segment, and two backslashes yield one literal backslash. Any other character
     * directly after a single backslash is dropped along with the backslash, except '@',
     * which is still handled as the start of an '@' element.
     *
     * @param pathStrings List to store parsed Strings that each represent a PathElement
     * @param iter the iterator to pull characters from
     * @param dotNotationRef the original dotNotation string used for error messages
     * @return the same pathStrings List that was passed in, with the parsed segments appended
     */
    public static List<String> parseDotNotation( List<String> pathStrings, Iterator<Character> iter, String dotNotationRef ) {

        boolean escapeActive = false;
        StringBuilder sb = new StringBuilder();

        while ( iter.hasNext() ) {
            char c = iter.next();

            if ( c == '\\' ) {
                // two escapes lets one thru
                escapeActive = ! escapeActive;
            }

            if ( c == '@' ) {
                sb.append( '@' );
                sb.append( parseAtPathElement( iter, dotNotationRef ) );

                pathStrings.add( sb.toString() );
                sb = new StringBuilder();
            }
            else if ( c == '.' ) {
                if ( escapeActive ) {
                    sb.append( c );
                }
                else {
                    // Unescaped '.' ends the current segment; start a fresh one.
                    if ( sb.length() != 0 ) {
                        pathStrings.add( sb.toString() );
                    }
                    sb = new StringBuilder();
                }
            }
            else if ( ! escapeActive ) {
                sb.append( c );
            }

            // An escape only applies to the character immediately following it.
            if ( c != '\\' ) {
                escapeActive = false;
            }
        }

        if ( sb.length() != 0 ) {
            pathStrings.add( sb.toString() );
        }
        return pathStrings;
    }

    /**
     * Converts each key into a PathElement via parseSingleKeyLHS.
     *
     * @param keys the individual path segment Strings to convert
     * @param refDotNotation the original dotNotation string used for error messages
     * @return List of PathElements based on the provided List keys
     * @throws SpecException if any key parses to an AtPathElement, or is otherwise invalid
     */
    private static List<PathElement> parseList( List<String> keys, String refDotNotation ) {
        List<PathElement> paths = new ArrayList<>( keys.size() );

        for ( String key : keys ) {
            PathElement path = parseSingleKeyLHS( key );
            if ( path instanceof AtPathElement ) {
                throw new SpecException( "'.@.' is not valid on the RHS: " + refDotNotation );
            }
            paths.add( path );
        }

        return paths;
    }

    /**
     * Parse the dotNotation of the RHS.
     *
     * @param dotNotation the RHS write path in dot notation; null is treated like ""
     * @return the PathElements for each segment of the path, in order
     * @throws SpecException if the path or any of its segments is invalid
     */
    public static List<PathElement> parseDotNotationRHS( String dotNotation ) {
        String fixedNotation = fixLeadingBracketSugar( dotNotation );
        List<String> pathStrs = parseDotNotation( new ArrayList<String>(), stringIterator( fixedNotation ), dotNotation );

        return parseList( pathStrs, dotNotation );
    }

    /**
     * This is the main recursive method of the Shiftr parallel "spec" and "input" tree walk.
     *
     * It should return true if this Spec object was able to successfully apply itself given the
     * inputKey and input object.
     *
     * In the context of the Shiftr parallel treewalk, if this method returns true, the assumption
     * is that no other sibling Shiftr specs need to look at this particular input key.
     *
     * @return true if this spec "handles" the inputKey such that no sibling specs need to see it
     */
    public abstract boolean apply( String inputKey, Object input, WalkedPath walkedPath, Map<String,Object> output );
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy