// com.bazaarvoice.jolt.shiftr.spec.ShiftrSpec Maven / Gradle / Ivy
/*
* Copyright 2013 Bazaarvoice, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bazaarvoice.jolt.shiftr.spec;
import com.bazaarvoice.jolt.common.pathelement.*;
import com.bazaarvoice.jolt.exception.SpecException;
import com.bazaarvoice.jolt.common.WalkedPath;
import org.apache.commons.lang.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * A Spec Object represents a single line from the JSON Shiftr Spec.
 *
 * At a minimum a single Spec has :
 *   Raw LHS spec value
 *   Some kind of PathElement (based off that raw LHS value)
 *
 * Additionally there are 2 distinct subclasses of the base Spec
 *   LeafSpec : where the RHS is a String or Array of Strings, that specify a write path for the data from this level in the tree
 *   CompositeSpec : where the RHS is a map of children Specs
 *
 * Mapping of JSON Shiftr Spec to Spec objects :
 * {
 *   "rating-*" : {      // CompositeSpec with one child and a Star PathElement
 *     "&(1)" : {        // CompositeSpec with one child and a Reference PathElement
 *       "foo" : {       // CompositeSpec with one child and a Literal PathElement
 *         "value" : "Rating-&1.value"  // LeafSpec with a Literal PathElement and one write path
 *       }
 *     }
 *   }
 * }
 *
 * The tree structure formed by the CompositeSpecs is what is used during Shiftr transforms
 * to do the parallel tree walk with the input data tree.
 *
 * During the parallel tree walk, a Path is maintained, and used when
 * a tree walk encounters a LeafSpec to evaluate the wildcards in the write DotNotationPath.
 */
public abstract class ShiftrSpec {

    // The processed key from the JSON config
    protected final MatchablePathElement pathElement;

    /**
     * Parses the raw LHS key and keeps the resulting element as this spec's {@link #pathElement}.
     *
     * @param rawJsonKey the LHS key exactly as written in the JSON spec; must not be null
     * @throws SpecException if the key does not parse to exactly one PathElement, or if that
     *                       element is not a MatchablePathElement
     */
    public ShiftrSpec( String rawJsonKey ) {
        List<PathElement> pathElements = parse( rawJsonKey );

        if ( pathElements.size() != 1 ) {
            throw new SpecException( "Shiftr invalid LHS:" + rawJsonKey + " can not contain '.'" );
        }

        PathElement pe = pathElements.get( 0 );
        if ( ! ( pe instanceof MatchablePathElement ) ) {
            throw new SpecException( "Spec LHS key=" + rawJsonKey + " is not a valid LHS key." );
        }

        this.pathElement = (MatchablePathElement) pe;
    }

    /**
     * Inspects the key, in a fixed order, to decide which PathElement(s) to build from it.
     *
     * The checks run in this order, and the first one that matches wins :
     *   contains "@", contains "$", contains "[", contains "&", equals "*", contains "*", literal.
     *
     * Only the array syntactic sugar (e.g. "photos[]") can yield more than one element.
     *
     * TODO(review): an orphaned comment fragment preceding this method read
     * "once all the shiftr specific logic is extracted" - presumably the tail of a note about
     * moving this parsing elsewhere; confirm the intent.
     *
     * @param key a single key from the spec; must not be null
     * @return the PathElement(s) the key expands to, in order
     * @throws SpecException if the key has unbalanced or misplaced [] references, or mixes * with &amp;
     */
    public static List<PathElement> parse( String key ) {
        if ( key.contains( "@" ) ) {
            return single( new AtPathElement( key ) );
        }
        if ( key.contains( "$" ) ) {
            return single( new DollarPathElement( key ) );
        }
        if ( key.contains( "[" ) ) {
            return parseArrayKey( key );
        }
        if ( key.contains( "&" ) ) {
            if ( key.contains( "*" ) ) {
                throw new SpecException( "Can't mix * with &, was:" + key );
            }
            return single( new AmpPathElement( key ) );
        }
        if ( "*".equals( key ) ) {
            return single( new StarAllPathElement( key ) );
        }
        if ( key.contains( "*" ) ) {
            return single( parseStarKey( key ) );
        }
        return single( new LiteralPathElement( key ) );
    }

    /**
     * Parses every key in order and concatenates the resulting PathElements into one list.
     *
     * @param keys the keys to parse; must not be null
     * @return a new mutable list holding the elements of all keys, in order
     * @throws SpecException if any individual key is invalid
     */
    public static List<PathElement> parse( String[] keys ) {
        List<PathElement> paths = new ArrayList<PathElement>();

        for ( String key : keys ) {
            paths.addAll( parse( key ) );
        }
        return paths;
    }

    /**
     * Handles a key that contains a "[" : either a canonical array key such as "[0]", or the
     * syntactic sugar form where an array reference is appended to another key.
     */
    private static List<PathElement> parseArrayKey( String key ) {
        if ( StringUtils.countMatches( key, "[" ) != 1 || StringUtils.countMatches( key, "]" ) != 1 ) {
            throw new SpecException( "Invalid key:" + key + " has too many [] references." );
        }

        // The [..] must close the key. Without this guard a key such as "abc[0]def" is split into
        // "abc" and "[0]def", and "[0]def" splits into "" and "[0]def" again, recursing forever.
        if ( key.charAt( key.length() - 1 ) != ']' ) {
            throw new SpecException( "Array [..] must be the last thing in the key, was:" + key );
        }

        // is canonical array?
        if ( key.charAt( 0 ) == '[' ) {
            return single( new ArrayPathElement( key ) );
        }

        // Split syntactic sugar of "photos[]" --> [ "photos", "[]" ]
        //  or "bob-&(3,1)-smith[&0]" --> [ "bob-&(3,1)-smith", "[&(0,0)]" ]
        String canonicalKey = key.replace( "[", ".[" );
        String[] subkeys = canonicalKey.split( "\\." );

        // at this point each sub key should be a valid key, so just recall parse
        List<PathElement> subElements = parse( subkeys );

        // Defensive : only the final element is allowed to be an array reference.
        for ( int index = 0; index < subElements.size() - 1; index++ ) {
            if ( subElements.get( index ) instanceof ArrayPathElement ) {
                throw new SpecException( "Array [..] must be the last thing in the key, was:" + key );
            }
        }

        return subElements;
    }

    /**
     * Picks the star PathElement implementation based on how many '*' the key contains :
     * one, two, or more than two.  The caller has already handled the bare "*" key.
     */
    private static PathElement parseStarKey( String key ) {
        int numOfStars = StringUtils.countMatches( key, "*" );

        if ( numOfStars == 1 ) {
            return new StarSinglePathElement( key );
        }
        if ( numOfStars == 2 ) {
            return new StarDoublePathElement( key );
        }
        return new StarRegexPathElement( key );
    }

    /**
     * Wraps one element in a fixed-size list.  The explicit type witness keeps the element type
     * at PathElement rather than the concrete subclass, so the result is a List of PathElement.
     */
    private static List<PathElement> single( PathElement element ) {
        return Arrays.<PathElement>asList( element );
    }

    /**
     * This is the main recursive method of the Shiftr parallel "spec" and "input" tree walk.
     *
     * It should return true if this Spec object was able to successfully apply itself given the
     * inputKey and input object.
     *
     * In the context of the Shiftr parallel treewalk, if this method returns true, the assumption
     * is that no other sibling Shiftr specs need to look at this particular input key.
     *
     * NOTE(review): "output" is a raw Map here; its type parameters are not visible in this file.
     * Presumably String keys to Object values - confirm against the subclasses before tightening.
     *
     * @return true if this spec "handles" the inputKey such that no sibling specs need to see it
     */
    public abstract boolean apply( String inputKey, Object input, WalkedPath walkedPath, Map output );
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy