All downloads are free. The search and download functionality uses the official Maven repository.

com.bazaarvoice.jolt.Removr Maven / Gradle / Ivy

There is a newer version: 0.1.8
Show newest version
/*
 * Copyright 2013 Bazaarvoice, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.bazaarvoice.jolt;

import com.bazaarvoice.jolt.exception.SpecException;
import com.bazaarvoice.jolt.removr.spec.RemovrCompositeSpec;
import javax.inject.Inject;
import java.util.HashMap;
import java.util.Map;

/**
 * Removr is a kind of JOLT transform that removes content from the input JSON.
 * 

* For comparision : * Shitr walks the input data and asks its spec "Where should this go?" * Defaultr walks the spec and asks "Does this exist in the data? If not, add it." * * While, Removr walks the spec and asks "if this exists, remove it." *

* Example : Given input JSON like *

 * {
 *   "~emVersion" : "2",
 *   "id":"123124",
 *   "productId":"31231231",
 *   "submissionId":"34343",
 *   "this" : "stays",
 *   "configured" : {
 *     "a" : "b",
 *     "c" : "d"
 *   }
 * }
 * 
* With the desired output being : *
 * {
 *   "id":"123124",
 *   "this" : "stays",
 *
 *   "configured" : {
 *     "a" : "b"
 *   }
 * }
 * 
* This is what the Removr Spec would look like *
 * {
 *   "~emVersion" : "",
 *   "productId":"",
 *   "submissionId":"",
 *
 *   "configured" : {
 *     "c" : ""
 *   }
 * }
 * 
* * * Removr Wildcards * * '*' Wildcard * Valid only on the LHS ( input JSON keys ) side of a Removr Spec * The '*' wildcard can be used by itself or to match part of a key. * * '*' wildcard by itself : * To remove "all" keys under an input, use the * by itself on the LHS. *
 *    // example input
 *    {
 *     "ratings":{
 *        "Set1":{
 *           "a":"a",
 *           "b":"b"
 *        },
 *        "Set2":{
 *            "c":"c",
 *            "b":"b"
 *        }
 *      },
 *    }
 *    //desired output
 *     {
 *     "ratings":{
 *        "Set1":{
 *           "a":"a"
 *        },
 *        "Set2":{
 *            "c":"c"
 *        }
 *      },
 *    }
 *
 *    //Spec would be
 *    {
 *     "ratings":{
 *        "*":{
 *          "b":""
 *        },
 *      },
 *    }
 *    
* In this example, "Set1" and "Set2" under rating both have the same structure, and thus we can use the '*' * to allow use to write more compact rules to remove "b" from all children under ratings. This is especially useful when we don't know * how many children will be under ratings, but we would like to nuke certain part of it across. * * '*' wildcard as part of a key : * This is useful for working with input JSON with keys that are "prefixed". * Ex : if you had an input document like *
 *        {
 *         "ratings_legacy":{
 *              "Set1":{
 *                  "a":"a",
 *                  "b":"b"
 *                },
 *              "Set2":{
 *                  "a":"a",
 *                   "b":"b"
 *               }
 *           }
 *
 *         "ratings_new":{
 *               "Set1":{
 *                   "a":"a",
 *                   "b":"b"
 *               },
 *               "Set2":{
 *                   "a":"a",
 *                   "b":"b"
 *               }
 *          }
 *       }
 *    
* * A 'rating_*' would match both keys. As in Shiftr wildcard matching, * wildcard is as non greedy as possible, which enable us to give more than one * in key. * * For an ouput that removed Set1 from all ratings_* key, the spec would be, *
 *        {
 *         "ratings_*":{
 *              "Set1":""
 *       }
 *    
* * *

* The Spec file format for Removr is a tree Map objects. * The "Right hand side" of the of each entry is ignored/irrelevant unless it is a map, * in which case Removr will recursively walk down the tree. *

*/ public class Removr implements SpecDriven, Transform { private final Map spec; private static final String ROOT_KEY = "root"; private final RemovrCompositeSpec rootSpec; @Inject public Removr( Object spec ) { if ( spec == null ){ throw new SpecException( "Removr expected a spec of Map type, got 'null'." ); } if ( ! ( spec instanceof Map ) ) { throw new SpecException( "Removr expected a spec of Map type, got " + spec.getClass().getSimpleName() ); } rootSpec = new RemovrCompositeSpec( ROOT_KEY, (Map) spec ); this.spec = (Map) spec; } /** * Recursively removes data from the input JSON. * * @param input the JSON object to transform in plain vanilla Jackson Map style */ @Override public Object transform( Object input ) { return transformInternal(input); } private Object transformInternal(Object input) { // Wrap the input in a map to fool the compositespec to recurse itself. Map wrappedMap = new HashMap(); wrappedMap.put(ROOT_KEY, input); rootSpec.remove(wrappedMap); return input; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy