All downloads are free. The search and download features use the official Maven repository.
Please wait. This can take some minutes ...
Downloading a project requires significant server resources. Please understand that we need to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
org.apache.hadoop.hive.ql.udf.UDFJson Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.collect.Iterators;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParser.Feature;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.type.TypeFactory;
import org.codehaus.jackson.type.JavaType;
/**
* UDFJson.
*
*/
@Description(name = "get_json_object",
value = "_FUNC_(json_txt, path) - Extract a json object from path ",
extended = "Extract json object from a json string based on json path "
+ "specified, and return json string of the extracted json object. It "
+ "will return null if the input json string is invalid.\n"
+ "A limited version of JSONPath supported:\n"
+ " $ : Root object\n"
+ " . : Child operator\n"
+ " [] : Subscript operator for array\n"
+ " * : Wildcard for []\n"
+ "Syntax not supported that's worth noticing:\n"
+ " '' : Zero length string as key\n"
+ " .. : Recursive descent\n"
+ " @ : Current object/element\n"
+ " () : Script expression\n"
+ " ?() : Filter (script) expression.\n"
+ " [,] : Union operator\n"
+ " [start:end:step] : array slice operator\n")
public class UDFJson extends UDF {
private static final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
private static final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
private static final JavaType LIST_TYPE = TypeFactory.fromClass(List.class);
private final JsonFactory jsonFactory = new JsonFactory();
private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory);
// An LRU cache using a linked hash map
static class HashCache extends LinkedHashMap {
private static final int CACHE_SIZE = 16;
private static final int INIT_SIZE = 32;
private static final float LOAD_FACTOR = 0.6f;
HashCache() {
super(INIT_SIZE, LOAD_FACTOR);
}
private static final long serialVersionUID = 1;
@Override
protected boolean removeEldestEntry(Map.Entry eldest) {
return size() > CACHE_SIZE;
}
}
Map extractObjectCache = new HashCache();
Map pathExprCache = new HashCache();
Map> indexListCache =
new HashCache>();
Map mKeyGroup1Cache = new HashCache();
Map mKeyMatchesCache = new HashCache();
public UDFJson() {
// Allows for unescaped ASCII control characters in JSON values
jsonFactory.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
// Enabled to accept quoting of all character backslash qooting mechanism
jsonFactory.enable(Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
}
/**
* Extract json object from a json string based on json path specified, and
* return json string of the extracted json object. It will return null if the
* input json string is invalid.
*
* A limited version of JSONPath supported: $ : Root object . : Child operator
* [] : Subscript operator for array * : Wildcard for []
*
* Syntax not supported that's worth noticing: '' : Zero length string as key
* .. : Recursive descent @ : Current object/element () : Script
* expression ?() : Filter (script) expression. [,] : Union operator
* [start:end:step] : array slice operator
*
* @param jsonString
* the json string.
* @param pathString
* the json path expression.
* @return json string or null when an error happens.
*/
public Text evaluate(String jsonString, String pathString) {
if (jsonString == null || jsonString.isEmpty() || pathString == null
|| pathString.isEmpty() || pathString.charAt(0) != '$') {
return null;
}
int pathExprStart = 1;
boolean unknownType = pathString.equals("$");
boolean isRootArray = false;
if (pathString.length() > 1) {
if (pathString.charAt(1) == '[') {
pathExprStart = 0;
isRootArray = true;
} else if (pathString.charAt(1) == '.') {
isRootArray = pathString.length() > 2 && pathString.charAt(2) == '[';
} else {
return null;
}
}
// Cache pathExpr
String[] pathExpr = pathExprCache.get(pathString);
if (pathExpr == null) {
pathExpr = pathString.split("\\.", -1);
pathExprCache.put(pathString, pathExpr);
}
// Cache extractObject
Object extractObject = extractObjectCache.get(jsonString);
if (extractObject == null) {
if (unknownType) {
try {
extractObject = objectMapper.readValue(jsonString, LIST_TYPE);
} catch (Exception e) {
// Ignore exception
}
if (extractObject == null) {
try {
extractObject = objectMapper.readValue(jsonString, MAP_TYPE);
} catch (Exception e) {
return null;
}
}
} else {
JavaType javaType = isRootArray ? LIST_TYPE : MAP_TYPE;
try {
extractObject = objectMapper.readValue(jsonString, javaType);
} catch (Exception e) {
return null;
}
}
extractObjectCache.put(jsonString, extractObject);
}
for (int i = pathExprStart; i < pathExpr.length; i++) {
if (extractObject == null) {
return null;
}
extractObject = extract(extractObject, pathExpr[i], i == pathExprStart && isRootArray);
}
Text result = new Text();
if (extractObject instanceof Map || extractObject instanceof List) {
try {
result.set(objectMapper.writeValueAsString(extractObject));
} catch (Exception e) {
return null;
}
} else if (extractObject != null) {
result.set(extractObject.toString());
} else {
return null;
}
return result;
}
private Object extract(Object json, String path, boolean skipMapProc) {
// skip MAP processing for the first path element if root is array
if (!skipMapProc) {
// Cache patternkey.matcher(path).matches()
Matcher mKey = null;
Boolean mKeyMatches = mKeyMatchesCache.get(path);
if (mKeyMatches == null) {
mKey = patternKey.matcher(path);
mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
mKeyMatchesCache.put(path, mKeyMatches);
}
if (!mKeyMatches.booleanValue()) {
return null;
}
// Cache mkey.group(1)
String mKeyGroup1 = mKeyGroup1Cache.get(path);
if (mKeyGroup1 == null) {
if (mKey == null) {
mKey = patternKey.matcher(path);
mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
mKeyMatchesCache.put(path, mKeyMatches);
if (!mKeyMatches.booleanValue()) {
return null;
}
}
mKeyGroup1 = mKey.group(1);
mKeyGroup1Cache.put(path, mKeyGroup1);
}
json = extract_json_withkey(json, mKeyGroup1);
}
// Cache indexList
ArrayList indexList = indexListCache.get(path);
if (indexList == null) {
Matcher mIndex = patternIndex.matcher(path);
indexList = new ArrayList();
while (mIndex.find()) {
indexList.add(mIndex.group(1));
}
indexListCache.put(path, indexList);
}
if (indexList.size() > 0) {
json = extract_json_withindex(json, indexList);
}
return json;
}
private transient AddingList jsonList = new AddingList();
private static class AddingList extends ArrayList {
private static final long serialVersionUID = 1L;
@Override
public Iterator iterator() {
return Iterators.forArray(toArray());
}
@Override
public void removeRange(int fromIndex, int toIndex) {
super.removeRange(fromIndex, toIndex);
}
};
@SuppressWarnings("unchecked")
private Object extract_json_withindex(Object json, ArrayList indexList) {
jsonList.clear();
jsonList.add(json);
for (String index : indexList) {
int targets = jsonList.size();
if (index.equalsIgnoreCase("*")) {
for (Object array : jsonList) {
if (array instanceof List) {
for (int j = 0; j < ((List)array).size(); j++) {
jsonList.add(((List)array).get(j));
}
}
}
} else {
for (Object array : jsonList) {
int indexValue = Integer.parseInt(index);
if (!(array instanceof List)) {
continue;
}
List list = (List) array;
if (indexValue >= list.size()) {
continue;
}
jsonList.add(list.get(indexValue));
}
}
if (jsonList.size() == targets) {
return null;
}
jsonList.removeRange(0, targets);
}
if (jsonList.isEmpty()) {
return null;
}
return (jsonList.size() > 1) ? new ArrayList(jsonList) : jsonList.get(0);
}
@SuppressWarnings("unchecked")
private Object extract_json_withkey(Object json, String path) {
if (json instanceof List) {
List jsonArray = new ArrayList();
for (int i = 0; i < ((List) json).size(); i++) {
Object json_elem = ((List) json).get(i);
Object json_obj = null;
if (json_elem instanceof Map) {
json_obj = ((Map) json_elem).get(path);
} else {
continue;
}
if (json_obj instanceof List) {
for (int j = 0; j < ((List) json_obj).size(); j++) {
jsonArray.add(((List) json_obj).get(j));
}
} else if (json_obj != null) {
jsonArray.add(json_obj);
}
}
return (jsonArray.size() == 0) ? null : jsonArray;
} else if (json instanceof Map) {
return ((Map) json).get(path);
} else {
return null;
}
}
}