All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.operator.scalar.JsonExtract Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.operator.scalar;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.io.SerializedString;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.trino.spi.TrinoException;

import java.io.IOException;
import java.io.UncheckedIOException;

import static com.fasterxml.jackson.core.JsonFactory.Feature.CANONICALIZE_FIELD_NAMES;
import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.FIELD_NAME;
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder;
import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static io.trino.util.JsonUtil.createJsonGenerator;
import static io.trino.util.JsonUtil.createJsonParser;
import static java.util.Objects.requireNonNull;

/**
 * Extracts values from JSON
 * 

* Supports the following JSON path primitives: *

 *    $ : Root object
 *    . or [] : Child operator
 *   [] : Subscript operator for array
 * 
*

* Supported JSON Path Examples: *

 *    { "store": {
 *        "book": [
 *          { "category": "reference",
 *            "author": "Nigel Rees",
 *            "title": "Sayings of the Century",
 *            "price": 8.95,
 *            "contributors": [["Adam", "Levine"], ["Bob", "Strong"]]
 *          },
 *          { "category": "fiction",
 *            "author": "Evelyn Waugh",
 *            "title": "Sword of Honour",
 *            "price": 12.99,
 *            "isbn": "0-553-21311-3",
 *            "last_owner": null
 *          }
 *        ],
 *        "bicycle": {
 *          "color": "red",
 *          "price": 19.95
 *        }
 *      }
 *    }
 * 
*

* With only scalar values using dot-notation of path: *

 *    $.store.book[0].author => Nigel Rees
 *    $.store.bicycle.price => 19.95
 *    $.store.book[0].isbn => NULL (Doesn't exist becomes java null)
 *    $.store.book[1].last_owner => NULL (json null becomes java null)
 *    $.store.book[0].contributors[0][1] => Levine
 * 
*

* With json values using dot-notation of path: *

 *    $.store.book[0].author => "Nigel Rees"
 *    $.store.bicycle.price => 19.95
 *    $.store.book[0].isbn => NULL (Doesn't exist becomes java null)
 *    $.store.book[1].last_owner => null (json null becomes the string "null")
 *    $.store.book[0].contributors[0] => ["Adam", "Levine"]
 *    $.store.bicycle => {"color": "red", "price": 19.95}
 * 
* With only scalar values using bracket-notation of path: *
 *    $["store"]["book"][0]["author"] => Nigel Rees
 *    $["store"]["bicycle"]["price"] => 19.95
 *    $["store"]["book"][0]["isbn"] => NULL (Doesn't exist becomes java null)
 *    $["store"]["book"][1]["last_owner"] => NULL (json null becomes java null)
 *    $["store"]["book"][0]["contributors"][0][1] => Levine
 * 
*

* With json values using bracket-notation of path: *

 *    $["store"]["book"][0]["author"] => "Nigel Rees"
 *    $["store"]["bicycle"]["price"] => 19.95
 *    $["store"]["book"][0]["isbn"] => NULL (Doesn't exist becomes java null)
 *    $["store"]["book"][1]["last_owner"] => null (json null becomes the string "null")
 *    $["store"]["book"][0]["contributors"][0] => ["Adam", "Levine"]
 *    $["store"]["bicycle"] => {"color": "red", "price": 19.95}
 * 
*/ public final class JsonExtract { private static final int ESTIMATED_JSON_OUTPUT_SIZE = 512; private static final JsonFactory JSON_FACTORY = jsonFactoryBuilder() .disable(CANONICALIZE_FIELD_NAMES) .build(); private JsonExtract() {} public static T extract(Slice jsonInput, JsonExtractor jsonExtractor) { requireNonNull(jsonInput, "jsonInput is null"); try (JsonParser jsonParser = createJsonParser(JSON_FACTORY, jsonInput)) { return extract(jsonParser, jsonExtractor); } catch (IOException e) { throw new UncheckedIOException(e); } } public static T extract(JsonParser jsonParser, JsonExtractor jsonExtractor) { requireNonNull(jsonParser, "jsonParser is null"); try { // Initialize by advancing to first token and make sure it exists if (jsonParser.nextToken() == null) { return null; } return jsonExtractor.extract(jsonParser); } catch (JsonParseException e) { // Return null if we failed to parse something return null; } catch (IOException e) { throw new UncheckedIOException(e); } } public static JsonExtractor generateExtractor(String path, JsonExtractor rootExtractor) { return generateExtractor(path, rootExtractor, false); } public static JsonExtractor generateExtractor(String path, JsonExtractor rootExtractor, boolean exceptionOnOutOfBounds) { ImmutableList tokens = ImmutableList.copyOf(new JsonPathTokenizer(path)); JsonExtractor jsonExtractor = rootExtractor; for (String token : tokens.reverse()) { jsonExtractor = new ObjectFieldJsonExtractor<>(token, jsonExtractor, exceptionOnOutOfBounds); } return jsonExtractor; } public interface JsonExtractor { /** * Executes the extraction on the existing content of the JsonParser and outputs the match. *

* Notes: *

    *
  • JsonParser must be on the FIRST token of the value to be processed when extract is called
  • *
  • INVARIANT: when extract() returns, the current token of the parser will be the LAST token of the value
  • *
* * @return the value, or null if not applicable */ T extract(JsonParser jsonParser) throws IOException; } public static class ObjectFieldJsonExtractor implements JsonExtractor { private final SerializedString fieldName; private final JsonExtractor delegate; private final int index; private final boolean exceptionOnOutOfBounds; public ObjectFieldJsonExtractor(String fieldName, JsonExtractor delegate) { this(fieldName, delegate, false); } public ObjectFieldJsonExtractor(String fieldName, JsonExtractor delegate, boolean exceptionOnOutOfBounds) { this.fieldName = new SerializedString(requireNonNull(fieldName, "fieldName is null")); this.delegate = requireNonNull(delegate, "delegate is null"); this.exceptionOnOutOfBounds = exceptionOnOutOfBounds; this.index = tryParseInt(fieldName, -1); } @Override public T extract(JsonParser jsonParser) throws IOException { if (jsonParser.getCurrentToken() == START_OBJECT) { return processJsonObject(jsonParser); } if (jsonParser.getCurrentToken() == START_ARRAY) { return processJsonArray(jsonParser); } throw new JsonParseException(jsonParser, "Expected a JSON object or array"); } public T processJsonObject(JsonParser jsonParser) throws IOException { while (!jsonParser.nextFieldName(fieldName)) { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException(jsonParser, "Unexpected end of object"); } if (jsonParser.getCurrentToken() == END_OBJECT) { // Unable to find matching field return null; } jsonParser.skipChildren(); // Skip nested structure if currently at the start of one } jsonParser.nextToken(); // Shift to first token of the value return delegate.extract(jsonParser); } public T processJsonArray(JsonParser jsonParser) throws IOException { int currentIndex = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { throw new JsonParseException(jsonParser, "Unexpected end of array"); } if (token == END_ARRAY) { // Index out of bounds if (exceptionOnOutOfBounds) { throw new 
TrinoException(INVALID_FUNCTION_ARGUMENT, "Index out of bounds"); } return null; } if (currentIndex == index) { break; } currentIndex++; jsonParser.skipChildren(); // Skip nested structure if currently at the start of one } return delegate.extract(jsonParser); } } public static class ScalarValueJsonExtractor implements JsonExtractor { @Override public Slice extract(JsonParser jsonParser) throws IOException { JsonToken token = jsonParser.getCurrentToken(); if (token == null) { throw new JsonParseException(jsonParser, "Unexpected end of value"); } if (!token.isScalarValue() || token == VALUE_NULL) { return null; } return utf8Slice(jsonParser.getText()); } } public static class JsonValueJsonExtractor implements JsonExtractor { @Override public Slice extract(JsonParser jsonParser) throws IOException { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException(jsonParser, "Unexpected end of value"); } DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE); try (JsonGenerator jsonGenerator = createJsonGenerator(JSON_FACTORY, dynamicSliceOutput)) { jsonGenerator.copyCurrentStructure(jsonParser); } return dynamicSliceOutput.slice(); } } public static class JsonSizeExtractor implements JsonExtractor { @Override public Long extract(JsonParser jsonParser) throws IOException { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException(jsonParser, "Unexpected end of value"); } if (jsonParser.getCurrentToken() == START_ARRAY) { long length = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { return null; } if (token == END_ARRAY) { return length; } jsonParser.skipChildren(); length++; } } if (jsonParser.getCurrentToken() == START_OBJECT) { long length = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { return null; } if (token == END_OBJECT) { return length; } if (token == FIELD_NAME) { length++; } else { jsonParser.skipChildren(); } } } return 0L; } } private static 
int tryParseInt(String fieldName, int defaultValue) { int index = defaultValue; try { index = Integer.parseInt(fieldName); } catch (NumberFormatException _) { } return index; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy