All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.calcite.adapter.elasticsearch.ElasticsearchJson Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.calcite.adapter.elasticsearch;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableSet;

import java.io.IOException;
import java.time.Duration;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.StreamSupport;

import static java.util.Collections.unmodifiableMap;

/**
 * Internal objects (and deserializers) used to parse Elasticsearch results
 * (which are in JSON format).
 *
 * 

Since we're using basic row-level rest client http response has to be * processed manually using JSON (jackson) library. */ final class ElasticsearchJson { private ElasticsearchJson() {} /** * Visits leaves of the aggregation where all values are stored. */ static void visitValueNodes(Aggregations aggregations, Consumer> consumer) { Objects.requireNonNull(aggregations, "aggregations"); Objects.requireNonNull(consumer, "consumer"); Map> rows = new LinkedHashMap<>(); BiConsumer cons = (r, v) -> rows.computeIfAbsent(r, ignore -> new ArrayList<>()).add(v); aggregations.forEach(a -> visitValueNodes(a, new ArrayList<>(), cons)); rows.forEach((k, v) -> { if (v.stream().allMatch(val -> val instanceof GroupValue)) { v.forEach(tuple -> { Map groupRow = new LinkedHashMap<>(k.keys); groupRow.put(tuple.getName(), tuple.value()); consumer.accept(groupRow); }); } else { Map row = new LinkedHashMap<>(k.keys); v.forEach(val -> row.put(val.getName(), val.value())); consumer.accept(row); } }); } /** * Visits Elasticsearch * mapping * properties and calls consumer for each {@code field / type} pair. * Nested fields are represented as {@code foo.bar.qux}. */ static void visitMappingProperties(ObjectNode mapping, BiConsumer consumer) { Objects.requireNonNull(mapping, "mapping"); Objects.requireNonNull(consumer, "consumer"); visitMappingProperties(new ArrayDeque<>(), mapping, consumer); } private static void visitMappingProperties(Deque path, ObjectNode mapping, BiConsumer consumer) { Objects.requireNonNull(mapping, "mapping"); if (mapping.isMissingNode()) { return; } // check if we have reached actual field mapping (leaf of JSON tree) Predicate isLeaf = node -> node.path("type").isValueNode(); if (mapping.path("properties").isObject() && !isLeaf.test(mapping.path("properties"))) { // recurse visitMappingProperties(path, (ObjectNode) mapping.get("properties"), consumer); return; } if (isLeaf.test(mapping)) { // this is leaf (register field / type mapping) consumer.accept(String.join(".", path), mapping.get("type").asText()); return; } // otherwise continue visiting mapping(s) Iterable> iter = mapping::fields; for (Map.Entry entry : iter) { final String name = entry.getKey(); final ObjectNode node = (ObjectNode) entry.getValue(); path.add(name); visitMappingProperties(path, node, consumer); path.removeLast(); } } /** * Identifies a Calcite row (as in relational algebra). */ private static class RowKey { private final Map keys; private final int hashCode; private RowKey(final Map keys) { this.keys = Objects.requireNonNull(keys, "keys"); this.hashCode = Objects.hashCode(keys); } private RowKey(List buckets) { this(toMap(buckets)); } private static Map toMap(Iterable buckets) { return StreamSupport.stream(buckets.spliterator(), false) .collect(LinkedHashMap::new, (m, v) -> m.put(v.getName(), v.key()), LinkedHashMap::putAll); } @Override public boolean equals(final Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } final RowKey rowKey = (RowKey) o; return hashCode == rowKey.hashCode && Objects.equals(keys, rowKey.keys); } @Override public int hashCode() { return this.hashCode; } } private static void visitValueNodes(Aggregation aggregation, List parents, BiConsumer consumer) { if (aggregation instanceof MultiValue) { // this is a leaf. publish value of the row. RowKey key = new RowKey(parents); consumer.accept(key, (MultiValue) aggregation); return; } if (aggregation instanceof Bucket) { Bucket bucket = (Bucket) aggregation; if (bucket.hasNoAggregations()) { // bucket with no aggregations is also considered a leaf node visitValueNodes(GroupValue.of(bucket.getName(), bucket.key()), parents, consumer); return; } parents.add(bucket); bucket.getAggregations().forEach(a -> visitValueNodes(a, parents, consumer)); parents.remove(parents.size() - 1); } else if (aggregation instanceof HasAggregations) { HasAggregations children = (HasAggregations) aggregation; children.getAggregations().forEach(a -> visitValueNodes(a, parents, consumer)); } else if (aggregation instanceof MultiBucketsAggregation) { MultiBucketsAggregation multi = (MultiBucketsAggregation) aggregation; multi.buckets().forEach(b -> visitValueNodes(b, parents, consumer)); } } /** * Response from Elastic. */ @JsonIgnoreProperties(ignoreUnknown = true) static class Result { private final SearchHits hits; private final Aggregations aggregations; private final String scrollId; private final long took; /** * Constructor for this instance. * @param hits list of matched documents * @param took time taken (in took) for this query to execute */ @JsonCreator Result(@JsonProperty("hits") SearchHits hits, @JsonProperty("aggregations") Aggregations aggregations, @JsonProperty("_scroll_id") String scrollId, @JsonProperty("took") long took) { this.hits = Objects.requireNonNull(hits, "hits"); this.aggregations = aggregations; this.scrollId = scrollId; this.took = took; } SearchHits searchHits() { return hits; } Aggregations aggregations() { return aggregations; } Duration took() { return Duration.ofMillis(took); } Optional scrollId() { return Optional.ofNullable(scrollId); } } /** * Similar to {@code SearchHits} in ES. Container for {@link SearchHit} */ @JsonIgnoreProperties(ignoreUnknown = true) static class SearchHits { private final SearchTotal total; private final List hits; @JsonCreator SearchHits(@JsonProperty("total")final SearchTotal total, @JsonProperty("hits") final List hits) { this.total = total; this.hits = Objects.requireNonNull(hits, "hits"); } public List hits() { return this.hits; } public SearchTotal total() { return total; } } /** * Container for total hits. */ @JsonDeserialize(using = SearchTotalDeserializer.class) static class SearchTotal { private final long value; SearchTotal(final long value) { this.value = value; } public long value() { return value; } } /** * Allows to de-serialize total hits structures. */ static class SearchTotalDeserializer extends StdDeserializer { SearchTotalDeserializer() { super(SearchTotal.class); } @Override public SearchTotal deserialize(final JsonParser parser, final DeserializationContext ctxt) throws IOException { JsonNode node = parser.getCodec().readTree(parser); return parseSearchTotal(node); } private static SearchTotal parseSearchTotal(JsonNode node) { final Number value; if (node.isNumber()) { value = node.numberValue(); } else { value = node.get("value").numberValue(); } return new SearchTotal(value.longValue()); } } /** * Concrete result record which matched the query. Similar to {@code SearchHit} in ES. */ @JsonIgnoreProperties(ignoreUnknown = true) static class SearchHit { /** * ID of the document (not available in aggregations). */ private final String id; private final Map source; private final Map fields; @JsonCreator SearchHit(@JsonProperty(ElasticsearchConstants.ID) final String id, @JsonProperty("_source") final Map source, @JsonProperty("fields") final Map fields) { this.id = Objects.requireNonNull(id, "id"); // both can't be null if (source == null && fields == null) { final String message = String.format(Locale.ROOT, "Both '_source' and 'fields' are missing for %s", id); throw new IllegalArgumentException(message); } // both can't be non-null if (source != null && fields != null) { final String message = String.format(Locale.ROOT, "Both '_source' and 'fields' are populated (non-null) for %s", id); throw new IllegalArgumentException(message); } this.source = source; this.fields = fields; } /** * Returns id of this hit (usually document id). * * @return unique id */ public String id() { return id; } Object valueOrNull(String name) { Objects.requireNonNull(name, "name"); // for "select *" return whole document if (ElasticsearchConstants.isSelectAll(name)) { return sourceOrFields(); } if (fields != null && fields.containsKey(name)) { Object field = fields.get(name); if (field instanceof Iterable) { // return first element (or null) Iterator iter = ((Iterable) field).iterator(); return iter.hasNext() ? iter.next() : null; } return field; } return valueFromPath(source, name); } /** * Returns property from nested maps given a path like {@code a.b.c}. * @param map current map * @param path field path(s), optionally with dots ({@code a.b.c}). * @return value located at path {@code path} or {@code null} if not found. */ private static Object valueFromPath(Map map, String path) { if (map == null) { return null; } if (map.containsKey(path)) { return map.get(path); } // maybe pattern of type a.b.c final int index = path.indexOf('.'); if (index == -1) { return null; } final String prefix = path.substring(0, index); final String suffix = path.substring(index + 1); Object maybeMap = map.get(prefix); if (maybeMap instanceof Map) { return valueFromPath((Map) maybeMap, suffix); } return null; } Map source() { return source; } Map fields() { return fields; } Map sourceOrFields() { return source != null ? source : fields; } } /** * {@link Aggregation} container. */ @JsonDeserialize(using = AggregationsDeserializer.class) static class Aggregations implements Iterable { private final List aggregations; private Map aggregationsAsMap; Aggregations(List aggregations) { this.aggregations = Objects.requireNonNull(aggregations, "aggregations"); } /** * Iterates over the {@link Aggregation}s. */ @Override public final Iterator iterator() { return asList().iterator(); } /** * The list of {@link Aggregation}s. */ final List asList() { return Collections.unmodifiableList(aggregations); } /** * Returns the {@link Aggregation}s keyed by aggregation name. Lazy init. */ final Map asMap() { if (aggregationsAsMap == null) { Map map = new LinkedHashMap<>(aggregations.size()); for (Aggregation aggregation : aggregations) { map.put(aggregation.getName(), aggregation); } this.aggregationsAsMap = unmodifiableMap(map); } return aggregationsAsMap; } /** * Returns the aggregation that is associated with the specified name. */ @SuppressWarnings("unchecked") public final A get(String name) { return (A) asMap().get(name); } @Override public final boolean equals(Object obj) { if (obj == null || getClass() != obj.getClass()) { return false; } return aggregations.equals(((Aggregations) obj).aggregations); } @Override public final int hashCode() { return Objects.hash(getClass(), aggregations); } } /** * Identifies all aggregations. */ interface Aggregation { /** * Returns the name of this aggregation. */ String getName(); } /** * Allows traversing aggregations tree. */ interface HasAggregations { Aggregations getAggregations(); } /** * An aggregation that returns multiple buckets. */ static class MultiBucketsAggregation implements Aggregation { private final String name; private final List buckets; MultiBucketsAggregation(final String name, final List buckets) { this.name = name; this.buckets = buckets; } /** * Returns the buckets of this aggregation. */ List buckets() { return buckets; } @Override public String getName() { return name; } } /** * A bucket represents a criteria to which all documents that fall in it adhere to. * It is also uniquely identified * by a key, and can potentially hold sub-aggregations computed over all documents in it. */ static class Bucket implements HasAggregations, Aggregation { private final Object key; private final String name; private final Aggregations aggregations; Bucket(final Object key, final String name, final Aggregations aggregations) { this.key = key; // key can be set after construction this.name = Objects.requireNonNull(name, "name"); this.aggregations = Objects.requireNonNull(aggregations, "aggregations"); } /** * Returns the key associated with the bucket. */ Object key() { return key; } /** * Returns the key associated with the bucket as a string. */ String keyAsString() { return Objects.toString(key()); } /** * Means current bucket has no aggregations. */ boolean hasNoAggregations() { return aggregations.asList().isEmpty(); } /** * Returns the sub-aggregations of this bucket. */ @Override public Aggregations getAggregations() { return aggregations; } @Override public String getName() { return name; } } /** * Multi-value aggregation, like * Stats. */ static class MultiValue implements Aggregation { private final String name; private final Map values; MultiValue(final String name, final Map values) { this.name = Objects.requireNonNull(name, "name"); this.values = Objects.requireNonNull(values, "values"); } @Override public String getName() { return name; } Map values() { return values; } /** * For single value. Returns single value represented by this leaf aggregation. * @return value corresponding to {@code value} */ Object value() { if (!values().containsKey("value")) { String message = String.format(Locale.ROOT, "'value' field not present in " + "%s aggregation", getName()); throw new IllegalStateException(message); } return values().get("value"); } } /** * Distinguishes from {@link MultiValue}. * In order that rows which have the same key can be put into result map. */ static class GroupValue extends MultiValue { GroupValue(String name, Map values) { super(name, values); } /** * Constructs a {@link GroupValue} instance with a single value. */ static GroupValue of(String name, Object value) { return new GroupValue(name, Collections.singletonMap("value", value)); } } /** * Allows to de-serialize nested aggregation structures. */ static class AggregationsDeserializer extends StdDeserializer { private static final Set IGNORE_TOKENS = ImmutableSet.of("meta", "buckets", "value", "values", "value_as_string", "doc_count", "key", "key_as_string"); AggregationsDeserializer() { super(Aggregations.class); } @Override public Aggregations deserialize(final JsonParser parser, final DeserializationContext ctxt) throws IOException { ObjectNode node = parser.getCodec().readTree(parser); return parseAggregations(parser, node); } private static Aggregations parseAggregations(JsonParser parser, ObjectNode node) throws JsonProcessingException { List aggregations = new ArrayList<>(); Iterable> iter = node::fields; for (Map.Entry entry : iter) { final String name = entry.getKey(); final JsonNode value = entry.getValue(); Aggregation agg = null; if (value.has("buckets")) { agg = parseBuckets(parser, name, (ArrayNode) value.get("buckets")); } else if (value.isObject() && !IGNORE_TOKENS.contains(name)) { // leaf agg = parseValue(parser, name, (ObjectNode) value); } if (agg != null) { aggregations.add(agg); } } return new Aggregations(aggregations); } private static MultiValue parseValue(JsonParser parser, String name, ObjectNode node) throws JsonProcessingException { return new MultiValue(name, parser.getCodec().treeToValue(node, Map.class)); } private static Aggregation parseBuckets(JsonParser parser, String name, ArrayNode nodes) throws JsonProcessingException { List buckets = new ArrayList<>(nodes.size()); for (JsonNode b: nodes) { buckets.add(parseBucket(parser, name, (ObjectNode) b)); } return new MultiBucketsAggregation(name, buckets); } /** * Determines if current key is a missing field key. Missing key is returned when document * does not have pivoting attribute (example {@code GROUP BY _MAP['a.b.missing']}). It helps * grouping documents which don't have a field. In relational algebra this * would normally be {@code null}. * *

Please note that missing value is different for each type. * * @param key current {@code key} (usually string) as returned by ES * @return {@code true} if this value */ private static boolean isMissingBucket(JsonNode key) { return ElasticsearchMapping.Datatype.isMissingValue(key); } private static Bucket parseBucket(JsonParser parser, String name, ObjectNode node) throws JsonProcessingException { if (!node.has("key")) { throw new IllegalArgumentException("No 'key' attribute for " + node); } final JsonNode keyNode = node.get("key"); final Object key; if (isMissingBucket(keyNode) || keyNode.isNull()) { key = null; } else if (keyNode.isTextual()) { key = keyNode.textValue(); } else if (keyNode.isNumber()) { key = keyNode.numberValue(); } else if (keyNode.isBoolean()) { key = keyNode.booleanValue(); } else { // don't usually expect keys to be Objects key = parser.getCodec().treeToValue(node, Map.class); } return new Bucket(key, name, parseAggregations(parser, node)); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy