Please wait. This can take a few minutes...
Downloading a project requires significant server resources. Please understand that we need to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
org.openmetadata.service.search.SearchIndexUtils Maven / Gradle / Ivy
package org.openmetadata.service.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.json.JsonArray;
import javax.json.JsonObject;
import org.openmetadata.schema.tests.DataQualityReport;
import org.openmetadata.schema.tests.Datum;
import org.openmetadata.schema.tests.type.DataQualityReportMetadata;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.TagLabel;
public final class SearchIndexUtils {
/** Private constructor: prevents instantiation from outside this class of static helpers. */
private SearchIndexUtils() {}
/**
 * Converts follower references into the string form of their ids.
 *
 * @param followersRef follower references; may be {@code null}
 * @return the {@code toString()} of each reference's id, in input order, or an empty list when
 *     {@code followersRef} is {@code null}
 */
public static List<String> parseFollowers(List<EntityReference> followersRef) {
  if (followersRef == null) {
    return Collections.emptyList();
  }
  return followersRef.stream().map(follower -> follower.getId().toString()).toList();
}
/**
 * Converts owner references into the string form of their ids.
 *
 * @param ownersRef owner references; may be {@code null}
 * @return the {@code toString()} of each reference's id, in input order, or an empty list when
 *     {@code ownersRef} is {@code null}
 */
public static List<String> parseOwners(List<EntityReference> ownersRef) {
  if (ownersRef == null) {
    return Collections.emptyList();
  }
  return ownersRef.stream().map(owner -> owner.getId().toString()).toList();
}
/**
 * Removes the given fields from a document in place.
 *
 * <p>A field name containing a dot is treated as a nested path and delegated to
 * {@link #removeFieldByPath(Map, String)}; any other name is removed from the top level of
 * {@code doc}.
 *
 * @param doc the document to modify
 * @param fields top-level keys and/or dot-separated paths to remove
 */
public static void removeNonIndexableFields(Map<String, Object> doc, Set<String> fields) {
  for (String field : fields) {
    if (field.contains(".")) {
      removeFieldByPath(doc, field);
    } else {
      doc.remove(field);
    }
  }
}
public static void removeFieldByPath(Map jsonMap, String path) {
String[] pathElements = path.split("\\.");
Map currentMap = jsonMap;
String key = pathElements[0];
Object value = currentMap.get(key);
if (value instanceof Map) {
currentMap = (Map) value;
} else if (value instanceof List) {
List> list = (List>) value;
for (Map item : list) {
removeFieldByPath(
item,
Arrays.stream(pathElements, 1, pathElements.length).collect(Collectors.joining(".")));
}
return;
} else {
return;
}
// Remove the field at the last path element
String lastKey = pathElements[pathElements.length - 1];
currentMap.remove(lastKey);
}
/**
 * Finishes one branch of a terms aggregation: records the bucket's document count and emits a
 * report row.
 *
 * <p>The value of {@code doc_count} is stored in {@code nodeData} under {@code document_count}
 * (as its {@code toString()}, or {@code null} when the bucket has no {@code doc_count}). Every
 * entry currently in {@code nodeData} is then copied into a new {@link Datum}, which is appended
 * to {@code reportData}.
 *
 * @param aggregationResults the bucket being closed
 * @param reportData the report rows collected so far; one row is appended
 * @param nodeData values accumulated along the current branch; modified in place
 */
private static void handleLeafTermsAggregation(
    JsonObject aggregationResults, List<Datum> reportData, Map<String, String> nodeData) {
  // get(...) returns null for an absent key, so check before calling toString().
  Object docCount = aggregationResults.get("doc_count");
  nodeData.put("document_count", docCount == null ? null : docCount.toString());

  Datum datum = new Datum();
  for (Map.Entry<String, String> entry : nodeData.entrySet()) {
    datum.withAdditionalProperty(entry.getKey(), entry.getValue());
  }
  reportData.add(datum);
}
/**
 * Finishes one branch that ends in a metric aggregation: records the metric value and emits a
 * report row.
 *
 * <p>The {@code value_as_string} of the aggregation is stored in {@code nodeData} under
 * {@code metric}, or {@code null} when that property is absent or is not a JSON string. Every
 * entry currently in {@code nodeData} is then copied into a new {@link Datum}, which is appended
 * to {@code reportData}.
 *
 * @param aggregationResults the metric aggregation node
 * @param reportData the report rows collected so far; one row is appended
 * @param nodeData values accumulated along the current branch; modified in place
 * @param metric the key under which the metric value is reported
 */
private static void handleLeafMetricsAggregation(
    JsonObject aggregationResults,
    List<Datum> reportData,
    Map<String, String> nodeData,
    String metric) {
  // The two-argument getString returns the default instead of throwing when the key is missing.
  nodeData.put(metric, aggregationResults.getString("value_as_string", null));

  Datum datum = new Datum();
  for (Map.Entry<String, String> entry : nodeData.entrySet()) {
    datum.withAdditionalProperty(entry.getKey(), entry.getValue());
  }
  reportData.add(datum);
}
/*
* Get the metadata for the aggregation results. We'll use the metadata to build the report and
* to traverse the aggregation tree. 3 types of metadata are returned:
* 1. dimensions: the list of dimensions
* 2. metrics: the list of metrics
* 3. keys: the list of keys to traverse the aggregation tree
*
* @param aggregationMapList the list of aggregations
* @return the metadata
*/
private static DataQualityReportMetadata getAggregationMetadata(
List>> aggregationMapList) {
DataQualityReportMetadata metadata = new DataQualityReportMetadata();
List dimensions = new ArrayList<>();
List metrics = new ArrayList<>();
List keys = new ArrayList<>();
for (List> aggregationsMap : aggregationMapList) {
for (int j = 0; j < aggregationsMap.size(); j++) {
Map aggregationMap = aggregationsMap.get(j);
String aggType = aggregationMap.get("aggType");
String field = aggregationMap.get("field");
boolean isLeaf = j == aggregationsMap.size() - 1;
if (isLeaf) {
// leaf aggregation
if (!aggType.contains("term")) {
metrics.add(field);
} else {
dimensions.add(field);
metrics.add("document_count");
}
} else {
dimensions.add(field);
}
String formattedAggType = aggType.contains("term") ? "s%s".formatted(aggType) : aggType;
keys.add("%s#%s".formatted(formattedAggType, aggregationMap.get("bucketName")));
}
}
metadata.withKeys(keys).withDimensions(dimensions).withMetrics(metrics);
return metadata;
}
/**
 * Walks the aggregation results depth-first and appends one report row per leaf. The method
 * supports n levels of nested aggregations, but does not support sibling aggregations.
 *
 * <p>At each level the first entry of {@code keys} selects the aggregation node. If that node has
 * buckets, each bucket's {@code key} is stored in {@code nodeData} under the first entry of
 * {@code dimensions} and the traversal recurses into the bucket with the remaining keys and
 * dimensions. If it has no buckets, it is treated as a metric leaf. When {@code keys} is
 * exhausted, the current object is treated as a terms leaf. A key that selects no aggregation
 * ends that branch without producing a row.
 *
 * @param aggregationResults the aggregation results at the current level
 * @param reportData the report rows; appended to in place
 * @param nodeData values accumulated along the current branch; modified in place
 * @param keys the keys still to traverse in the aggregation tree
 * @param metric the metric name to report for a metric leaf
 * @param dimensions the dimension names matching the remaining levels
 */
private static void traverseAggregationResults(
    JsonObject aggregationResults,
    List<Datum> reportData,
    Map<String, String> nodeData,
    List<String> keys,
    String metric,
    List<String> dimensions) {
  if (keys.isEmpty()) {
    // We are in the leaf of the term aggregation. The document count is the metric.
    handleLeafTermsAggregation(aggregationResults, reportData, nodeData);
    return;
  }

  // The current key represents the node in the aggregation tree (i.e. the current bucket).
  String currentKey = keys.get(0);
  JsonObject aggregation = SearchClient.getAggregationObject(aggregationResults, currentKey);
  if (aggregation == null) {
    return;
  }

  JsonArray buckets = SearchClient.getAggregationBuckets(aggregation);
  if (buckets == null) {
    // No further buckets under this node: we are in the leaf of the metric aggregation.
    handleLeafMetricsAggregation(aggregation, reportData, nodeData, metric);
    return;
  }

  for (Object bucket : buckets) {
    JsonObject bucketObject = (JsonObject) bucket;
    // Bucket keys are not always JSON strings (e.g. numeric terms). getString(name, default)
    // yields the plain string for a JSON string and falls back to the value's JSON text
    // otherwise; an absent key is recorded as null.
    Object rawKey = bucketObject.get("key");
    String bucketKey = rawKey == null ? null : bucketObject.getString("key", rawKey.toString());
    nodeData.put(dimensions.get(0), bucketKey);

    // Dimensions and keys represent the same level in the tree: dimensions name the report
    // columns while keys are used to traverse the aggregation tree.
    traverseAggregationResults(
        bucketObject,
        reportData,
        nodeData,
        keys.subList(1, keys.size()),
        metric,
        dimensions.subList(1, dimensions.size()));
  }
}
public static DataQualityReport parseAggregationResults(
Optional aggregationResults, List>> aggregationMapList) {
DataQualityReportMetadata metadata = getAggregationMetadata(aggregationMapList);
List reportData = new ArrayList<>();
aggregationResults.ifPresent(
jsonObject ->
traverseAggregationResults(
jsonObject,
reportData,
new HashMap<>(),
metadata.getKeys(),
metadata.getMetrics().get(0),
metadata.getDimensions()));
DataQualityReport report = new DataQualityReport();
return report.withMetadata(metadata).withData(reportData);
}
/**
 * Parses an aggregation definition and builds the matching JSON aggregation fragment.
 *
 * <p>The definition has the form
 * {@code bucketName=<name>:aggType=<type>:field=<field>[,<nested>...][;<sibling>...]}, where
 * {@code ,} introduces a nested aggregation and {@code ;} a sibling aggregation. Within one
 * aggregation the {@code key=value} parts are separated by {@code :}. A {@code field} part is
 * rendered as {@code "field":"<value>"}} and any other part opens an object named after its
 * value, so the braces balance only when {@code field} is the last part of each aggregation.
 * Only the first {@code =} of a part separates key from value. Values are inserted verbatim,
 * without JSON escaping.
 *
 * <p>NOTE: sibling fragments are joined with a comma here, but as of 07/25/2024 sibling
 * aggregation parsing and processing has not been added.
 *
 * @param aggregation the aggregation definition
 * @return a map with {@code aggregationStr} (the JSON fragment as a {@code String}) and
 *     {@code aggregationMapList} (a {@code List<List<Map<String, String>>>} holding, per sibling,
 *     the key/value parts of each nested aggregation from outermost to innermost)
 * @throws IllegalArgumentException if a part is not of the form {@code key=value}
 */
public static Map<String, Object> buildAggregationString(String aggregation) {
  StringBuilder aggregationString = new StringBuilder();
  List<List<Map<String, String>>> aggregationsMapList = new ArrayList<>();

  String[] siblings = aggregation.split(";");
  for (int s = 0; s < siblings.length; s++) {
    if (s > 0) {
      // Siblings are members of the same JSON object and must be comma-separated.
      aggregationString.append(",");
    }
    String[] nested = siblings[s].split(",");
    List<Map<String, String>> aggregationsMap = new ArrayList<>();
    for (int i = 0; i < nested.length; i++) {
      Map<String, String> aggregationMap = new HashMap<>();
      for (String part : nested[i].split(":")) {
        // Limit 2 keeps any further '=' inside the value.
        String[] kvPair = part.split("=", 2);
        if (kvPair.length < 2) {
          throw new IllegalArgumentException(
              "Invalid aggregation part '%s' in '%s': expected key=value"
                  .formatted(part, aggregation));
        }
        String name = kvPair[0];
        String value = kvPair[1];
        if (name.equals("field")) {
          aggregationString.append("\"").append(name).append("\":\"").append(value).append("\"");
          aggregationString.append("}");
        } else {
          aggregationString.append("\"").append(value).append("\":{");
        }
        aggregationMap.put(name, value);
      }
      if (i < nested.length - 1) {
        aggregationString.append(",\"aggs\":{");
      }
      aggregationsMap.add(aggregationMap);
    }
    // Each nesting level opened a bucket object and an "aggs" object, hence *2; the extra one
    // closes the outermost bucket.
    aggregationString.append("}".repeat(((nested.length - 1) * 2) + 1));
    aggregationsMapList.add(aggregationsMap);
  }

  Map<String, Object> metadata = new HashMap<>();
  metadata.put("aggregationStr", aggregationString.toString());
  metadata.put("aggregationMapList", aggregationsMapList);
  return metadata;
}
/**
 * Null-safe accessor for a tag list.
 *
 * @param tags the tags (presumably {@code TagLabel} items, per the file's imports; the element
 *     type is not declared on this method); may be {@code null}
 * @return {@code tags} itself when it is not {@code null}, otherwise an empty list
 */
public static List parseTags(List tags) {
  return tags != null ? tags : Collections.emptyList();
}
}