/*
 * gov.nasa.pds.api.registry.search.RegistrySearchRequestBuilder
 * Artifact: registry-api-service (Maven / Gradle / Ivy)
 * Registry API Service contributing to the PDS Federated Search API
 */
package gov.nasa.pds.api.registry.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import gov.nasa.pds.api.registry.model.identifiers.PdsLidVid;
import gov.nasa.pds.api.registry.model.identifiers.PdsProductClasses;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CodePointCharStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.opensearch.client.opensearch._types.FieldSort;
import org.opensearch.client.opensearch._types.FieldValue;
import org.opensearch.client.opensearch._types.SortOptions;
import org.opensearch.client.opensearch._types.SortOrder;
import org.opensearch.client.opensearch._types.query_dsl.BoolQuery;
import org.opensearch.client.opensearch._types.query_dsl.ExistsQuery;
import org.opensearch.client.opensearch._types.query_dsl.FieldAndFormat;
import org.opensearch.client.opensearch._types.query_dsl.MatchQuery;
import org.opensearch.client.opensearch._types.query_dsl.Query;
import org.opensearch.client.opensearch._types.query_dsl.TermsQuery;
import org.opensearch.client.opensearch._types.query_dsl.TermsQueryField;
import org.opensearch.client.opensearch.core.SearchRequest;
import org.opensearch.client.opensearch.core.search.SourceConfig;
import org.opensearch.client.opensearch.core.search.SourceFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import gov.nasa.pds.api.registry.ConnectionContext;
import gov.nasa.pds.api.registry.lexer.SearchLexer;
import gov.nasa.pds.api.registry.lexer.SearchParser;
import gov.nasa.pds.api.registry.model.Antlr4SearchListener;
import gov.nasa.pds.api.registry.model.EntityProduct;
import gov.nasa.pds.api.registry.model.SearchUtil;
import gov.nasa.pds.api.registry.model.exceptions.SortSearchAfterMismatchException;
import gov.nasa.pds.api.registry.model.exceptions.UnparsableQParamException;
import gov.nasa.pds.api.registry.model.identifiers.PdsProductIdentifier;
public class RegistrySearchRequestBuilder extends SearchRequest.Builder{
/** Class-wide SLF4J logger. */
private static final Logger log = LoggerFactory.getLogger(RegistrySearchRequestBuilder.class);

/**
 * One {@link FieldAndFormat} per entry of {@link EntityProduct#JSON_PROPERTIES}, built once when the class
 * is initialised. The list is immutable so it can be shared safely between builder instances.
 */
private static final List<FieldAndFormat> STATIC_FIELDANDFORMATS = buildStaticFieldAndFormats();

/**
 * Builds the content of {@link #STATIC_FIELDANDFORMATS}.
 * A plain helper is used instead of double-brace initialisation, which would create an anonymous
 * ArrayList subclass just to populate the list.
 * @return an immutable list holding one FieldAndFormat per static JSON property
 */
private static List<FieldAndFormat> buildStaticFieldAndFormats() {
  List<FieldAndFormat> fieldAndFormats = new ArrayList<>();
  for (String prop : EntityProduct.JSON_PROPERTIES) {
    fieldAndFormats.add(new FieldAndFormat.Builder().field(prop).build());
  }
  return List.copyOf(fieldAndFormats);
}
// Connection context handed to the constructor; source of the registry indices
private ConnectionContext connectionContext;
// Names of the indices this request is issued against (as returned by the connection context)
private List<String> registryIndices;
// Accumulates every constraint added through this builder; converted to the request query in build()
private BoolQuery.Builder queryBuilder;
/**
 * Creates a builder which targets the registry indices of the given connection context and whose query
 * starts out with the mandatory baseline constraint.
 * @param connectionContext provides the registry indices and the input for the baseline query
 */
public RegistrySearchRequestBuilder(ConnectionContext connectionContext) {
  // edunn TODO: Evaluate what can be taken out of the constructor
  this.connectionContext = connectionContext;
  registryIndices = connectionContext.getRegistryIndices();
  log.info("Use indices: " + String.join(",", registryIndices) + "End indices");
  index(registryIndices);

  // All further constraints are added to this same bool-query builder
  queryBuilder = new BoolQuery.Builder().must(getMandatoryBaselineQuery(connectionContext));
}
/**
 * Return a baseline, non-configurable query which applies to all search requests. Currently, this is just archive
 * status, but this will likely be subject to extensive revision (may balloon, or disappear entirely)
 * @param connectionContext the context supplying the archive statuses which may be published
 * @return the minimal match constraints applicable to all search requests
 */
private static Query getMandatoryBaselineQuery(ConnectionContext connectionContext) {
  // Explicit element types are required for the FieldValue::of method reference to resolve
  List<String> archiveStatus = connectionContext.getArchiveStatus();
  List<FieldValue> archiveStatusFieldValues = archiveStatus.stream().map(FieldValue::of).toList();
  log.info("Only publishes archiveStatus: " + String.join(",", archiveStatus));

  TermsQueryField archiveStatusTerms = new TermsQueryField.Builder()
      .value(archiveStatusFieldValues)
      .build();

  // Terms query: the archive-status field must hold one of the configured values
  TermsQuery archiveStatusQuery = new TermsQuery.Builder()
      .field("ops:Tracking_Meta/ops:archive_status")
      .terms(archiveStatusTerms)
      .build();

  return archiveStatusQuery.toQuery();
}
/**
 * Exposes the BoolQuery.Builder from which RegistrySearchRequestBuilder.build() derives the request's query.
 * Prefer adding a dedicated method to RegistrySearchRequestBuilder when the constraint you need is common
 * enough to be shared; reach for the raw query builder only for one-off cases.
 * @return the query builder instance backing this search-request builder
 */
public BoolQuery.Builder getQueryBuilder() {
  return queryBuilder;
}
/**
 * Applies a common set of constraints and other build options which generally apply to any endpoint which queries
 * OpenSearch for a result-set of multiple products.
 * @param includeFieldNames - which properties to include in the results (JSON format, not OpenSearch format)
 * @param queryString - a querystring (q=) to constrain the result-set by
 * @param keywords - a set of keywords, passed to addKeywordsParam()
 * @param pageSize - the page size to use for pagination
 * @param sortFieldNames - the fields by which results are sorted (ascending), from highest to lowest priority
 * @param searchAfterFieldValues - the values corresponding to the sort fields, for pagination
 * @param excludeSupersededProducts - whether to exclude superseded products from the result set; null is
 *        treated as false
 * @return this builder, for chaining
 * @throws UnparsableQParamException if queryString cannot be parsed
 * @throws SortSearchAfterMismatchException if searchAfterFieldValues is given without matching sortFieldNames
 */
public RegistrySearchRequestBuilder applyMultipleProductsDefaults(
    List<String> includeFieldNames,
    String queryString,
    List<String> keywords,
    Integer pageSize,
    List<String> sortFieldNames,
    List<String> searchAfterFieldValues,
    Boolean excludeSupersededProducts
) throws UnparsableQParamException, SortSearchAfterMismatchException {
  this
      .fieldsFromStrings(includeFieldNames)
      .constrainByQueryString(queryString)
      .addKeywordsParam(keywords)
      .paginate(pageSize, sortFieldNames, searchAfterFieldValues);

  // Boolean.TRUE.equals() avoids a NullPointerException from auto-unboxing when the flag is absent
  if (Boolean.TRUE.equals(excludeSupersededProducts)) {
    this.excludeSupersededProducts();
  }

  return this;
}
/**
 * Finalises the request: installs the accumulated bool query, enables total-hit tracking and delegates to
 * the superclass build().
 * @return the assembled search request
 */
public SearchRequest build() {
  Query assembledQuery = queryBuilder.build().toQuery();
  query(assembledQuery);
  trackTotalHits(totalHits -> totalHits.enabled(true));
  return super.build();
}
/**
 * Requires (via a "must" match query) that the given field matches the given value.
 * @param fieldName the name of the field in OpenSearch format
 * @param value the value which must be present in the given field
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchField(String fieldName, String value) {
  MatchQuery fieldMatch = new MatchQuery.Builder()
      .field(fieldName)
      .query(new FieldValue.Builder().stringValue(value).build())
      .build();
  queryBuilder.must(fieldMatch.toQuery());
  return this;
}
/**
 * Identifier flavour of matchField(String, String): the identifier is converted with toString() and the
 * resulting text must match the given field.
 * @param fieldName the name of the field in OpenSearch format
 * @param identifier the PDS identifier whose string representation must be present in the given field
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchField(String fieldName, PdsProductIdentifier identifier) {
  String identifierText = identifier.toString();
  return matchField(fieldName, identifierText);
}
/**
 * Add a constraint that a given field name must match at least one of the given field values
 * (implemented as a "must" terms query).
 * @param fieldName the name of the field in OpenSearch format
 * @param values the values, one of which must be present in the given field
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchFieldAnyOf(String fieldName, List<String> values) {
  // The element type must be String for stringValue() to accept the lambda argument
  List<FieldValue> fieldValues = values.stream()
      .map(value -> new FieldValue.Builder().stringValue(value).build())
      .toList();
  TermsQueryField termsQueryField = new TermsQueryField.Builder().value(fieldValues).build();
  TermsQuery query = new TermsQuery.Builder().field(fieldName).terms(termsQueryField).build();

  this.queryBuilder.must(query.toQuery());

  return this;
}
/**
* Add a constraint that a given field name must match at least one of the given field values
* @param fieldName the name of the field in OpenSearch format
* @param identifiers the PDS identifiers, one of whose string representation must be present in the given field
*/
public RegistrySearchRequestBuilder matchFieldAnyOfIdentifiers(String fieldName, List extends PdsProductIdentifier> identifiers) {
return this.matchFieldAnyOf(fieldName, identifiers.stream().map(PdsProductIdentifier::toString).toList());
}
/**
 * Requires the "_id" field to match the string representation of the given identifier.
 * @param identifier the identifier to match against "_id"
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchLidvid(PdsProductIdentifier identifier) {
  final String idField = "_id";
  return matchField(idField, identifier);
}
/**
 * Requires the "lid" field to match the value returned by the identifier's getLid().
 * @param identifier the identifier whose LID must match
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchLid(PdsProductIdentifier identifier) {
  final String lidField = "lid";
  return matchField(lidField, identifier.getLid());
}
/**
 * Requires the product-class property (PdsProductClasses.getPropertyName()) to match the value of the given
 * product class.
 * @param productClass the product class whose getValue() must match
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchProductClass(PdsProductClasses productClass) {
  String productClassField = PdsProductClasses.getPropertyName();
  return matchField(productClassField, productClass.getValue());
}
/**
 * Requires the parent-bundle identifier field to match the given LIDVID.
 * @param identifier the identifier to match against "ops:Provenance/ops:parent_bundle_identifier"
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchMembersOfBundle(PdsLidVid identifier) {
  final String parentBundleField = "ops:Provenance/ops:parent_bundle_identifier";
  return matchField(parentBundleField, identifier);
}
/**
 * Requires the parent-collection identifier field to match the given LIDVID.
 * @param identifier the identifier to match against "ops:Provenance/ops:parent_collection_identifier"
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder matchMembersOfCollection(PdsLidVid identifier) {
  final String parentCollectionField = "ops:Provenance/ops:parent_collection_identifier";
  return matchField(parentCollectionField, identifier);
}
/**
 * Applies page size, sort order and (optionally) search-after values to the request.
 * The sort/search-after combination is validated before the builder is touched, so a rejected call leaves
 * the builder unmodified.
 * @param pageSize the page size to request
 * @param sortFieldNames the fields to sort by (API property format); may be null or empty for no sort
 * @param searchAfterFieldValues the search-after values, one per sort field; may be null or empty
 * @return this builder, for chaining
 * @throws SortSearchAfterMismatchException if search-after values are given without sort fields, or their
 *         counts differ
 */
public RegistrySearchRequestBuilder paginate(Integer pageSize, List<String> sortFieldNames,
    List<String> searchAfterFieldValues) throws SortSearchAfterMismatchException {
  boolean hasSort = (sortFieldNames != null) && (!sortFieldNames.isEmpty());
  boolean hasSearchAfter = (searchAfterFieldValues != null) && (!searchAfterFieldValues.isEmpty());

  if (hasSearchAfter) {
    if (sortFieldNames == null) {
      throw new SortSearchAfterMismatchException("sort argument must be provided if searchAfter argument is provided");
    } else if (searchAfterFieldValues.size() != sortFieldNames.size()) {
      throw new SortSearchAfterMismatchException("sort and searchAfter arguments must be of equal length if provided");
    }
  }

  if (hasSort) {
    this.sortFromStrings(sortFieldNames);
  }

  this.size(pageSize);

  if (hasSearchAfter) {
    this.searchAfterFromStrings(searchAfterFieldValues);
  }

  return this;
}
/**
 * Implements an alternative to .sort() that accepts strings in API property format.
 * Currently hardcoded to sort in ascending order only.
 * @param sortFieldNames the sort fields in API (JSON) property format, highest priority first
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder sortFromStrings(List<String> sortFieldNames) {
  List<SortOptions> sortOptionsList = new ArrayList<>(sortFieldNames.size());

  for (String field : sortFieldNames) {
    // Translate the API property name into the corresponding OpenSearch field name
    String openSearchField = SearchUtil.jsonPropertyToOpenProperty(field);
    FieldSort fieldSort =
        new FieldSort.Builder().field(openSearchField).order(SortOrder.Asc).build();
    sortOptionsList.add(new SortOptions.Builder().field(fieldSort).build());
  }

  this.sort(sortOptionsList);

  return this;
}
/**
 * Implements an alternative to .searchAfter() that accepts values as strings.
 * The values are currently handed to .searchAfter() unchanged; per the original author's note, a later
 * version of the OpenSearch client will require them to be converted to FieldValues first.
 * @param searchAfterValues the search-after values, as strings
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder searchAfterFromStrings(List<String> searchAfterValues) {
  // TODO once the client requires FieldValues, convert each entry with
  // new FieldValue.Builder().stringValue(value).build() before passing the list on, and check whether
  // number values need to be handled specifically (the method name stringValue() implies yes).
  this.searchAfter(searchAfterValues);

  return this;
}
/**
 * Implements an alternative to .fields() that accepts values as strings.
 * When field names are given, the request's _source is restricted to those fields (converted to OpenSearch
 * format) plus the static EntityProduct.JSON_PROPERTIES; a null or empty list leaves the request untouched.
 * @param fieldNames the requested fields in API (JSON) property format; may be null or empty
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder fieldsFromStrings(List<String> fieldNames) {
  if ((fieldNames == null) || (fieldNames.isEmpty())) {
    return this;
  }

  log.info("restricting list of fields requested from OpenSearch.");
  // TODO refine to only pull the static field when the output response requires it.
  List<String> openSearchFields =
      new ArrayList<>(Arrays.asList(EntityProduct.JSON_PROPERTIES));
  for (String field : fieldNames) {
    openSearchFields.add(SearchUtil.jsonPropertyToOpenProperty(field));
  }

  SourceFilter sourceFilter = new SourceFilter.Builder().includes(openSearchFields).build();
  SourceConfig limitedSourceCfg = new SourceConfig.Builder().filter(sourceFilter).build();

  this.source(limitedSourceCfg);

  return this;
}
/**
 * Parses a query string with the ANTLR SearchLexer/SearchParser and converts the parse tree into a
 * BoolQuery by walking it with an Antlr4SearchListener.
 * The parser uses BailErrorStrategy, so a syntax error aborts parsing with an exception rather than being
 * recovered from; the caller is expected to handle it.
 * @param queryString the query string to parse; must not be null
 * @return the bool query collected by the listener
 */
private static BoolQuery parseQueryString(String queryString) {
  CodePointCharStream input = CharStreams.fromString(queryString);
  SearchLexer lex = new SearchLexer(input);
  CommonTokenStream tokens = new CommonTokenStream(lex);

  SearchParser par = new SearchParser(tokens);
  par.setErrorHandler(new BailErrorStrategy());
  ParseTree tree = par.query();

  // Rendering the tree is only worth the cost when debug output is actually emitted
  if (log.isDebugEnabled()) {
    log.debug(tree.toStringTree(par));
  }

  // Walk it and attach our listener
  ParseTreeWalker walker = new ParseTreeWalker();
  Antlr4SearchListener listener = new Antlr4SearchListener();
  walker.walk(listener, tree);

  return listener.getBoolQuery();
}
/**
 * Adds the constraints expressed by a PDS API Search Query string to this request.
 * A null or empty string adds nothing.
 * @param q a PDS API Search Query string
 * @return this builder, for chaining
 * @throws UnparsableQParamException if the string is not parseable
 */
public RegistrySearchRequestBuilder constrainByQueryString(String q) throws UnparsableQParamException {
  // Nothing to parse: leave the query untouched
  if ((q == null) || (q.length() == 0)) {
    return this;
  }

  try {
    BoolQuery parsedQuery = parseQueryString(q);
    queryBuilder.must(parsedQuery.toQuery());
    return this;
  } catch (RecognitionException | ParseCancellationException e) {
    log.info("Unable to parse q " + q + "error message is " + e);
    throw new UnparsableQParamException(
        "q string value:" + q + " Error message " + e.getMessage());
  }
}
/**
 * Placeholder for keyword-based constraints. Not implemented yet: the keywords are currently ignored and
 * the request is left unchanged.
 * @param keywords the keywords to constrain by (currently unused)
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder addKeywordsParam(List<String> keywords) {
  // TODO implement
  return this;
}
/**
 * Restricts results to the latest version of each LID by rejecting every document in which the
 * "ops:Provenance/ops:superseded_by" field exists.
 * N.B. "latest" is not evaluated relative to the other constraints: a product whose newest version fails
 * them is dropped entirely, so this constraint can yield no hits even though valid results exist.
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder excludeSupersededProducts() {
  Query hasSupersededBy = new ExistsQuery.Builder()
      .field("ops:Provenance/ops:superseded_by")
      .build()
      .toQuery();

  queryBuilder.mustNot(hasSupersededBy);
  return this;
}
/**
 * Restricts results to bundle products, i.e. the product-class property must match Product_Bundle.
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder onlyBundles() {
  String productClassField = PdsProductClasses.getPropertyName();
  String bundleClass = PdsProductClasses.Product_Bundle.toString();
  return matchField(productClassField, bundleClass);
}
/**
 * Restricts results to collection products, i.e. the product-class property must match Product_Collection.
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder onlyCollections() {
  String productClassField = PdsProductClasses.getPropertyName();
  String collectionClass = PdsProductClasses.Product_Collection.toString();
  return matchField(productClassField, collectionClass);
}
/**
 * Limit results to basic (non-aggregate) products, i.e. exclude bundles/collections.
 * Implemented as a "must not" terms query over every product class for which isBasicProduct() is false.
 * @return this builder, for chaining
 */
public RegistrySearchRequestBuilder onlyBasicProducts() {
  List<FieldValue> excludeValues = Arrays.stream(PdsProductClasses.values())
      .filter(cls -> !cls.isBasicProduct())
      .map(cls -> new FieldValue.Builder().stringValue(cls.toString()).build())
      .toList();
  TermsQueryField termsQueryField = new TermsQueryField.Builder().value(excludeValues).build();
  TermsQuery query = new TermsQuery.Builder()
      .field(PdsProductClasses.getPropertyName())
      .terms(termsQueryField)
      .build();

  this.queryBuilder.mustNot(query.toQuery());

  return this;
}
}