// edu.byu.hbll.box.impl.View (Maven / Gradle / Ivy)
package edu.byu.hbll.box.impl;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import edu.byu.hbll.box.Box;
import edu.byu.hbll.box.BoxConfigurable;
import edu.byu.hbll.box.BoxDatabase;
import edu.byu.hbll.box.BoxDocument;
import edu.byu.hbll.box.BoxQuery;
import edu.byu.hbll.box.ConstructConfig;
import edu.byu.hbll.box.DocumentId;
import edu.byu.hbll.box.Facet;
import edu.byu.hbll.box.HarvestContext;
import edu.byu.hbll.box.HarvestResult;
import edu.byu.hbll.box.Harvester;
import edu.byu.hbll.box.InitConfig;
import edu.byu.hbll.box.ProcessBatch;
import edu.byu.hbll.box.ProcessContext;
import edu.byu.hbll.box.ProcessResult;
import edu.byu.hbll.box.Processor;
import edu.byu.hbll.box.QueryResult;
import edu.byu.hbll.box.ReadOnlyDatabase;
import edu.byu.hbll.box.Source;
import edu.byu.hbll.box.client.AbstractHttpBoxClient;
import edu.byu.hbll.box.client.BoxClient;
import edu.byu.hbll.box.client.BoxUpdatesClient;
import edu.byu.hbll.box.client.HttpBoxClient;
import edu.byu.hbll.box.internal.util.BoxUtils;
import edu.byu.hbll.json.JsonField;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Singular;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
/**
* A read-only and realtime view into an internal or external box source. All communication with an
* external source occurs over http(s). The view does not cache anything with the exception of the
* cursor. The cursor is used to know when updates occur at the underlying source and to be able to
* notify dependents of updates. This implements {@link Processor}, {@link Harvester} and {@link
* ReadOnlyDatabase}.
*
* <p>Example configuration:
*
* <pre>
* SOURCENAME:
*   template:
*     id: view
*     params:
*       uri: https://www.example.com/box
* </pre>
*
* @author Charles Draper
*/
@Slf4j
@SuperBuilder(toBuilder = true)
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@AllArgsConstructor(access = AccessLevel.PROTECTED)
public class View implements Processor, Harvester, ReadOnlyDatabase {
/**
 * Client used to query the underlying source. It is only consulted when no {@link #baseSource} is
 * set.
 */
@Getter private BoxClient boxClient;

/** The uri of the underlying remote source upon which the view is built. */
@Getter private URI baseUri;

/**
 * The name of the underlying source upon which the view is built. The source will be resolved at
 * registration time with Box.
 */
@Getter private String baseSourceName;

/**
 * The underlying source upon which the view is built. This is set by Box during registration if
 * the baseSourceName is set or will need to be set manually with the builder if this is not
 * registered with Box.
 */
@Getter private Source baseSource;

/** Include documents with these statuses. When empty, the view adds no status restriction. */
@Getter @Singular private final Set<BoxDocument.Status> statuses = new LinkedHashSet<>();

/** Limit of documents per request. */
@Getter @Builder.Default private long limit = BoxQuery.DEFAULT_LIMIT;

/** Only return these document fields. When empty, the view adds no field restriction. */
@Getter @Singular private final Set<String> fields = new LinkedHashSet<>();

/** Filter by these facets only. */
@Getter @Singular private final Set<Facet> facets = new LinkedHashSet<>();

/**
 * If documents come in through the harvester, they should be marked as unprocessed so that they
 * get put on the process queue rather than be saved as processed.
 */
@Getter private boolean harvestUnprocessed;

/**
 * For a typical view, harvesting still takes place in order to know what has been updated. This
 * is important for a box that uses the view as a dependency. Dependents of the view need to know
 * when a document has been updated so that their corresponding dependent documents can also be
 * updated. When set to true, this only harvests metadata so as to know what's been updated.
 */
@Getter private boolean harvestForView;

/**
 * The source name of this view. This is only used to set up the dependency in the transformation.
 * If not set, `principal` will be used.
 */
@Getter @Builder.Default private String sourceName = "principal";

/**
 * The source that this view belongs to if registered with Box. This is set by Box during
 * registration.
 */
@Deprecated private Source source;

/** Names of the facet groups represented by {@link #facets}. */
private final Set<String> facetGroups = new LinkedHashSet<>();
/**
* Creates a new View that talks to the box instance at the given URI through a freshly created
* {@link HttpBoxClient}.
*
* @param uri base URI for the box instance, the path should end with /box
* @deprecated use builder
*/
@Deprecated
public View(URI uri) {
// delegates to the BoxClient constructor, which also records the client's URI as the base URI
this(new HttpBoxClient(uri));
}
/**
 * Creates a new View that queries the underlying source through the given client.
 *
 * <p>If the client is an {@link AbstractHttpBoxClient}, its URI is also recorded as the base URI
 * of this view.
 *
 * @param boxClient the client for communicating with the remote box
 * @throws NullPointerException if {@code boxClient} is null
 * @deprecated use builder
 */
@Deprecated
public View(BoxClient boxClient) {
  this.boxClient = Objects.requireNonNull(boxClient);
  if (boxClient instanceof AbstractHttpBoxClient) {
    this.baseUri = ((AbstractHttpBoxClient) boxClient).getUri();
  }
  // NOTE(review): Lombok's @Builder.Default moves a field's initializer into the builder, so a
  // hand-written constructor has to apply the documented defaults itself; otherwise limit would be
  // 0 and sourceName would be null. Confirm against the Lombok version in use.
  this.limit = BoxQuery.DEFAULT_LIMIT;
  this.sourceName = "principal";
}
/**
 * Creates a new View with the given local source for which this view is based.
 *
 * <p>The view's source name and base source name are taken from the names of the two sources.
 *
 * @param source the source for this view
 * @param localSource the source this view is based on
 * @deprecated use builder
 */
@Deprecated
public View(Source source, Source localSource) {
  this.source = source;
  this.sourceName = source.getName();
  this.baseSource = localSource;
  this.baseSourceName = localSource.getName();
  // NOTE(review): Lombok's @Builder.Default moves a field's initializer into the builder, so a
  // hand-written constructor has to apply the documented default itself; otherwise limit would be
  // 0. Confirm against the Lombok version in use.
  this.limit = BoxQuery.DEFAULT_LIMIT;
}
/**
* Creates a new View by running {@link #postConstruct(ConstructConfig)} followed by {@link
* #postInit(InitConfig)} on the new instance. See {@link
* BoxConfigurable#postConstruct(ConstructConfig)} and {@link
* BoxConfigurable#postInit(InitConfig)}.
*
* @param constructConfig the construct config to use
* @param initConfig the init config to use
* @deprecated use builder
*/
@Deprecated
public View(ConstructConfig constructConfig, InitConfig initConfig) {
// order matters: postInit relies on baseSourceName / baseUri being populated by postConstruct
postConstruct(constructConfig);
postInit(initConfig);
}
/**
 * {@inheritDoc}
 *
 * <p>Configures this view from the source name and the following parameters of the config:
 *
 * <p>baseUri (or uri): base URI for the box instance, the path should end with /box or a specific
 * source /SOURCE_NAME (required if no base source name is given)
 *
 * <p>baseSourceName (or local): source name for the underlying local source; no http client is
 * created and the uri is ignored if this is set.
 *
 * <p>fields: array of the particular fields to return for each document (optional)
 *
 * <p>statuses: only include documents with these statuses, matched case-insensitively against
 * {@link BoxDocument.Status} (optional)
 *
 * <p>facets: filter to these collections only, key value pairs in the form of "name:value"
 * (optional)
 *
 * <p>limit: limit of documents per request (optional, defaults to {@link
 * BoxQuery#DEFAULT_LIMIT})
 *
 * <p>username, password: credentials passed to the http client (optional)
 *
 * <p>accessTokenUri, clientId, clientSecret: additional settings passed to the http client
 * (optional)
 *
 * <p>harvestUnprocessed, harvestForView: boolean harvest flags (optional)
 */
@Override
public void postConstruct(ConstructConfig config) {
  this.sourceName = config.getSourceName();
  ObjectNode params = config.getParams();

  // "baseSourceName" wins; "local" is consulted only when it is absent
  baseSourceName = params.path("baseSourceName").asText(params.path("local").asText(null));

  if (baseSourceName == null) {
    // no local base source configured: build an http client for the remote box
    this.boxClient =
        HttpBoxClient.builder()
            .uri(params.path("baseUri").asText(params.path("uri").asText(null)))
            .username(params.path("username").asText(null))
            .password(params.path("password").asText(null))
            .accessTokenUri(params.path("accessTokenUri").asText(null))
            .clientId(params.path("clientId").asText(null))
            .clientSecret(params.path("clientSecret").asText(null))
            .build();
    this.baseUri = ((HttpBoxClient) this.boxClient).getUri();
  }

  for (JsonNode field : params.path("fields")) {
    fields.add(field.asText());
  }

  for (JsonNode status : params.path("statuses")) {
    // Locale.ROOT keeps the enum lookup independent of the JVM's default locale
    statuses.add(BoxDocument.Status.valueOf(status.asText().toUpperCase(Locale.ROOT)));
  }

  for (JsonField facet : new JsonField(params.path("facets"))) {
    if (!facet.getValue().isNull()) {
      facets.add(Facet.parse(facet.getValue().asText()));
    }
  }

  // keep the cached facet group names in step with the facets just added
  updateFacetGroups();

  limit = params.path("limit").asLong(BoxQuery.DEFAULT_LIMIT);
  harvestUnprocessed = params.path("harvestUnprocessed").asBoolean();
  harvestForView = params.path("harvestForView").asBoolean();
}
/**
 * {@inheritDoc}
 *
 * <p>Records the owning source and source name, resolves the base source by name through Box when
 * it has not been set already and, when this view is registered as a harvester, subscribes to
 * update notifications of the underlying source so that each update triggers a harvest.
 *
 * @throws IllegalStateException if this view is a harvester and the configured base source name
 *     could not be resolved to a source
 */
@Override
public void postInit(InitConfig config) {
  Source source = config.getSource();
  this.source = source;
  sourceName = config.getSourceName();

  if (baseSourceName != null && baseSource == null) {
    baseSource = config.getBox().getSource(baseSourceName);
  }

  if (config.isHarvester()) {
    if (baseSource != null) {
      // rawFind queries baseSource whenever it is set, so updates must come from it as well
      baseSource.registerForUpdateNotifications(() -> source.triggerHarvest());
    } else if (baseSourceName != null) {
      // fail with a descriptive message instead of a bare NullPointerException
      throw new IllegalStateException(
          "View " + sourceName + " could not resolve its base source " + baseSourceName);
    } else if (baseUri != null) {
      // updates client doesn't appear to need to be closed
      new BoxUpdatesClient(baseUri, () -> source.triggerHarvest());
    }
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Looks up the ids of the given {@link ProcessBatch} at the underlying source via {@link
 * #rawFind(BoxQuery)}, which applies the preconfigured fields, facets and statuses, strips
 * dependencies and group ids, and marks ready documents that do not match the preconfigured
 * facets as deleted. Each found document is then placed into a {@link ProcessContext} as a
 * dependency under this view's sourceName and its own id, and the batch is passed through {@link
 * #transform(ProcessBatch)} followed by {@link #transform(List)}; subclasses may override either
 * to alter the returned documents.
 */
@Override
public ProcessResult process(ProcessBatch batch) {
  BoxQuery idQuery = new BoxQuery().addIds(batch.getIds());
  QueryResult transformed = findAndTransform(idQuery, batch);
  ProcessResult processResult = new ProcessResult();
  return processResult.addDocuments(transformed);
}
/**
 * {@inheritDoc}
 *
 * <p>Queries the underlying source starting at the cursor stored in the {@link HarvestContext},
 * using the configured limit. When {@code harvestForView} or {@code harvestUnprocessed} is set,
 * only the metadata field is requested. The found documents are run through {@link
 * #transform(ProcessBatch)} and {@link #transform(List)} before being returned; with {@code
 * harvestUnprocessed} only a bare document per id is returned instead. The next cursor of the
 * query is stored in the result's cursor, and the result is flagged as having more whenever the
 * underlying query returned at least one document.
 */
@Override
public HarvestResult harvest(HarvestContext context) {
  long startCursor = context.getCursor().path("cursor").asLong();
  BoxQuery query = new BoxQuery().setCursor(startCursor).setLimit(limit);

  boolean metadataIsEnough = harvestForView || harvestUnprocessed;
  if (metadataIsEnough) {
    query.addFields(BoxQuery.METADATA_FIELD);
  }

  InfoQueryResult found = findAndTransform(query);

  HarvestResult harvested = new HarvestResult();
  harvested.withCursor().put("cursor", found.getNextCursor() + "");

  if (!harvestUnprocessed) {
    harvested.addDocuments(found);
  } else {
    // hand back bare ids only
    for (BoxDocument transformed : found) {
      harvested.add(new BoxDocument(transformed.getId()));
    }
  }

  harvested.setMore(found.more);
  return harvested;
}
/**
 * {@inheritDoc}
 *
 * <p>Runs a copy of the given {@link BoxQuery} against the underlying source via {@link
 * #rawFind(BoxQuery)}, which applies the preconfigured fields, facets and statuses, strips
 * dependencies and group ids, and marks ready documents that do not match the preconfigured
 * facets as deleted. The found documents are then passed through {@link
 * #transform(ProcessBatch)} followed by {@link #transform(List)}; in the batch each document is
 * available as a dependency under this view's sourceName and its own id. Subclasses may override
 * either transform method to alter the returned documents.
 */
@Override
public QueryResult find(BoxQuery query) {
  BoxQuery queryCopy = new BoxQuery(query);
  return findAndTransform(queryCopy);
}
/**
 * Runs the query and transforms the found documents, starting from an empty {@link ProcessBatch}.
 *
 * @param query the query to run against the underlying source
 * @return the transformed result
 */
private InfoQueryResult findAndTransform(BoxQuery query) {
  ProcessBatch emptyBatch = new ProcessBatch();
  return findAndTransform(query, emptyBatch);
}
/**
 * Runs the query through {@link #rawFind(BoxQuery)}, wraps the found documents into a batch and
 * replaces the result's contents with the output of both transform methods. The next cursor and
 * the "more" flag captured from the raw result are kept.
 *
 * @param query the query to run against the underlying source
 * @param batch batch whose existing contexts are carried over for matching document ids
 * @return the transformed result
 */
private InfoQueryResult findAndTransform(BoxQuery query, ProcessBatch batch) {
  InfoQueryResult result = new InfoQueryResult(rawFind(query));
  ProcessBatch prepared = makeBatch(result, batch);
  // compute the transformed list before clearing: the batch refers to the result's documents
  List<BoxDocument> documents = transform(transform(prepared));
  result.clear();
  result.addAll(documents);
  return result;
}
/**
 * Prepares a new {@link ProcessBatch} holding one {@link ProcessContext} per document. Each
 * context carries the document as a dependency keyed by this view's sourceName and the document
 * id. When the given batch already has a context for that id, its dependencies, box and source
 * are carried over; otherwise box and source are null.
 *
 * @param documents documents to put in batch as dependencies
 * @param batch batch to process
 * @return ready batch
 */
private ProcessBatch makeBatch(List<BoxDocument> documents, ProcessBatch batch) {
  ProcessBatch newBatch = new ProcessBatch();

  for (BoxDocument document : documents) {
    Map<DocumentId, BoxDocument> dependencies = new HashMap<>();
    ProcessContext context = batch.get(document.getId());
    Box box = null;
    Source source = null;

    if (context != null) {
      dependencies.putAll(context.getDependencies());
      box = context.getBox();
      source = context.getSource();
    }

    // added last so the freshly found document wins over a dependency with the same key
    dependencies.put(new DocumentId(sourceName, document.getId()), document);
    newBatch.add(new ProcessContext(box, source, document.getId(), dependencies));
  }

  return newBatch;
}
/**
 * Same as {@link #find(BoxQuery)} except it does not perform any transformation other than
 * applying the preconfigured `fields`, `facets`, and `statuses`.
 *
 * <p>The given query is copied, never modified. Facets, statuses and fields of the query are
 * narrowed so that they cannot reveal documents or fields hidden by this view; where nothing is
 * left after narrowing, an empty result (facets, statuses) or documents with empty contents
 * (fields) are returned. The query is sent to the base source if one is set, otherwise to the box
 * client. Dependencies and group ids are removed from every returned document, and ready
 * documents that do not match the preconfigured facets are replaced by deleted placeholders.
 *
 * <p>Any exception is logged with the view and base names and then rethrown unchanged.
 *
 * @param query the query to run against the underlying source
 * @return the query result
 */
public QueryResult rawFind(BoxQuery query) {
  try {
    query = new BoxQuery(query);

    // if we're not careful, a query specifying particular facets and/or fields can cause the view
    // to divulge information it shouldn't. These next steps make sure facets or fields in the
    // query will not expose documents or document fields that should be hidden.

    // make sure the OR logic within a facet group does not expose documents being hidden by
    // this.facets
    boolean forceEmptyResults = false;

    // if this view and the query both specify facets
    if (!this.facets.isEmpty() && !query.getFacets().isEmpty()) {
      // facet groups restricted by this view; derived from the current facets so the check stays
      // correct even when the cached group names were never populated or have gone stale
      Set<String> viewFacetGroups = new HashSet<>();
      for (Facet facet : this.facets) {
        viewFacetGroups.add(facet.getName());
      }

      // get all facet groups represented from the query
      Set<String> queryFacetGroups = new HashSet<>();
      for (Facet facet : query.getFacets()) {
        queryFacetGroups.add(facet.getName());
      }

      // final facets to use
      List<Facet> finalFacets = new ArrayList<>();
      Set<String> finalFacetGroups = new HashSet<>();

      // only add facets from query that appear in this view's set of facets or whose group is not
      // represented by the view
      for (Facet facet : query.getFacets()) {
        if (this.facets.contains(facet) || !viewFacetGroups.contains(facet.getName())) {
          finalFacets.add(facet);
          finalFacetGroups.add(facet.getName());
        }
      }

      // only add facets from this view that are not part of the query's facet groups
      for (Facet facet : this.facets) {
        if (!queryFacetGroups.contains(facet.getName())) {
          finalFacets.add(facet);
        }
      }

      query.clearFacets();

      // only return results if all facet groups from query are represented in final facets
      if (queryFacetGroups.size() == finalFacetGroups.size()) {
        query.addFacets(finalFacets);
      } else {
        forceEmptyResults = true;
        query.addFacets(this.facets);
      }
    } else {
      query.addFacets(this.facets);
    }

    // if there are preconfigured statuses
    if (!this.statuses.isEmpty()) {
      if (query.getStatuses().isEmpty()) {
        // add preconfigured statuses if query doesn't specify any
        query.addStatuses(this.statuses);
      } else {
        // only return documents with intersection of preconfigured statuses and query statuses
        query.getStatuses().retainAll(this.statuses);
        // if there is no intersection
        forceEmptyResults |= query.getStatuses().isEmpty();
      }
    }

    // fields specified in query may only be activated if they are subfields of view fields
    boolean forceEmptyDocuments = false;

    // if there are preconfigured fields
    if (!this.fields.isEmpty()) {
      List<String> allowedFields = new ArrayList<>();

      for (String field : BoxUtils.canonicalizeFields(query.getFields())) {
        for (String masterField : BoxUtils.canonicalizeFields(this.fields)) {
          // use fields from query that are subfields of this view's fields
          if (field.equals(masterField) || field.startsWith(masterField + ".")) {
            allowedFields.add(field);
          }
          // use masterField if it is a subfield of the query field
          if (masterField.startsWith(field + ".")) {
            allowedFields.add(masterField);
          }
        }
      }

      if (query.getFields().isEmpty()) {
        allowedFields.addAll(this.fields);
      }

      query.clearFields();

      if (allowedFields.isEmpty()) {
        // if there are no fields left, then documents' contents will be empty
        forceEmptyDocuments = true;
        query.addFields(this.fields);
      } else {
        query.addFields(allowedFields);
      }
    }

    // Unless no fields are requested, or the metadata field is requested without any "@box."
    // sub-field, explicitly request the metadata used below (id, status, cursor, facets). Facets
    // the caller did not ask for are stripped again after the facet check.
    boolean removeFacets = false;
    if (!(query.getFields().isEmpty()
        || query.getFields().contains(BoxQuery.METADATA_FIELD)
            && !query.getFields().stream().anyMatch(f -> f.startsWith("@box.")))) {
      removeFacets = !query.getFields().contains(BoxQuery.METADATA_FIELD_FACETS);
      query.addFields(
          BoxQuery.METADATA_FIELD_ID,
          BoxQuery.METADATA_FIELD_STATUS,
          BoxQuery.METADATA_FIELD_CURSOR,
          BoxQuery.METADATA_FIELD_FACETS);
    }

    QueryResult result;
    if (baseSource != null) {
      result = baseSource.collect(query);
    } else {
      result = boxClient.collect(query);
    }

    if (forceEmptyResults) {
      result = new QueryResult();
    }

    // remove dependencies and groupId
    for (int i = 0; i < result.size(); i++) {
      BoxDocument document = result.get(i);
      document.clearDependencies();
      document.setGroupId(null);

      // for id type queries, mark as deleted if document doesn't have required facet(s)
      if (document.isReady() && !document.matches(this.facets)) {
        result.set(i, new BoxDocument(document.getId()).setAsDeleted());
      }

      if (removeFacets) {
        document.clearFacets();
      }
    }

    if (forceEmptyDocuments) {
      result.forEach(d -> d.setDocument(JsonNodeFactory.instance.objectNode()));
    }

    result = new QueryResult(result).setNextCursor(result.getNextCursor());
    return result;
  } catch (Exception e) {
    // baseUri is null when the client is not http based; String.valueOf keeps the logging itself
    // from throwing and hiding the original exception
    String base = baseSource != null ? baseSource.getName() : String.valueOf(baseUri);
    log.error("Error in view {} while querying {}: {}", sourceName, base, e.getMessage());
    throw e;
  }
}
/**
 * Extend this class and override this method in order to transform the result documents in any
 * way. You can ignore certain documents by simply not including them in the resulting list. Each
 * {@link ProcessContext} of the batch holds the document found at the underlying source as a
 * dependency under this view's sourceName and the context's id; the default implementation simply
 * returns those documents in batch order.
 *
 * <p>Note: You may modify the documents in place.
 *
 * @param batch batch to transform
 * @return list of {@link BoxDocument}s to return to box
 */
protected List<BoxDocument> transform(ProcessBatch batch) {
  return batch
      .stream()
      .map(c -> c.getDependency(sourceName, c.getId()))
      .collect(Collectors.toList());
}
/**
 * Extend this class and override this method in order to transform the result documents in any
 * way. You can ignore certain documents by simply not including them in the resulting list. This
 * is called with the output of {@link #transform(ProcessBatch)}; the default implementation
 * returns the given list unchanged.
 *
 * <p>Note: You may modify the documents in place.
 *
 * @param documents documents to transform
 * @return list of {@link BoxDocument}s to return to box
 */
protected List<BoxDocument> transform(List<BoxDocument> documents) {
  return documents;
}
/**
 * Rebuilds the cached set of facet group names from the current facets. The cache is cleared
 * first so that groups of facets that have since been removed do not linger.
 */
private void updateFacetGroups() {
  facetGroups.clear();
  facets.forEach(f -> facetGroups.add(f.getName()));
}
/**
* Returns the base uri for the underlying source. This returns the same field as the generated
* getter for {@code baseUri}.
*
* @return the uri, or null if no base uri has been set
* @deprecated use getBaseUri
*/
@Deprecated
public URI getUri() {
return baseUri;
}
/**
 * Sets the fields for this view, replacing any previously configured fields.
 *
 * @param fields the particular fields to return for each document
 * @throws NullPointerException if {@code fields} is null; the current fields are left untouched
 * @deprecated use builder
 */
@Deprecated
public void setFields(Collection<String> fields) {
  // snapshot first: the argument may be the live set handed out by getFields(), which the
  // clear() below would otherwise empty before it is copied
  List<String> snapshot = new ArrayList<>(fields);
  this.fields.clear();
  this.fields.addAll(snapshot);
}
/**
 * Sets the statuses for the view, replacing any previously configured statuses.
 *
 * @param statuses the statuses to set
 * @throws NullPointerException if {@code statuses} is null; the current statuses are left
 *     untouched
 * @deprecated use builder
 */
@Deprecated
public void setStatuses(Collection<BoxDocument.Status> statuses) {
  // snapshot first: the argument may be the live set handed out by getStatuses(), which the
  // clear() below would otherwise empty before it is copied
  List<BoxDocument.Status> snapshot = new ArrayList<>(statuses);
  this.statuses.clear();
  this.statuses.addAll(snapshot);
}
/**
 * Sets the facets for the view, replacing any previously configured facets and rebuilding the
 * cached facet group names to match.
 *
 * @param facets the facets to set
 * @throws NullPointerException if {@code facets} is null; the current facets are left untouched
 * @deprecated use builder
 */
@Deprecated
public void setFacets(Collection<Facet> facets) {
  // snapshot first: the argument may be the live set handed out by getFacets(), which the
  // clear() below would otherwise empty before it is copied
  List<Facet> snapshot = new ArrayList<>(facets);
  this.facets.clear();
  this.facets.addAll(snapshot);
  // drop group names of the replaced facets before re-deriving them from the new ones
  this.facetGroups.clear();
  updateFacetGroups();
}
/**
* Sets the limit of documents per request. The limit is applied to the query issued by {@link
* #harvest(HarvestContext)}.
*
* @param limit the limit of documents per request to set
* @deprecated use builder
*/
@Deprecated
public void setLimit(long limit) {
this.limit = limit;
}
/**
* Sets the source name of this view. The name is used as the source part of the dependency id
* under which found documents are handed to {@link #transform(ProcessBatch)}.
*
* @param sourceName the sourceName to set
* @deprecated use builder
*/
@Deprecated
public void setSourceName(String sourceName) {
this.sourceName = sourceName;
}
/**
* Sets whether to harvest unprocessed. When true, {@link #harvest(HarvestContext)} requests only
* metadata and returns a bare document per harvested id.
*
* @param harvestUnprocessed the harvestUnprocessed to set
* @deprecated use builder
*/
@Deprecated
public void setHarvestUnprocessed(boolean harvestUnprocessed) {
this.harvestUnprocessed = harvestUnprocessed;
}
/**
* Sets whether this is harvest for view. When true, {@link #harvest(HarvestContext)} requests
* only metadata from the underlying source.
*
* @param harvestForView the harvestForView to set
* @deprecated use builder
*/
@Deprecated
public void setHarvestForView(boolean harvestForView) {
this.harvestForView = harvestForView;
}
/**
* Returns the name of the underlying local source.
*
* @return the base source name, or null if none has been set
* @deprecated use getBaseSourceName
*/
@Deprecated
public String getLocal() {
return baseSourceName;
}
/**
* You should use the builder to set a {@link BoxClient}. This is here for the rare occasion where
* an auto-instantiated client will not work and must be replaced post initialization.
*
* <p>Only the client is replaced; the base uri of this view is left as it is. The client is not
* consulted while a base source is set.
*
* @param boxClient the boxClient to set
*/
public void setBoxClient(BoxClient boxClient) {
this.boxClient = boxClient;
}
/**
* Returns the source this view belongs to, as recorded by {@link #postInit(InitConfig)} or the
* deprecated source constructor.
*
* @return the source, or null if it has not been set
* @deprecated source should be retrieved through the Box instance
*/
@Deprecated
public Source getSource() {
return source;
}
private class InfoQueryResult extends QueryResult {
private static final long serialVersionUID = 1L;
private boolean more;
private InfoQueryResult(QueryResult result) {
super(result);
setNextCursor(result.getNextCursor());
more = !result.isEmpty();
}
}
public abstract static class ViewBuilder> {
public B baseUri(URI baseUri) {
this.baseUri = baseUri;
return self();
}
public B baseUri(String baseUri) {
return baseUri(URI.create(baseUri));
}
private B source(Source source) {
return self();
}
@SuppressWarnings("unused")
private B facetGroups(Set facetGroups) {
return self();
}
}
}