All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.byu.hbll.box.impl.View Maven / Gradle / Ivy

There is a newer version: 2.5.3
Show newest version
package edu.byu.hbll.box.impl;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import edu.byu.hbll.box.Box;
import edu.byu.hbll.box.BoxConfigurable;
import edu.byu.hbll.box.BoxDatabase;
import edu.byu.hbll.box.BoxDocument;
import edu.byu.hbll.box.BoxQuery;
import edu.byu.hbll.box.ConstructConfig;
import edu.byu.hbll.box.DocumentId;
import edu.byu.hbll.box.Facet;
import edu.byu.hbll.box.HarvestContext;
import edu.byu.hbll.box.HarvestResult;
import edu.byu.hbll.box.Harvester;
import edu.byu.hbll.box.InitConfig;
import edu.byu.hbll.box.ProcessBatch;
import edu.byu.hbll.box.ProcessContext;
import edu.byu.hbll.box.ProcessResult;
import edu.byu.hbll.box.Processor;
import edu.byu.hbll.box.QueryResult;
import edu.byu.hbll.box.ReadOnlyDatabase;
import edu.byu.hbll.box.Source;
import edu.byu.hbll.box.client.AbstractHttpBoxClient;
import edu.byu.hbll.box.client.BoxClient;
import edu.byu.hbll.box.client.BoxUpdatesClient;
import edu.byu.hbll.box.client.HttpBoxClient;
import edu.byu.hbll.box.internal.util.BoxUtils;
import edu.byu.hbll.json.JsonField;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Singular;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;

/**
 * A read-only and realtime view into an internal or external box source. All communication occurs
 * over http(s). The view does not cache anything with the exception of the cursor. The cursor is
 * used to know when updates occur at the underlying source and to be able to notify dependents of
 * updates. This implements both {@link BoxDatabase} and {@link Harvester}.
 *
 * 
 * SOURCENAME:
 *   template:
 *     id: view
 *     params:
 *       uri: https://www.example.com/box
 * 
* * @author Charles Draper */ @Slf4j @SuperBuilder(toBuilder = true) @NoArgsConstructor(access = AccessLevel.PROTECTED) @AllArgsConstructor(access = AccessLevel.PROTECTED) public class View implements Processor, Harvester, ReadOnlyDatabase { @Getter private BoxClient boxClient; /** The uri of the underlying remote source upon which the view is built. */ @Getter private URI baseUri; /** * The name of the underlying source upon which the view is built. The source will be resolved at * registration time with Box. */ @Getter private String baseSourceName; /** * The underlying source upon which the view is built. This is set by Box during registration if * the baseSourceName is set or will need to be set manually with the builder if this is not * registered with Box. */ @Getter private Source baseSource; /** Include documents with these statuses. */ @Getter @Singular private final Set statuses = new LinkedHashSet<>(); /** Limit of documents per request. */ @Getter @Builder.Default private long limit = BoxQuery.DEFAULT_LIMIT; /** Only return these document fields. */ @Getter @Singular private final Set fields = new LinkedHashSet<>(); /** Filter by these facets only. */ @Getter @Singular private final Set facets = new LinkedHashSet<>(); /** * If documents come in through the harvester, they should be marked as unprocessed so that they * get put on the process queue rather than be saved as processed. */ @Getter private boolean harvestUnprocessed; /** * For a typical view, harvesting still takes place in order to know what has been updated. This * is important for a box that uses the view as a dependency. Dependents of the view need to know * when a document has been updated so that their corresponding dependent documents can also be * updated. When set to true, this only harvests metadata so as to know what's been updated. */ @Getter private boolean harvestForView; /** * The source name of this view. This is only used to set up the dependency in the transformation. * If not set, `principal` will be used. */ @Getter @Builder.Default private String sourceName = "principal"; /** * The source that this view belongs to if registered with Box. This is set by Box during * registration. */ @Deprecated private Source source; /** Set of facet groups represented by facets. */ private final Set facetGroups = new LinkedHashSet<>(); /** * Creates a new View with the given baseUri to the box instance. * * @param uri base URI for the box instance, the path should end with /box * @deprecated use builder */ @Deprecated public View(URI uri) { this(new HttpBoxClient(uri)); } /** * Creates a new View with the given baseUri to the box instance. * * @param boxClient the client for communicating with the remote box * @deprecated use builder */ @Deprecated public View(BoxClient boxClient) { this.boxClient = Objects.requireNonNull(boxClient); if (boxClient instanceof AbstractHttpBoxClient) { this.baseUri = ((AbstractHttpBoxClient) boxClient).getUri(); } } /** * Creates a new View with the given local source for which this view is based. * * @param source the source for this view * @param localSource the source this view is based on * @deprecated use builder */ @Deprecated public View(Source source, Source localSource) { this.source = source; this.sourceName = source.getName(); this.baseSource = localSource; this.baseSourceName = localSource.getName(); } /** * See {@link BoxConfigurable#postConstruct(ConstructConfig)} and {@link * BoxConfigurable#postInit(InitConfig)}. * * @param constructConfig the construct config to use * @param initConfig the init config to use * @deprecated user builder */ @Deprecated public View(ConstructConfig constructConfig, InitConfig initConfig) { postConstruct(constructConfig); postInit(initConfig); } /** * {@inheritDoc} * *

uri: base URI for the box instance, the path should end with /box or a specific source * /SOURCE_NAME (required if local is also null) * *

local: source name for underlying local source, uri is ignored if this is set. * *

fields: array of the particular fields to return for each document (optional) * *

statuses: only include documents with these statuses. Default: [READY,DELETED] (optional) * *

facets: filter to these collections only, key value pairs in the form of "name:value" * (optional) * *

limit: limit of documents per request (optional) * *

username: username to use with basic authentication. (optional) * *

password: password to use with basic authentication. (optional) */ @Override public void postConstruct(ConstructConfig config) { this.sourceName = config.getSourceName(); ObjectNode params = config.getParams(); baseSourceName = params.path("baseSourceName").asText(params.path("local").asText(null)); if (baseSourceName == null) { this.boxClient = HttpBoxClient.builder() .uri(params.path("baseUri").asText(params.path("uri").asText(null))) .username(params.path("username").asText(null)) .password(params.path("password").asText(null)) .accessTokenUri(params.path("accessTokenUri").asText(null)) .clientId(params.path("clientId").asText(null)) .clientSecret(params.path("clientSecret").asText(null)) .build(); this.baseUri = ((HttpBoxClient) this.boxClient).getUri(); } for (JsonNode field : params.path("fields")) { fields.add(field.asText()); } for (JsonNode status : params.path("statuses")) { statuses.add(BoxDocument.Status.valueOf(status.asText().toUpperCase())); } for (JsonField facet : new JsonField(params.path("facets"))) { if (!facet.getValue().isNull()) { facets.add(Facet.parse(facet.getValue().asText())); } } updateFacetGroups(); limit = params.path("limit").asLong(BoxQuery.DEFAULT_LIMIT); harvestUnprocessed = params.path("harvestUnprocessed").asBoolean(); harvestForView = params.path("harvestForView").asBoolean(); } @Override public void postInit(InitConfig config) { Source source = config.getSource(); this.source = source; sourceName = config.getSourceName(); if (baseSourceName != null && baseSource == null) { baseSource = config.getBox().getSource(baseSourceName); } if (config.isHarvester()) { if (baseSourceName != null) { baseSource.registerForUpdateNotifications(() -> source.triggerHarvest()); } else if (baseUri != null) { // updates client doesn't appear to need to be closed new BoxUpdatesClient(baseUri, () -> source.triggerHarvest()); } } } /** * {@inheritDoc} * *

Requests documents denoted by the {@link ProcessBatch} from the underlying source. Documents * are pared according to the `fields` parameter and the full metadata is retrieved (minus groups * and dependencies). Existing documents at the underlying source that do not match the * preconfigured facets will be marked as deleted. Optionally, subclasses can transform the list * of documents before being returned in the {@link ProcessResult}. First {@link * #transform(ProcessBatch)} is called followed by {@link #transform(List)}. When overriding * {@link #transform(ProcessBatch)}, the document is put into the {@link ProcessContext} as a * dependency with this sourceName and the corresponding document's id. */ @Override public ProcessResult process(ProcessBatch batch) { List ids = batch.getIds(); BoxQuery query = new BoxQuery().addIds(ids); QueryResult queryResult = findAndTransform(query, batch); return new ProcessResult().addDocuments(queryResult); } /** * {@inheritDoc} * *

Requests documents denoted by the {@link HarvestContext} from the underlying source. * Documents are pared according to the `fields` parameter and the full metadata is retrieved * (minus groups and dependencies). Only documents matching the preconfigured facets and statuses * are returned (default to no facets and statuses of ready and deleted). Optionally, subclasses * can transform the list of documents before being returned in the {@link HarvestResult}. First * {@link #transform(ProcessBatch)} is called followed by {@link #transform(List)}. When * overriding {@link #transform(ProcessBatch)}, the document is put into the {@link * ProcessContext} as a dependency with this sourceName and the corresponding document's id. */ @Override public HarvestResult harvest(HarvestContext context) { HarvestResult result = new HarvestResult(); long cursor = context.getCursor().path("cursor").asLong(); BoxQuery query = new BoxQuery().setCursor(cursor).setLimit(limit); if (harvestForView || harvestUnprocessed) { query.addFields(BoxQuery.METADATA_FIELD); } InfoQueryResult queryResult = findAndTransform(query); result.withCursor().put("cursor", queryResult.getNextCursor() + ""); if (harvestUnprocessed) { for (BoxDocument transformedDocument : queryResult) { result.add(new BoxDocument(transformedDocument.getId())); } } else { result.addDocuments(queryResult); } result.setMore(queryResult.more); return result; } /** * {@inheritDoc} * *

Requests documents denoted by the {@link BoxQuery} from the underlying source. Documents are * pared according to the `fields` parameter and the full metadata is retrieved (minus groups and * dependencies). Only documents matching the preconfigured facets and statuses are returned * (default to no facets and statuses of ready and deleted). When requesting by id, existing * documents at the underlying source that do not match the preconfigured facets will be marked as * deleted. Optionally, subclasses can transform the list of documents before being returned in * the {@link HarvestResult}. First {@link #transform(ProcessBatch)} is called followed by {@link * #transform(List)}. When overriding {@link #transform(ProcessBatch)}, the document is put into * the {@link ProcessContext} as a dependency with this sourceName and the corresponding * document's id. */ @Override public QueryResult find(BoxQuery query) { return findAndTransform(new BoxQuery(query)); } private InfoQueryResult findAndTransform(BoxQuery query) { return findAndTransform(query, new ProcessBatch()); } private InfoQueryResult findAndTransform(BoxQuery query, ProcessBatch batch) { InfoQueryResult result = new InfoQueryResult(rawFind(query)); List documents = transform(transform(makeBatch(result, batch))); result.clear(); result.addAll(documents); return result; } /** * Prepares {@link ProcessBatch} with documents as dependencies. * * @param documents documents to put in batch as dependencies * @param batch batch to process * @return ready batch */ private ProcessBatch makeBatch(List documents, ProcessBatch batch) { ProcessBatch newBatch = new ProcessBatch(); for (BoxDocument document : documents) { Map dependencies = new HashMap<>(); ProcessContext context = batch.get(document.getId()); Box box = null; Source source = null; if (context != null) { dependencies.putAll(context.getDependencies()); box = context.getBox(); source = context.getSource(); } dependencies.put(new DocumentId(sourceName, document.getId()), document); newBatch.add(new ProcessContext(box, source, document.getId(), dependencies)); } return newBatch; } /** * Same as {@link #find(BoxQuery)} except it does not perform any transformation other than * applying the preconfigured `fields`, `facets`, and `statuses`. * * @param query the query to run against the underlying source * @return the query result */ public QueryResult rawFind(BoxQuery query) { try { query = new BoxQuery(query); // if we're not careful, a query specifying particular facets and/or fields can cause the view // to divulge information it shouldn't. These next steps make sure facets or fields in the // query // will not expose documents or document fields that should be hidden. // make sure the OR logic within a facet group does not expose documents being hidden by // this.facets boolean forceEmptyResults = false; // if this view and the query both specify facets if (!this.facets.isEmpty() && !query.getFacets().isEmpty()) { Set facetGroups = new HashSet<>(); // get all facet groups represented from the query for (Facet facet : query.getFacets()) { facetGroups.add(facet.getName()); } // final facets to use List facets = new ArrayList<>(); Set finalFacetGroups = new HashSet<>(); // only add facets from query that appear in this view's set of facets or whose group is not // represented by the view for (Facet facet : query.getFacets()) { if (this.facets.contains(facet) || !this.facetGroups.contains(facet.getName())) { facets.add(facet); finalFacetGroups.add(facet.getName()); } } // only add facets from this view that are not part of the query's facet groups for (Facet facet : this.facets) { if (!facetGroups.contains(facet.getName())) { facets.add(facet); } } query.clearFacets(); // only return results if all facet groups from query are represented in final facets if (facetGroups.size() == finalFacetGroups.size()) { query.addFacets(facets); } else { forceEmptyResults = true; query.addFacets(this.facets); } } else { query.addFacets(this.facets); } // if there are preconfigured statuses if (!this.statuses.isEmpty()) { if (query.getStatuses().isEmpty()) { // add preconfigured statuses if query doesn't specify any query.addStatuses(this.statuses); } else { // only return documents with intersection of preconfigured statuses and query statuses query.getStatuses().retainAll(this.statuses); // if there is no intersection forceEmptyResults |= query.getStatuses().isEmpty(); } } // fields specified in query may only be activated if they are subfields of view fields boolean forceEmptyDocuments = false; // if there are preconfigured fields if (!this.fields.isEmpty()) { List fields = new ArrayList<>(); for (String field : BoxUtils.canonicalizeFields(query.getFields())) { for (String masterField : BoxUtils.canonicalizeFields(this.fields)) { // use fields from query that are subfields of this view's fields if (field.equals(masterField) || field.startsWith(masterField + ".")) { fields.add(field); } // use masterField if it is a subfield of the query field if (masterField.startsWith(field + ".")) { fields.add(masterField); } } } if (query.getFields().isEmpty()) { fields.addAll(this.fields); } query.clearFields(); if (fields.isEmpty()) { // if there are no fields left, then documents' contents will be empty forceEmptyDocuments = true; query.addFields(this.fields); } else { query.addFields(fields); } } boolean removeFacets = false; if (!(query.getFields().isEmpty() || query.getFields().contains(BoxQuery.METADATA_FIELD) && !query.getFields().stream().anyMatch(f -> f.startsWith("@box.")))) { removeFacets = !query.getFields().contains(BoxQuery.METADATA_FIELD_FACETS); query.addFields( BoxQuery.METADATA_FIELD_ID, BoxQuery.METADATA_FIELD_STATUS, BoxQuery.METADATA_FIELD_CURSOR, BoxQuery.METADATA_FIELD_FACETS); } QueryResult result; if (baseSource != null) { result = baseSource.collect(query); } else { result = boxClient.collect(query); } if (forceEmptyResults) { result = new QueryResult(); } // remove dependencies and groupId for (int i = 0; i < result.size(); i++) { BoxDocument document = result.get(i); document.clearDependencies(); document.setGroupId(null); // for id type queries, mark as deleted if document doesn't have required facet(s) if (document.isReady() && !document.matches(facets)) { result.set(i, new BoxDocument(document.getId()).setAsDeleted()); } if (removeFacets) { document.clearFacets(); } } if (forceEmptyDocuments) { result.forEach(d -> d.setDocument(JsonNodeFactory.instance.objectNode())); } result = new QueryResult(result).setNextCursor(result.getNextCursor()); return result; } catch (Exception e) { String base = baseSource != null ? baseSource.getName() : baseUri.toString(); log.error("Error in view {} while querying {}: {}", sourceName, base, e.getMessage()); throw e; } } /** * Extend this class and override this method in order to transform the result documents in any * way from the direct processor. You can ignore certain documents by simply not including them in * the resulting collection. By default the processed flag is set to false (this flag tells box to * simply put the id on the queue). * *

Note: You may modify the documents in place. * * @param batch batch to transform * @return list of {@link BoxDocument}s to return to box */ protected List transform(ProcessBatch batch) { return batch .stream() .map(c -> c.getDependency(sourceName, c.getId())) .collect(Collectors.toList()); } /** * Extend this class and override this method in order to transform the result documents in any * way from the direct processor. You can ignore certain documents by simply not including them in * the resulting collection. By default the processed flag is set to false (this flag tells box to * simply put the id on the queue). * *

Note: You may modify the documents in place. * * @param documents documents to transform * @return list of {@link BoxDocument}s to return to box */ protected List transform(List documents) { return documents; } private void updateFacetGroups() { facets.forEach(f -> facetGroups.add(f.getName())); } /** * Returns the base uri for the underlying source. * * @return the uri * @deprecated use getBaseUri */ @Deprecated public URI getUri() { return baseUri; } /** * Sets the fields for this view. * * @param fields the particular fields to return for each document * @deprecated use builder */ @Deprecated public void setFields(Collection fields) { this.fields.clear(); this.fields.addAll(fields); } /** * Sets the statuses for the view. * * @param statuses the statuses to set * @deprecated use builder */ @Deprecated public void setStatuses(Collection statuses) { this.statuses.clear(); this.statuses.addAll(statuses); } /** * Sets the facets for the view. * * @param facets the facets to set * @deprecated use builder */ @Deprecated public void setFacets(Collection facets) { this.facets.clear(); this.facets.addAll(facets); updateFacetGroups(); } /** * Sets the harvest limit. * * @param limit the limit of documents per request to set * @deprecated use builder */ @Deprecated public void setLimit(long limit) { this.limit = limit; } /** * Sets the source name. * * @param sourceName the sourceName to set * @deprecated use builder */ @Deprecated public void setSourceName(String sourceName) { this.sourceName = sourceName; } /** * Sets whether to harvest unprocessed. * * @param harvestUnprocessed the harvestUnprocessed to set * @deprecated use builder */ @Deprecated public void setHarvestUnprocessed(boolean harvestUnprocessed) { this.harvestUnprocessed = harvestUnprocessed; } /** * Sets whether this is harvest for view. * * @param harvestForView the harvestForView to set * @deprecated use builder */ @Deprecated public void setHarvestForView(boolean harvestForView) { this.harvestForView = harvestForView; } /** * Returns the underlying local source. * * @return the local * @deprecated use getBaseSource */ @Deprecated public String getLocal() { return baseSourceName; } /** * You should use the builder to set a {@link BoxClient}. This is here for the rare occasion where * an auto-instantiated client will not work and must be replaced post initialization. * * @param boxClient the boxClient to set */ public void setBoxClient(BoxClient boxClient) { this.boxClient = boxClient; } /** * Returns the source. * * @return the source * @deprecated source should be retrieved through the Box instance */ @Deprecated public Source getSource() { return source; } private class InfoQueryResult extends QueryResult { private static final long serialVersionUID = 1L; private boolean more; private InfoQueryResult(QueryResult result) { super(result); setNextCursor(result.getNextCursor()); more = !result.isEmpty(); } } public abstract static class ViewBuilder> { public B baseUri(URI baseUri) { this.baseUri = baseUri; return self(); } public B baseUri(String baseUri) { return baseUri(URI.create(baseUri)); } private B source(Source source) { return self(); } @SuppressWarnings("unused") private B facetGroups(Set facetGroups) { return self(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy