// edu.byu.hbll.box.BoxDocument (Maven / Gradle / Ivy)
package edu.byu.hbll.box;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import edu.byu.hbll.box.BoxDocument.Deserializer;
import edu.byu.hbll.box.BoxDocument.Serializer;
import edu.byu.hbll.box.internal.util.BoxUtils;
import edu.byu.hbll.box.internal.util.JsonUtils;
import edu.byu.hbll.json.JsonField;
import edu.byu.hbll.json.ObjectMapperFactory;
import edu.byu.hbll.json.UncheckedObjectMapper;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
* Represents a document in box, including its metadata and dependencies. This object contains an
* internal document in the form of an {@link ObjectNode}. This internal document is the data of
* this object; all other fields are metadata.
*
* @author Charles Draper
*/
@JsonDeserialize(using = Deserializer.class)
@JsonSerialize(using = Serializer.class)
public class BoxDocument {
/** Shared JSON mapper used for parsing, serializing, and hashing documents. */
private static final UncheckedObjectMapper mapper = ObjectMapperFactory.newUnchecked();

/** Unique id of the document. */
private String id;

/** Processing status; starts out as UNPROCESSED. */
private Status status = Status.UNPROCESSED;

/** The internal document (the data); initialized to an empty object. */
private ObjectNode document = JsonNodeFactory.instance.objectNode();

/** Cursor value; volatile metadata excluded from {@link #hash()}. */
private Optional<Long> cursor = Optional.empty();

/** Last modified time; volatile metadata excluded from {@link #hash()}. */
private Optional<Instant> modified = Optional.empty();

/** Last processed time; volatile metadata excluded from {@link #hash()}. */
private Optional<Instant> processed = Optional.empty();

/** Optional message, e.g. an error message. */
private Optional<String> message = Optional.empty();

/** Facets of this document in insertion order. */
private Set<Facet> facets = new LinkedHashSet<>();

/** Dependencies of this document in insertion order. */
private Set<DocumentId> dependencies = new LinkedHashSet<>();

/** Optional group this document belongs to. */
private Optional<String> groupId = Optional.empty();

/**
 * Whether the status was set explicitly. While false, setting or requesting the internal document
 * switches the status to READY.
 */
private boolean statusSet;
/** Creates a new, empty document; every field keeps its initial value and no id is assigned. */
protected BoxDocument() {
  // Nothing to initialize beyond the field initializers.
}
/**
 * Creates a new document whose status starts out as UNPROCESSED.
 *
 * @param id the unique id of the document; must not be null
 * @throws NullPointerException if {@code id} is null
 */
public BoxDocument(String id) {
  Objects.requireNonNull(id);
  this.id = id;
}
/**
 * Creates a new document with the given status. The status counts as explicitly set, so later
 * calls that set or request the internal document will not change it to READY.
 *
 * @param id the unique id of the document; must not be null
 * @param status the status of the document; must not be null
 * @throws NullPointerException if {@code id} or {@code status} is null
 */
public BoxDocument(String id, Status status) {
  this.id = Objects.requireNonNull(id);
  // Reject null here, as setStatus does, rather than failing later when the status is serialized.
  this.status = Objects.requireNonNull(status);
  this.statusSet = true;
}
/**
 * Creates a new document that wraps the given internal document and starts out as READY.
 *
 * @param id the unique id of the document; must not be null
 * @param document the processed document; must not be null
 * @throws NullPointerException if {@code id} or {@code document} is null
 */
public BoxDocument(String id, ObjectNode document) {
  Objects.requireNonNull(id);
  Objects.requireNonNull(document);
  this.id = id;
  this.document = document;
  this.status = Status.READY;
}
/**
 * Copy constructor. The internal document is deep-copied, and the facets and dependencies are
 * copied into new sets; the remaining fields are carried over as-is.
 *
 * @param boxDocument the box document to copy
 */
public BoxDocument(BoxDocument boxDocument) {
  // Identity and status.
  this.id = boxDocument.id;
  this.status = boxDocument.status;
  this.statusSet = boxDocument.statusSet;

  // Data: deep copy so the two documents do not share JSON nodes.
  this.document = boxDocument.document.deepCopy();

  // Remaining metadata.
  this.cursor = boxDocument.cursor;
  this.modified = boxDocument.modified;
  this.processed = boxDocument.processed;
  this.message = boxDocument.message;
  this.groupId = boxDocument.groupId;
  this.facets.addAll(boxDocument.facets);
  this.dependencies.addAll(boxDocument.dependencies);
}
/**
 * Serializes the entire document (no projection) to an {@link ObjectNode}.
 *
 * @return a json representation of this document
 */
public ObjectNode toJson() {
  // An empty projection means "everything".
  return toJson(Collections.emptyList());
}
/**
 * Returns the {@link ObjectNode} representation of this document with the given field projection
 * applied. Calling this method with a null or empty projection returns the entire document.
 *
 * <p>Note: projection follows MongoDB's projection pattern
 * (https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/). The projection
 * is only applied to child nodes of objects. Arrays or nested arrays are iterated over and
 * projection picks up again when objects are found. Primitive values that are not part of a
 * terminal projection are removed.
 *
 * @param fields collection of dot-notated fields representing the projection
 * @return the node representation of this document with projection applied.
 */
public ObjectNode toJson(Collection<String> fields) {
  if (fields == null || fields.isEmpty()) {
    return mapper.valueToTree(this);
  }

  // Wrap the data and the metadata under temporary keys so both can be projected together.
  ObjectNode document = mapper.createObjectNode();
  document.set("@doc", this.document);
  document.set("@box", metadataToJson());

  ObjectNode projectedDocument =
      (ObjectNode) JsonUtils.project(document, BoxUtils.canonicalizeFields(fields));

  if (projectedDocument.has("@doc")) {
    // Unwrap the projected data back to the top level.
    projectedDocument.setAll((ObjectNode) projectedDocument.remove("@doc"));
  }

  if (projectedDocument.has("@box")) {
    // puts @box back at the bottom
    projectedDocument.set("@box", projectedDocument.remove("@box"));
  }

  return projectedDocument;
}
/**
 * Reads json from the given stream and creates a new box document from it.
 *
 * @param json the json to parse
 * @return the parsed box document
 */
public static BoxDocument parse(InputStream json) {
  ObjectNode root = (ObjectNode) mapper.readTree(json);
  // The tree was just created here, so it can be used without copying.
  return parse(root, false);
}
/**
 * Parses the given json text and creates a new box document from it.
 *
 * @param json the json to parse
 * @return the parsed box document
 */
public static BoxDocument parse(String json) {
  ObjectNode root = (ObjectNode) mapper.readTree(json);
  // The tree was just created here, so it can be used without copying.
  return parse(root, false);
}
/**
 * Creates a new box document from the given json. The parsing works on a copy of the node.
 *
 * @param json the json to parse
 * @return the parsed box document
 */
public static BoxDocument parse(ObjectNode json) {
  final boolean copy = true;
  return parse(json, copy);
}
/**
 * Creates a new box document from the given json. The {@code @box} object holds the metadata and
 * is removed from the node; what remains becomes the internal document.
 *
 * <p>Metadata fields that are absent or explicitly {@code null} are treated as not set.
 *
 * @param json the json to parse
 * @param copy whether or not to make a copy of the object node; when false the given node is
 *     modified and used directly as the internal document
 * @return the parsed box document
 * @throws IllegalArgumentException if {@code @box.id} is missing or null, or the status is not a
 *     known {@link Status}
 */
private static BoxDocument parse(ObjectNode json, boolean copy) {
  // hasNonNull: a JSON null id would otherwise become the literal id "null".
  if (!json.path("@box").hasNonNull("id")) {
    throw new IllegalArgumentException("Missing required field: @box.id");
  }

  ObjectNode document = copy ? json.deepCopy() : json;
  ObjectNode metadata = (ObjectNode) document.remove("@box");

  BoxDocument boxDocument = new BoxDocument(metadata.path("id").asText());
  boxDocument.document = document;

  if (metadata.hasNonNull("status")) {
    // Locale.ROOT keeps the case conversion independent of the default locale.
    boxDocument.status =
        Status.valueOf(metadata.path("status").asText().toUpperCase(Locale.ROOT));
    boxDocument.statusSet = true;
  } else if (document.size() > 0) {
    // No explicit status, but there is data: treat the document as READY.
    boxDocument.status = Status.READY;
  }

  // hasNonNull: a JSON null would otherwise parse as cursor 0 or as the text "null".
  if (metadata.hasNonNull("cursor")) {
    boxDocument.cursor = Optional.of(metadata.path("cursor").asLong());
  }

  if (metadata.hasNonNull("modified")) {
    boxDocument.modified = Optional.of(Instant.parse(metadata.path("modified").asText()));
  }

  if (metadata.hasNonNull("processed")) {
    boxDocument.processed = Optional.of(Instant.parse(metadata.path("processed").asText()));
  }

  boxDocument.message = Optional.ofNullable(metadata.path("message").asText(null));
  boxDocument.groupId = Optional.ofNullable(metadata.path("groupId").asText(null));

  for (JsonNode facet : metadata.path("facets")) {
    boxDocument.addFacets(facet.path("name").asText(null), facet.path("value").asText(null));
  }

  for (JsonNode dep : metadata.path("dependencies")) {
    boxDocument.addDependency(dep.path("sourceName").asText(null), dep.path("id").asText(null));
  }

  return boxDocument;
}
/**
 * Computes the SHA-256 digest of this document's serialized form, metadata included, with the
 * volatile metadata fields (ie, cursor, modified, and processed) left out.
 *
 * @return the digest
 */
public byte[] hash() {
  final MessageDigest digest;

  try {
    digest = MessageDigest.getInstance("SHA-256");
  } catch (NoSuchAlgorithmException e) {
    throw new InternalError(e);
  }

  byte[] serialized = mapper.writeValueAsBytes(new NonVolatileBoxDocument(this));
  return digest.digest(serialized);
}
/**
 * Tells whether this document's dependencies differ from the given document's.
 *
 * @param o the document to compare
 * @return true if the two sets of dependencies are not equal
 */
public boolean hasDifferentDependencies(BoxDocument o) {
  boolean same = Objects.equals(this.dependencies, o.dependencies);
  return !same;
}
/**
 * Tells whether processing of this document has completed, that is, whether the document is in a
 * READY or DELETED state.
 *
 * @return true if in a ready or deleted state
 */
public boolean isProcessed() {
  if (status == Status.READY) {
    return true;
  }
  return status == Status.DELETED;
}
/**
 * Tells whether this document is in the UNPROCESSED state.
 *
 * @return true if in the UNPROCESSED state
 * @deprecated use {@link #isUnprocessed()} instead
 */
@Deprecated
public boolean isUnProcessed() {
  return Status.UNPROCESSED == status;
}
/**
 * Tells whether this document is in the UNPROCESSED state.
 *
 * @return true if in the UNPROCESSED state
 */
public boolean isUnprocessed() {
  return Status.UNPROCESSED == status;
}
/**
 * Tells whether this document is in the DELETED state.
 *
 * @return true if in the DELETED state
 */
public boolean isDeleted() {
  return Status.DELETED == status;
}
/**
 * Tells whether this document is in the ERROR state.
 *
 * @return true if in the ERROR state
 */
public boolean isError() {
  return Status.ERROR == status;
}
/**
 * Tells whether this document is in the READY state.
 *
 * @return true if in the READY state
 */
public boolean isReady() {
  return Status.READY == status;
}
/**
 * Determines if this document matches (or should be included in a result set) given the supplied
 * facets. In order to match, the document must have at least one facet from each facet group
 * represented in the supplied facets. If the supplied facet list is empty, the document will
 * match.
 *
 * @param facets the facets to test the document against
 * @return whether or not this document matches the given facets
 */
public boolean matches(Collection<Facet> facets) {
  // The document fails to match as soon as one group shares no facet with this document.
  return Facet.group(facets).values().stream()
      .noneMatch(group -> Collections.disjoint(this.facets, group));
}
/**
 * Marks this document as DELETED; the status then counts as explicitly set.
 *
 * @return this
 */
public BoxDocument setAsDeleted() {
  statusSet = true;
  status = Status.DELETED;
  return this;
}
/**
 * Marks this document as UNPROCESSED; the status then counts as explicitly set.
 *
 * @return this
 */
public BoxDocument setAsUnprocessed() {
  statusSet = true;
  status = Status.UNPROCESSED;
  return this;
}
/**
 * Marks this document as READY; the status then counts as explicitly set.
 *
 * @return this
 */
public BoxDocument setAsReady() {
  statusSet = true;
  status = Status.READY;
  return this;
}
/**
 * Marks this document as ERROR without touching the message; the status then counts as
 * explicitly set.
 *
 * @return this
 */
public BoxDocument setAsError() {
  statusSet = true;
  status = Status.ERROR;
  return this;
}
/**
 * Marks this document as ERROR and records the given error message; the status then counts as
 * explicitly set.
 *
 * @param message the error message; null clears the message
 * @return this
 */
public BoxDocument setAsError(String message) {
  statusSet = true;
  status = Status.ERROR;
  this.message = Optional.ofNullable(message);
  return this;
}
/**
 * Adds a single dependency, identified by source name and id, to this document.
 *
 * @param sourceName the sourceName of the dependency to add.
 * @param id the id of the dependency to add.
 * @return this
 */
public BoxDocument addDependency(String sourceName, String id) {
  DocumentId dependency = new DocumentId(sourceName, id);
  dependencies.add(dependency);
  return this;
}
/**
 * Adds each of the given dependencies to this document.
 *
 * @param dependencies the dependencies to add.
 * @return this
 */
public BoxDocument addDependencies(DocumentId... dependencies) {
  for (DocumentId dependency : dependencies) {
    this.dependencies.add(dependency);
  }
  return this;
}
/**
* Adds dependencies for this document.
*
* @param dependencies the dependencies to add.
* @return this
*/
public BoxDocument addDependencies(Collection extends DocumentId> dependencies) {
this.dependencies.addAll(dependencies);
return this;
}
/**
* Clears dependencies and then adds these dependencies for this document.
*
* @param dependencies the dependencies to add.
* @return this
*/
public BoxDocument setDependencies(Collection extends DocumentId> dependencies) {
clearDependencies();
addDependencies(dependencies);
return this;
}
/**
 * Adds one facet, made of a group name and a value, to this document.
 *
 * @param name name of the facet group
 * @param value value of the facet
 * @return this
 */
public BoxDocument addFacet(String name, String value) {
  return addFacets(name, Collections.singletonList(value));
}
/**
 * Adds one facet per given value, all under the same facet group name.
 *
 * @param name name of the facet group
 * @param values value(s) of the facet
 * @return this
 */
public BoxDocument addFacets(String name, String... values) {
  List<String> valueList = Arrays.asList(values);
  return addFacets(name, valueList);
}
/**
 * Adds multiple facets to the document, one per value, all under the same facet group name.
 *
 * @param name name of the facet group
 * @param values values of the facet
 * @return this
 */
public BoxDocument addFacets(String name, Collection<String> values) {
  values.forEach(v -> facets.add(new Facet(name, v)));
  return this;
}
/**
 * Adds each of the given facets to the document.
 *
 * @param facets the facets to add
 * @return this
 */
public BoxDocument addFacets(Facet... facets) {
  for (Facet facet : facets) {
    this.facets.add(facet);
  }
  return this;
}
/**
* Adds multiple facets to the document.
*
* @param facets the facets to add
* @return this
*/
public BoxDocument addFacets(Collection extends Facet> facets) {
this.facets.addAll(facets);
return this;
}
/**
* Clears existing facets and then adds these facets to the document.
*
* @param facets the facets to add
* @return this
*/
public BoxDocument setFacets(Collection extends Facet> facets) {
clearFacets();
addFacets(facets);
return this;
}
/**
 * Looks up the given dot-notated path in the internal document and adds a facet for each value
 * found there. Whitespace around the path and around each dot is ignored.
 *
 * @param name name of facet
 * @param path path to field value in dot notation
 * @return this
 */
public BoxDocument addFacetsByQuery(String name, String path) {
  if (document == null) {
    return this;
  }

  String[] segments = path.trim().split("\\s*\\.\\s*");
  addFacetsByQuery(name, document, segments);
  return this;
}
/**
* Adds facets by querying the internal document using the given paths.
*
* @param paths the paths to the field values in dot notation, key is name of facet, value is set
* of paths
* @return this
*/
public BoxDocument addFacetsByQuery(Map> paths) {
if (document != null) {
for (String name : paths.keySet()) {
for (String path : paths.get(name)) {
addFacetsByQuery(name, path);
}
}
}
return this;
}
/**
 * Walks the given node along the remaining path and adds a facet for every value node reached.
 * Arrays are descended into element by element without consuming a path segment; objects consume
 * the next segment. Anything else along the way is ignored.
 *
 * @param name name of the facet
 * @param node the node to be queried
 * @param path the remaining path segments still to be followed
 */
private void addFacetsByQuery(String name, JsonNode node, String[] path) {
  if (path.length == 0) {
    // End of the path: only plain values become facets.
    if (node.isValueNode()) {
      this.facets.add(new Facet(name, node.asText()));
    }
    return;
  }

  if (node.isArray()) {
    node.forEach(element -> addFacetsByQuery(name, element, path));
    return;
  }

  if (node.isObject()) {
    String[] remaining = Arrays.copyOfRange(path, 1, path.length);
    addFacetsByQuery(name, node.path(path[0]), remaining);
  }
}
/**
 * Gets the unique id of the document.
 *
 * @return the unique id of the document
 */
public String getId() {
  return this.id;
}
/**
 * Replaces the unique id of the document.
 *
 * @param id the id of the document; must not be null
 * @return this
 * @throws NullPointerException if {@code id} is null
 */
public BoxDocument setId(String id) {
  Objects.requireNonNull(id);
  this.id = id;
  return this;
}
/**
 * Gets the internal document, which starts out as an empty object. Unlike
 * {@link #withDocument()}, this does not change the status.
 *
 * @return the internal document
 */
public ObjectNode getDocument() {
  return this.document;
}
/**
 * Replaces the internal document. If the status has not been set explicitly, it becomes READY.
 *
 * @param document the document to set; must not be null
 * @return this
 * @throws NullPointerException if {@code document} is null
 */
public BoxDocument setDocument(ObjectNode document) {
  Objects.requireNonNull(document);
  this.document = document;

  if (statusSet) {
    // An explicit status always wins.
    return this;
  }

  this.status = Status.READY;
  return this;
}
/**
 * Returns the internal document for editing. If the status has not been set explicitly, it
 * becomes READY. The internal document is an empty object unless it was set elsewhere.
 *
 * @return internal document
 */
public ObjectNode withDocument() {
  boolean implicitStatus = !statusSet;

  if (implicitStatus) {
    this.status = Status.READY;
  }

  return this.document;
}
/**
 * Returns the cursor.
 *
 * @return the cursor, or empty if none is set
 */
public Optional<Long> getCursor() {
  return cursor;
}
/**
 * Sets the cursor.
 *
 * <p>When processing a document, this should only be updated if the box document actually
 * changed since last time. This should generally be left blank because Box will make that
 * determination and set it appropriately when saving. If it's not blank, Box will honor the set
 * value.
 *
 * @param cursor the cursor to set; null clears it
 * @return this
 */
public BoxDocument setCursor(Long cursor) {
  this.cursor = (cursor == null) ? Optional.empty() : Optional.of(cursor);
  return this;
}
/**
 * Returns the modified date.
 *
 * @return when the document was last modified, or empty if not set
 */
public Optional<Instant> getModified() {
  return modified;
}
/**
 * Sets the modified date.
 *
 * <p>When processing a document, this should only be updated if the box document actually
 * changed since last time. This should generally be left blank because Box will make that
 * determination and set it appropriately when saving. If it's not blank, Box will honor the set
 * value.
 *
 * @param modified the modified to set; null clears it
 * @return this
 */
public BoxDocument setModified(Instant modified) {
  this.modified = (modified == null) ? Optional.empty() : Optional.of(modified);
  return this;
}
/**
 * Returns the processed date.
 *
 * @return the processed date, or empty if not set
 */
public Optional<Instant> getProcessed() {
  return processed;
}
/**
 * Sets the processed date.
 *
 * <p>When processing a document, this can be left blank because Box will set it appropriately
 * when saving. If it's not blank, Box will honor the set value.
 *
 * @param processed the processed to set; null clears it
 * @return this
 */
public BoxDocument setProcessed(Instant processed) {
  this.processed = (processed == null) ? Optional.empty() : Optional.of(processed);
  return this;
}
/**
 * Return the error message.
 *
 * @return the message, or empty if none is set
 */
public Optional<String> getMessage() {
  return message;
}
/**
 * Sets the message, e.g. when an error occurred. The status is left unchanged.
 *
 * @param message the message to set if an error occurred; null clears it
 * @return this
 */
public BoxDocument setMessage(String message) {
  this.message = (message == null) ? Optional.empty() : Optional.of(message);
  return this;
}
/**
 * Returns the facets. The returned set is the one currently held by this document, not a copy.
 *
 * @return the facets
 */
public Set<Facet> getFacets() {
  return facets;
}
/**
 * Returns all values for the given facet, in the order the facets were added. An empty set is
 * returned if no facet of the given name exists. The returned set is unmodifiable.
 *
 * @param facetName the facet name
 * @return the facet values
 */
public Set<String> getFacetValues(String facetName) {
  return Collections.unmodifiableSet(
      facets.stream()
          .filter(f -> f.getName().equals(facetName))
          .map(f -> f.getValue())
          .collect(Collectors.toCollection(LinkedHashSet::new)));
}
/**
 * Returns the first value for the given facet.
 *
 * @param facetName the facet name
 * @return the first facet value, or empty if no facet of the given name exists
 */
public Optional<String> getFacetValue(String facetName) {
  return facets.stream()
      .filter(f -> f.getName().equals(facetName))
      .map(f -> f.getValue())
      .findFirst();
}
/**
 * Returns the dependencies. The returned set is the one currently held by this document, not a
 * copy.
 *
 * @return the dependencies
 */
public Set<DocumentId> getDependencies() {
  return dependencies;
}
/**
 * Returns the groupId.
 *
 * @return the groupId, or empty if none is set
 */
public Optional<String> getGroupId() {
  return groupId;
}
/**
 * Assigns this document to a group (used for orphan cleanup).
 *
 * @param groupId the groupId to set; null clears it
 * @return this
 */
public BoxDocument setGroupId(String groupId) {
  this.groupId = (groupId == null) ? Optional.empty() : Optional.of(groupId);
  return this;
}
/**
 * Gets the current status of the document.
 *
 * @return the status
 */
public Status getStatus() {
  return this.status;
}
/**
 * Sets the status; the status then counts as explicitly set.
 *
 * @param status the status to set; must not be null
 * @return this
 * @throws NullPointerException if {@code status} is null
 */
public BoxDocument setStatus(Status status) {
  this.status = Objects.requireNonNull(status);
  this.statusSet = true;
  return this;
}
/**
 * Removes all facets by swapping in a new, empty set.
 *
 * @return this
 */
public BoxDocument clearFacets() {
  // A new set is assigned rather than clearing the existing one in place.
  facets = new LinkedHashSet<>();
  return this;
}
/**
 * Removes all dependencies by swapping in a new, empty set.
 *
 * @return this
 */
public BoxDocument clearDependencies() {
  // A new set is assigned rather than clearing the existing one in place.
  dependencies = new LinkedHashSet<>();
  return this;
}
/** Derives the hash code from {@link #hash()}, so volatile metadata does not affect it. */
@Override
public int hashCode() {
  byte[] digest = hash();
  return Arrays.hashCode(digest);
}
/**
 * {@inheritDoc}
 *
 * <p>This method follows the same comparison Box uses internally to determine if the document
 * has been modified. All fields are compared except for the volatile fields cursor, modified, and
 * processed. The two documents must serialize the same in order for them to be considered equal.
 */
@Override
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }

  if (o == null || o.getClass() != getClass()) {
    return false;
  }

  BoxDocument other = (BoxDocument) o;
  return Arrays.equals(other.hash(), hash());
}
/** Returns the full json serialization of this document as a string. */
@Override
public String toString() {
  String json = mapper.writeValueAsString(this);
  return json;
}
/**
 * Returns the string representation of this document with the given field projection applied.
 * Calling this method with an empty projection returns the entire document.
 *
 * <p>Note: projection follows MongoDB's projection pattern
 * (https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/). The projection
 * is only applied to child nodes of objects. Arrays or nested arrays are iterated over and
 * projection picks up again when objects are found. Primitive values that are not part of a
 * terminal projection are removed.
 *
 * @param fields collection of dot-notated fields representing the projection
 * @return the string representation of this document with projection applied.
 */
public String toString(Collection<String> fields) {
  return mapper.writeValueAsString(toJson(fields));
}
/**
 * Serializes all of the metadata, volatile fields included, to an {@link ObjectNode}.
 *
 * @return the serialized metadata
 */
private ObjectNode metadataToJson() {
  final boolean excludeVolatile = false;
  return metadataToJson(excludeVolatile);
}
/**
 * Builds the metadata object for this document. Fields are written in a fixed order: id, status,
 * cursor, modified, processed, message, facets, dependencies, groupId. Optional fields and empty
 * collections are left out.
 *
 * @param excludeVolatile excludes the cursor, modified, and processed
 * @return the serialized metadata
 */
private ObjectNode metadataToJson(boolean excludeVolatile) {
  final ObjectNode metadata = mapper.createObjectNode();

  metadata.put("id", id);
  metadata.put("status", status.name());

  if (!excludeVolatile) {
    // The cursor is written as text, not as a number.
    cursor.ifPresent(value -> metadata.put("cursor", String.valueOf(value)));
    modified.ifPresent(value -> metadata.put("modified", value.toString()));
    processed.ifPresent(value -> metadata.put("processed", value.toString()));
  }

  message.ifPresent(value -> metadata.put("message", value));

  // The arrays are only created once there is at least one element to write.
  for (Facet facet : facets) {
    ObjectNode entry = metadata.withArray("facets").addObject();
    entry.put("name", facet.getName());
    entry.put("value", facet.getValue());
  }

  for (DocumentId dep : dependencies) {
    ObjectNode entry = metadata.withArray("dependencies").addObject();
    entry.put("sourceName", dep.getSourceName());
    entry.put("id", dep.getId());
  }

  groupId.ifPresent(value -> metadata.put("groupId", value));

  return metadata;
}
/**
 * Status of the document.
 *
 * @author Charles Draper
 */
public enum Status {
  /** The document is new and has not yet been processed. */
  UNPROCESSED,
  /** The document has been processed and is ready to use. */
  READY,
  /** The document has been deleted. */
  DELETED,
  /** There was an error in the processing. */
  ERROR
}
/**
* Deserializer of BoxDocument for Jackson.
*
* @author Charles Draper
*/
public static class Deserializer extends StdDeserializer {
private static final long serialVersionUID = 1L;
/** Creates a new jackson Deserializer. */
public Deserializer() {
this(null);
}
/**
* Creates a new jackson Deserializer for a box document.
*
* @param vc the {@link BoxDocument} class
*/
public Deserializer(Class> vc) {
super(vc);
}
@Override
public BoxDocument deserialize(JsonParser parser, DeserializationContext context)
throws IOException, JsonProcessingException {
return BoxDocument.parse((ObjectNode) parser.readValueAsTree(), false);
}
}
/**
 * Serializer of BoxDocument for Jackson. Writes the fields of the internal document at the top
 * level, followed by the metadata under {@code @box}.
 *
 * @author Charles Draper
 */
public static class Serializer extends StdSerializer<BoxDocument> {

  private static final long serialVersionUID = 1L;

  /** Creates a new jackson Serializer. */
  public Serializer() {
    this(null);
  }

  /**
   * Creates a new jackson Serializer for a box document.
   *
   * @param t the {@link BoxDocument} class
   */
  public Serializer(Class<BoxDocument> t) {
    super(t);
  }

  @Override
  public void serialize(BoxDocument doc, JsonGenerator gen, SerializerProvider provider)
      throws IOException, JsonProcessingException {
    write(doc, gen, false);
  }

  /**
   * Writes the given document as a json object.
   *
   * @param doc the document to write
   * @param gen the generator to write to
   * @param excludeVolatile whether to leave out the cursor, modified, and processed metadata
   * @throws IOException if writing to the generator fails
   */
  private static void write(BoxDocument doc, JsonGenerator gen, boolean excludeVolatile)
      throws IOException, JsonProcessingException {
    gen.writeStartObject();

    for (JsonField child : new JsonField(doc.getDocument())) {
      gen.writeObjectField(child.getKey(), child.getValue());
    }

    // Metadata always goes last.
    gen.writeObjectField("@box", doc.metadataToJson(excludeVolatile));
    gen.writeEndObject();
  }
}
/**
 * Wrapper around a {@link BoxDocument} that Jackson serializes with {@link
 * NonVolatileSerializer} instead of the regular serializer. Used by {@link BoxDocument#hash()}.
 * The constructor and accessors are generated by Lombok.
 */
@Data
@AllArgsConstructor
@JsonSerialize(using = NonVolatileSerializer.class)
private static class NonVolatileBoxDocument {
/** The wrapped document. */
private BoxDocument document;
}
/**
 * Jackson serializer that writes the wrapped {@link BoxDocument} without its volatile metadata
 * (cursor, modified, and processed).
 */
private static class NonVolatileSerializer extends StdSerializer<NonVolatileBoxDocument> {

  private static final long serialVersionUID = 1L;

  /** Creates a new serializer; the no-argument form is the one kept for Jackson. */
  @SuppressWarnings("unused")
  public NonVolatileSerializer() {
    this(null);
  }

  /**
   * Creates a new serializer for the wrapper type.
   *
   * @param t the {@link NonVolatileBoxDocument} class
   */
  public NonVolatileSerializer(Class<NonVolatileBoxDocument> t) {
    super(t);
  }

  @Override
  public void serialize(
      NonVolatileBoxDocument doc, JsonGenerator gen, SerializerProvider provider)
      throws IOException, JsonProcessingException {
    Serializer.write(doc.getDocument(), gen, true);
  }
}
}