// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// edu.byu.hbll.box.BoxDocument Maven / Gradle / Ivy

package edu.byu.hbll.box;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import edu.byu.hbll.box.BoxDocument.Deserializer;
import edu.byu.hbll.box.BoxDocument.Serializer;
import edu.byu.hbll.box.internal.util.BoxUtils;
import edu.byu.hbll.box.internal.util.JsonUtils;
import edu.byu.hbll.json.JsonField;
import edu.byu.hbll.json.ObjectMapperFactory;
import edu.byu.hbll.json.UncheckedObjectMapper;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import lombok.Data;

/**
 * Represents a document in box including metadata and dependencies. This object contains an
 * internal document in the form of a {@link ObjectNode}. This internal document is the data of this
 * object and all other fields are metadata.
 *
 * @author Charles Draper
 */
@JsonDeserialize(using = Deserializer.class)
@JsonSerialize(using = Serializer.class)
public class BoxDocument {

  private static final UncheckedObjectMapper mapper = ObjectMapperFactory.newUnchecked();

  private String id;

  private Status status = Status.UNPROCESSED;

  private ObjectNode document = JsonNodeFactory.instance.objectNode();

  private Optional cursor = Optional.empty();

  private Optional modified = Optional.empty();

  private Optional processed = Optional.empty();

  private Optional message = Optional.empty();

  private Set facets = new LinkedHashSet<>();

  private Set dependencies = new LinkedHashSet<>();

  private Optional groupId = Optional.empty();

  private boolean statusSet;

  /** Creates a new empty document. */
  protected BoxDocument() {}

  /**
   * Creates a new document initially in an UNPROCESSED state.
   *
   * @param id the unique id of the document
   */
  public BoxDocument(String id) {
    this.id = Objects.requireNonNull(id);
  }

  /**
   * Creates a new document with the given state.
   *
   * @param id the unique id of the document
   * @param status the status of the document
   */
  public BoxDocument(String id, Status status) {
    this.id = Objects.requireNonNull(id);
    this.status = status;
    this.statusSet = true;
  }

  /**
   * Creates a new document initially in a READY state with the given internal document.
   *
   * @param id the unique id of the document
   * @param document the processed document
   */
  public BoxDocument(String id, ObjectNode document) {
    this.id = Objects.requireNonNull(id);
    this.document = Objects.requireNonNull(document);
    this.status = Status.READY;
  }

  /**
   * Copy Constructor.
   *
   * @param boxDocument the box document to copy
   */
  public BoxDocument(BoxDocument boxDocument) {
    this.id = boxDocument.id;
    this.status = boxDocument.status;
    this.document = boxDocument.document.deepCopy();
    this.cursor = boxDocument.cursor;
    this.modified = boxDocument.modified;
    this.processed = boxDocument.processed;
    this.message = boxDocument.message;
    this.facets.addAll(boxDocument.facets);
    this.dependencies.addAll(boxDocument.dependencies);
    this.groupId = boxDocument.groupId;
    this.statusSet = boxDocument.statusSet;
  }

  /**
   * Serializes to an {@link ObjectNode}.
   *
   * @return a json representation of this document
   */
  public ObjectNode toJson() {
    return toJson(List.of());
  }

  /**
   * Returns the {@link ObjectNode} representation of this document with the given field projection
   * applied. Calling this method with an empty projection returns the entire document.
   *
   * 

Note: projection follows MongoDB's projection pattern * (https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/). The projection * is only applied to child nodes of objects. Arrays or nested arrays are iterated over and * projection picks up again when objects are found. Primitive values that are not part of a * terminal projection are removed. * * @param fields collection of dot-notated fields representing the projection * @return the node representation of this document with projection applied. */ public ObjectNode toJson(Collection fields) { if (fields == null || fields.isEmpty()) { return mapper.valueToTree(this); } ObjectNode document = mapper.createObjectNode(); document.set("@doc", this.document); document.set("@box", metadataToJson()); ObjectNode projectedDocument = (ObjectNode) JsonUtils.project(document, BoxUtils.canonicalizeFields(fields)); if (projectedDocument.has("@doc")) { projectedDocument.setAll((ObjectNode) projectedDocument.remove("@doc")); } if (projectedDocument.has("@box")) { // puts @box back at the bottom projectedDocument.set("@box", projectedDocument.remove("@box")); } return projectedDocument; } /** * Creates a new box document from the given json. * * @param json the json to parse * @return the parsed box document */ public static BoxDocument parse(InputStream json) { return parse((ObjectNode) mapper.readTree(json), false); } /** * Creates a new box document from the given json. * * @param json the json to parse * @return the parsed box document */ public static BoxDocument parse(String json) { return parse((ObjectNode) mapper.readTree(json), false); } /** * Creates a new box document from the given json. * * @param json the json to parse * @return the parsed box document */ public static BoxDocument parse(ObjectNode json) { return parse(json, true); } /** * Creates a new box document from the given json. 
* * @param json the json to parse * @param copy whether or not to make a copy of the object node * @return the parsed box document */ private static BoxDocument parse(ObjectNode json, boolean copy) { if (!json.path("@box").has("id")) { throw new IllegalArgumentException("Missing required field: @box.id"); } ObjectNode document = copy ? json.deepCopy() : json; ObjectNode metadata = (ObjectNode) document.remove("@box"); String id = metadata.path("id").asText(); BoxDocument boxDocument = new BoxDocument(id); boxDocument.document = document; if (metadata.has("status")) { boxDocument.status = Status.valueOf(metadata.path("status").asText().toUpperCase()); boxDocument.statusSet = true; } else if (document.size() > 0) { boxDocument.status = Status.READY; } if (metadata.has("cursor")) { boxDocument.cursor = Optional.ofNullable(metadata.path("cursor").asLong()); } if (metadata.has("modified")) { boxDocument.modified = Optional.ofNullable(Instant.parse(metadata.path("modified").asText())); } if (metadata.has("processed")) { boxDocument.processed = Optional.ofNullable(Instant.parse(metadata.path("processed").asText())); } boxDocument.message = Optional.ofNullable(metadata.path("message").asText(null)); boxDocument.groupId = Optional.ofNullable(metadata.path("groupId").asText(null)); for (JsonNode facet : metadata.path("facets")) { boxDocument.addFacets(facet.path("name").asText(null), facet.path("value").asText(null)); } for (JsonNode dep : metadata.path("dependencies")) { boxDocument.addDependency(dep.path("sourceName").asText(null), dep.path("id").asText(null)); } return boxDocument; } /** * Hashes the document using SHA-256 including metadata, but excluding all volatile metadata * fields (ie, cursor, modified, and processed). 
* * @return the digest */ public byte[] hash() { MessageDigest md; try { md = MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException e) { throw new InternalError(e); } md.update(mapper.writeValueAsBytes(new NonVolatileBoxDocument(this))); return md.digest(); } /** * Whether or not this document's dependencies differ from the give document's. * * @param o the document to compare * @return whether or not the dependencies differ */ public boolean hasDifferentDependencies(BoxDocument o) { return !Objects.equals(dependencies, o.dependencies); } /** * Whether or not this document's processing has completed. That is whether the document is in a * READY or DELETED state. * * @return if in a ready or deleted state */ public boolean isProcessed() { return status == Status.READY || status == Status.DELETED; } /** * Whether or not this document is in the UNPROCESSED state. * * @return if in the UNPROCESSED state * @deprecated use {@link #isUnprocessed()} instead */ @Deprecated public boolean isUnProcessed() { return status == Status.UNPROCESSED; } /** * Whether or not this document is in the UNPROCESSED state. * * @return if in the UNPROCESSED state */ public boolean isUnprocessed() { return status == Status.UNPROCESSED; } /** * Whether or not this document is in the DELETED state. * * @return if in the DELETED state */ public boolean isDeleted() { return status == Status.DELETED; } /** * Whether or not this document is in the ERROR state. * * @return if in the ERROR state */ public boolean isError() { return status == Status.ERROR; } /** * Whether or not this document is in the READY state. * * @return if in the READY state */ public boolean isReady() { return status == Status.READY; } /** * Determines if this document matches (or should be included in a result set) given the supplied * facets. In order to match, the document must have at least one facet from each facet group * represented in the supplied facets. 
If the supplied facet list is empty, the document will * match. * * @param facets the facets to test the document against * @return whether or not this document matches the given facets */ public boolean matches(Collection facets) { Map> facetMap = Facet.group(facets); for (String group : facetMap.keySet()) { if (Collections.disjoint(this.facets, facetMap.get(group))) { return false; } } return true; } /** * Explicitly sets the status to deleted. * * @return this */ public BoxDocument setAsDeleted() { this.status = Status.DELETED; this.statusSet = true; return this; } /** * Explicitly sets the status to unprocessed. * * @return this */ public BoxDocument setAsUnprocessed() { this.status = Status.UNPROCESSED; this.statusSet = true; return this; } /** * Explicitly sets the status to ready. * * @return this */ public BoxDocument setAsReady() { this.status = Status.READY; this.statusSet = true; return this; } /** * Explicitly sets the status to error. * * @return this */ public BoxDocument setAsError() { this.status = Status.ERROR; this.statusSet = true; return this; } /** * Explicitly sets the status to error and includes an error message. * * @param message the error message * @return this */ public BoxDocument setAsError(String message) { this.status = Status.ERROR; this.message = Optional.ofNullable(message); this.statusSet = true; return this; } /** * Adds a dependency for this document. * * @param sourceName the sourceName of the dependency to add. * @param id the id of the dependency to add. * @return this */ public BoxDocument addDependency(String sourceName, String id) { this.dependencies.add(new DocumentId(sourceName, id)); return this; } /** * Adds dependencies for this document. * * @param dependencies the dependencies to add. * @return this */ public BoxDocument addDependencies(DocumentId... dependencies) { this.dependencies.addAll(Arrays.asList(dependencies)); return this; } /** * Adds dependencies for this document. 
* * @param dependencies the dependencies to add. * @return this */ public BoxDocument addDependencies(Collection dependencies) { this.dependencies.addAll(dependencies); return this; } /** * Clears dependencies and then adds these dependencies for this document. * * @param dependencies the dependencies to add. * @return this */ public BoxDocument setDependencies(Collection dependencies) { clearDependencies(); addDependencies(dependencies); return this; } /** * Adds a facet to this document. * * @param name name of the facet group * @param value value of the facet * @return this */ public BoxDocument addFacet(String name, String value) { return addFacets(name, Arrays.asList(value)); } /** * Adds multiple facets to the document. * * @param name name of the facet group * @param values value(s) of the facet * @return this */ public BoxDocument addFacets(String name, String... values) { return addFacets(name, Arrays.asList(values)); } /** * Adds multiple facets to the document. * * @param name name of the facet group * @param values values of the facet * @return this */ public BoxDocument addFacets(String name, Collection values) { values.forEach(v -> facets.add(new Facet(name, v))); return this; } /** * Adds multiple facets to the document. * * @param facets the facets to add * @return this */ public BoxDocument addFacets(Facet... facets) { this.facets.addAll(Arrays.asList(facets)); return this; } /** * Adds multiple facets to the document. * * @param facets the facets to add * @return this */ public BoxDocument addFacets(Collection facets) { this.facets.addAll(facets); return this; } /** * Clears existing facets and then adds these facets to the document. * * @param facets the facets to add * @return this */ public BoxDocument setFacets(Collection facets) { clearFacets(); addFacets(facets); return this; } /** * Adds a facet by querying the internal document using the given path. 
* * @param name name of facet * @param path path to field value in dot notation * @return this */ public BoxDocument addFacetsByQuery(String name, String path) { if (document != null) { String[] splitPath = path.trim().split("\\s*\\.\\s*"); addFacetsByQuery(name, document, splitPath); } return this; } /** * Adds facets by querying the internal document using the given paths. * * @param paths the paths to the field values in dot notation, key is name of facet, value is set * of paths * @return this */ public BoxDocument addFacetsByQuery(Map> paths) { if (document != null) { for (String name : paths.keySet()) { for (String path : paths.get(name)) { addFacetsByQuery(name, path); } } } return this; } /** * Adds facets by querying the internal document. * * @param name name of the facet * @param fieldPath field where the value is * @param node the node to be queried */ private void addFacetsByQuery(String name, JsonNode node, String[] path) { if (path.length == 0) { if (node.isValueNode()) { this.facets.add(new Facet(name, node.asText())); } } else if (node.isArray()) { for (JsonNode element : node) { addFacetsByQuery(name, element, path); } } else if (node.isObject()) { addFacetsByQuery(name, node.path(path[0]), Arrays.copyOfRange(path, 1, path.length)); } } /** * Returns the unique id of the document. * * @return the unique id of the document */ public String getId() { return id; } /** * Set the unique id of the document. * * @param id the id of the document * @return this */ public BoxDocument setId(String id) { this.id = Objects.requireNonNull(id); return this; } /** * Returns the internal document. Initialized to an empty object. * * @return the internal document */ public ObjectNode getDocument() { return document; } /** * Sets the internal document and modifies the status to READY if state not explicitly set * elsewhere. 
* * @param document the document to set * @return this */ public BoxDocument setDocument(ObjectNode document) { this.document = Objects.requireNonNull(document); if (!statusSet) { this.status = Status.READY; } return this; } /** * Sets the status to READY if state not explicitly set elsewhere and returns the internal * document. The internal documents is initially empty if not already set elsewhere. * * @return internal document */ public ObjectNode withDocument() { if (!statusSet) { this.status = Status.READY; } return this.document; } /** * Returns the cursor. * * @return the cursor */ public Optional getCursor() { return cursor; } /** * Sets the cursor. * *

When processing a document, this should only be updated if the box document actually changed * since last time. This should generally be left blank because Box will make that determination * and set it appropriately when saving. If it's not blank, Box will honor the set value. * * @param cursor the cursor to set * @return this */ public BoxDocument setCursor(Long cursor) { this.cursor = Optional.ofNullable(cursor); return this; } /** * Returns the modified date. * * @return when the document was last modified */ public Optional getModified() { return modified; } /** * Sets the modified date. * *

When processing a document, this should only be updated if the box document actually changed * since last time. This should generally be left blank because Box will make that determination * and set it appropriately when saving. If it's not blank, Box will honor the set value. * * @param modified the modified to set * @return this */ public BoxDocument setModified(Instant modified) { this.modified = Optional.ofNullable(modified); return this; } /** * Returns the processed date. * * @return the processed */ public Optional getProcessed() { return processed; } /** * Sets the processed date. * *

When processing a document, this can be left blank because Box will set it appropriately * when saving. If it's not blank, Box will honor the set value. * * @param processed the processed to set * @return this */ public BoxDocument setProcessed(Instant processed) { this.processed = Optional.ofNullable(processed); return this; } /** * Return the error message. * * @return the error */ public Optional getMessage() { return message; } /** * Sets the message if an error occurred. * * @param message the message to set if an error occurred * @return this */ public BoxDocument setMessage(String message) { this.message = Optional.ofNullable(message); return this; } /** * Returns the facets. * * @return the facets */ public Set getFacets() { return facets; } /** * Returns all values for the given facet. An empty set is returned if no facet of the given name * exists. * * @param facetName the facet name * @return the facet values */ public Set getFacetValues(String facetName) { return Collections.unmodifiableSet( (Set) facets.stream() .filter(f -> f.getName().equals(facetName)) .map(f -> f.getValue()) .collect(Collectors.toCollection(LinkedHashSet::new))); } /** * Returns the first value for the given facet. * * @param facetName the facet name * @return the first facet value */ public Optional getFacetValue(String facetName) { return facets.stream() .filter(f -> f.getName().equals(facetName)) .map(f -> f.getValue()) .findFirst(); } /** * Returns the dependencies. * * @return the dependencies */ public Set getDependencies() { return dependencies; } /** * Returns the groupId. * * @return the groupId */ public Optional getGroupId() { return groupId; } /** * Sets this document's group (used for orphan cleanup). * * @param groupId the groupId to set * @return this */ public BoxDocument setGroupId(String groupId) { this.groupId = Optional.ofNullable(groupId); return this; } /** * Returns the status. 
* * @return the status */ public Status getStatus() { return status; } /** * Explicitly sets the status. * * @param status the status to set * @return this */ public BoxDocument setStatus(Status status) { Objects.requireNonNull(status); this.status = status; this.statusSet = true; return this; } /** * Clears the set of facets. * * @return this */ public BoxDocument clearFacets() { this.facets = new LinkedHashSet<>(); return this; } /** * Clears the set of dependencies. * * @return this */ public BoxDocument clearDependencies() { this.dependencies = new LinkedHashSet<>(); return this; } @Override public int hashCode() { return Arrays.hashCode(hash()); } /** * {@inheritDoc} * *

This method follows the same comparison Box uses internally to determine if the document has * been modified. All fields are compared except for the volatile fields cursor, modified, and * processed. The two documents must serialize the same in order for them to be considered equal. */ @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null) { return false; } if (getClass() != o.getClass()) { return false; } return Arrays.equals(((BoxDocument) o).hash(), hash()); } @Override public String toString() { return mapper.writeValueAsString(this); } /** * Returns the string representation of this document with the given field projection applied. * Calling this method with an empty projection returns the entire document. * *

Note: projection follows MongoDB's projection pattern * (https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/). The projection * is only applied to child nodes of objects. Arrays or nested arrays are iterated over and * projection picks up again when objects are found. Primitive values that are not part of a * terminal projection are removed. * * @param fields collection of dot-notated fields representing the projection * @return the string representation of this document with projection applied. */ public String toString(Collection fields) { return mapper.writeValueAsString(toJson(fields)); } /** * Serializes the metadata to an {@link ObjectNode}. * * @return the serialized metadata */ private ObjectNode metadataToJson() { return metadataToJson(false); } /** * Serializes the metadata to an {@link ObjectNode} excluding volatile fields if indicated. * * @param excludeVolatile excludes the cursor, modified, and processed * @return the serialized metadata */ private ObjectNode metadataToJson(boolean excludeVolatile) { ObjectNode metadata = mapper.createObjectNode(); metadata.put("id", id); metadata.put("status", status.name()); if (!excludeVolatile) { if (cursor.isPresent()) { metadata.put("cursor", cursor.get() + ""); } if (modified.isPresent()) { metadata.put("modified", modified.get().toString()); } if (processed.isPresent()) { metadata.put("processed", processed.get().toString()); } } if (message.isPresent()) { metadata.put("message", message.get()); } if (!facets.isEmpty()) { for (Facet facet : facets) { metadata .withArray("facets") .addObject() .put("name", facet.getName()) .put("value", facet.getValue()); } } if (!dependencies.isEmpty()) { for (DocumentId dep : dependencies) { metadata .withArray("dependencies") .addObject() .put("sourceName", dep.getSourceName()) .put("id", dep.getId()); } } if (groupId.isPresent()) { metadata.put("groupId", groupId.get()); } return metadata; } /** * Status of the document. 
* * @author Charles Draper */ public static enum Status { /** The document is new and has not yet been processed. */ UNPROCESSED, /** The document has been processed and is ready to use. */ READY, /** The document has been deleted. */ DELETED, /** There was an error in the processing. */ ERROR } /** * Deserializer of BoxDocument for Jackson. * * @author Charles Draper */ public static class Deserializer extends StdDeserializer { private static final long serialVersionUID = 1L; /** Creates a new jackson Deserializer. */ public Deserializer() { this(null); } /** * Creates a new jackson Deserializer for a box document. * * @param vc the {@link BoxDocument} class */ public Deserializer(Class vc) { super(vc); } @Override public BoxDocument deserialize(JsonParser parser, DeserializationContext context) throws IOException, JsonProcessingException { return BoxDocument.parse((ObjectNode) parser.readValueAsTree(), false); } } /** * Serializer of BoxDocument for Jackson. * * @author Charles Draper */ public static class Serializer extends StdSerializer { private static final long serialVersionUID = 1L; /** Creates a new jackson Serializer. */ public Serializer() { this(null); } /** * Creates a new jackson Serializer for a box document. 
* * @param t the {@link BoxDocument} class */ public Serializer(Class t) { super(t); } @Override public void serialize(BoxDocument doc, JsonGenerator gen, SerializerProvider provider) throws IOException, JsonProcessingException { write(doc, gen, false); } private static void write(BoxDocument doc, JsonGenerator gen, boolean excludeVolatile) throws IOException, JsonProcessingException { gen.writeStartObject(); for (JsonField child : new JsonField(doc.getDocument())) { gen.writeObjectField(child.getKey(), child.getValue()); } gen.writeObjectField("@box", doc.metadataToJson(excludeVolatile)); gen.writeEndObject(); } } @Data @AllArgsConstructor @JsonSerialize(using = NonVolatileSerializer.class) private static class NonVolatileBoxDocument { private BoxDocument document; } private static class NonVolatileSerializer extends StdSerializer { private static final long serialVersionUID = 1L; @SuppressWarnings("unused") public NonVolatileSerializer() { this(null); } public NonVolatileSerializer(Class t) { super(t); } @Override public void serialize( NonVolatileBoxDocument doc, JsonGenerator gen, SerializerProvider provider) throws IOException, JsonProcessingException { Serializer.write(doc.getDocument(), gen, true); } } }





// © 2015 - 2024 Weber Informatics LLC | Privacy Policy