All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.byu.hbll.box.impl.FacetView Maven / Gradle / Ivy

There is a newer version: 2.5.3
Show newest version
package edu.byu.hbll.box.impl;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import edu.byu.hbll.box.BoxConfigurable;
import edu.byu.hbll.box.BoxDocument;
import edu.byu.hbll.box.BoxQuery;
import edu.byu.hbll.box.ConstructConfig;
import edu.byu.hbll.box.Facet;
import edu.byu.hbll.box.InitConfig;
import edu.byu.hbll.box.QueryResult;
import edu.byu.hbll.box.client.AbstractHttpBoxClient;
import edu.byu.hbll.box.internal.util.JsonUtils;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * A view that translates a facet value into an id and includes all documents with that facet in the
 * final document.
 *
 * 

NOTE: when using the builder, you will need to make sure that @box.facets is * included in the field projection. * *

IMPORTANT: {@link FacetView} has some weaknesses. First, you cannot force the process of * upstream documents based on a facet. A facet is one-way. Documents get tagged with a facet once * they've been processed another way. Second, when a document drops its facet value recognized by * this view, it is impossible to know which group it once belonged to in order to signal the * reprocess of the group. Therefore it is good practice to occasionally reprocess documents * dependent on this view. Third, all documents corresponding to a facet id are brought in. If there * are many many documents per facet id, the resulting facet documents could be too large to fit in * memory. Set the documentLimit in this case. * * @author Charles Draper */ @SuperBuilder(toBuilder = true) @NoArgsConstructor(access = AccessLevel.PROTECTED) @AllArgsConstructor(access = AccessLevel.PROTECTED) public class FacetView extends View { private String facetName; @Builder.Default private long documentLimit = BoxQuery.DEFAULT_LIMIT; private boolean removeFacets; private long sourceDefaultLimit; /** * Creates a new {@link FacetView} with the given box client and facet to use. * * @param boxClient the client for communicating with the remote box * @param facetName the facet group name to use as the id for documents * @deprecated user builder */ @Deprecated public FacetView(AbstractHttpBoxClient boxClient, String facetName) { super(boxClient); this.facetName = facetName; } /** * See {@link BoxConfigurable#postConstruct(ConstructConfig)} and {@link * BoxConfigurable#postInit(InitConfig)}. * * @param constructConfig the construct config to use * @param initConfig the init config to use * @deprecated user builder */ @Deprecated public FacetView(ConstructConfig constructConfig, InitConfig initConfig) { postConstruct(constructConfig); postInit(initConfig); } @Override public void postConstruct(ConstructConfig config) { Set fields = new LinkedHashSet<>(); for (JsonNode field : config.getParams().path("fields")) { fields.add(field.asText()); } // FacetView requires the metadata facets field, so we add it to the view if needed if (!JsonUtils.matchesProjection(BoxQuery.METADATA_FIELD_FACETS, fields)) { fields.add(BoxQuery.METADATA_FIELD_FACETS); config.getParams().remove("fields"); fields.forEach(f -> config.getParams().withArray("fields").add(f)); removeFacets = true; } super.postConstruct(config); ObjectNode params = config.getParams(); this.facetName = Objects.requireNonNull(params.path("facetName").asText(null)); this.documentLimit = params.path("limit").asLong(BoxQuery.DEFAULT_LIMIT); this.documentLimit = this.documentLimit == -1 ? Long.MAX_VALUE : this.documentLimit; } @Override public void postInit(InitConfig config) { super.postInit(config); this.sourceDefaultLimit = config.getSource().getConfig().getDefaultLimit(); } @Override public QueryResult rawFind(BoxQuery query) { Set facetValues = new LinkedHashSet<>(); long upstreamCursor = query.getCursorOrDefault(); if (query.isHarvestQuery()) { // harvest the underlying source until we find query.getLimit() number of unique facet values BoxQuery facetValueQuery = new BoxQuery(query) .setLimit(getLimit()) .clearFields() .addFields(BoxQuery.METADATA_FIELD_FACETS); long limit = query.getLimitOrDefault(); limit = limit == BoxQuery.UNLIMITED ? sourceDefaultLimit : limit; boolean more = true; long nextCursor = query.getCursorOrDefault(); while (more) { QueryResult response = super.rawFind(facetValueQuery.setCursor(nextCursor)); nextCursor = response.getNextCursor(); more = !response.isEmpty(); for (BoxDocument document : response) { // must get all the facets from a document even if it pushes us over the limit in order // for the cursor to work out in subsequent calls if (facetValues.size() < limit) { for (Facet facet : document.getFacets()) { if (facet.getName().equals(facetName)) { facetValues.add(facet.getValue()); upstreamCursor = document.getCursor().get(); } } } } more = more && facetValues.size() < limit; } } else { query.getIds().forEach(i -> facetValues.add(i)); } QueryResult result = new QueryResult(); result.setNextCursor(upstreamCursor); if (facetValues.isEmpty()) { return result; } // add one if facet values were found to push to next page result.setNextCursor(upstreamCursor + 1); Map> documentMap = new LinkedHashMap<>(); // in order to make the request to the View, `documents.` needs to be removed from fields // starting with `documents.` Set upstreamFields = query .getFields() .stream() .filter(f -> !f.equals("documents")) .filter(f -> f.startsWith("documents.")) .map(f -> f.replaceAll("^documents.", "")) .collect(Collectors.toSet()); BoxQuery facetQuery = new BoxQuery().addFields(upstreamFields).setLimit(documentLimit); // need to make sure we get back the facets metadata field if (!JsonUtils.matchesProjection(BoxQuery.METADATA_FIELD_FACETS, upstreamFields)) { facetQuery.addField(BoxQuery.METADATA_FIELD_FACETS); } // process each facet one at a time for (String facetValue : facetValues) { documentMap.put( facetValue, super.rawFind(new BoxQuery(facetQuery).addFacet(facetName, facetValue)) .stream() // take latest if doc appears multiple times due to being updated while collecting .collect( Collectors.toMap( d -> d.getId(), Function.identity(), (x, y) -> y, LinkedHashMap::new)) .values() .stream() .collect(Collectors.toList())); } Set finalFields = new HashSet<>(upstreamFields.isEmpty() ? getFields() : upstreamFields); if (removeFacets) { finalFields.remove(BoxQuery.METADATA_FIELD_FACETS); } for (String facetValue : facetValues) { BoxDocument facetDocument = new BoxDocument(facetValue); ArrayNode documents = facetDocument.withDocument().withArray("documents"); for (BoxDocument document : documentMap.getOrDefault(facetValue, Collections.emptyList())) { documents.add(document.toJson(finalFields)); } if (documents.size() == 0) { facetDocument.getDocument().removeAll(); facetDocument.setAsDeleted(); } if (!query.getFields().isEmpty()) { ObjectNode json = facetDocument.toJson(query.getFields()); json.with("@box") .put("id", facetDocument.getId()) .put("status", facetDocument.getStatus().toString()); facetDocument = BoxDocument.parse(json); } result.add(facetDocument); } for (BoxDocument doc : result) { doc.setCursor(upstreamCursor); } return result; } // needed for javadoc to succeed (https://stackoverflow.com/a/58809436/1530184) @java.lang.SuppressWarnings("all") @lombok.Generated public abstract static class FacetViewBuilder< C extends FacetView, B extends FacetViewBuilder> extends View.ViewBuilder { @SuppressWarnings("unused") private B removeFacets(boolean removeFacets) { return self(); } @SuppressWarnings("unused") private B sourceDefaultLimit(long sourceDefaultLimit) { return self(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy