org.apache.solr.response.transform.ChildDocTransformer Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.response.transform;

import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR;
import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR;
import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.BitsFilteredPostingsEnum;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class ChildDocTransformer extends DocTransformer {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private static final String ANON_CHILD_KEY = "_childDocuments_";

  private final String name;
  private final BitSetProducer parentsFilter; // if null; resolve parent via uniqueKey instead
  private final DocSet childDocSet;
  private final int limit;
  private final boolean isNestedSchema;
  private final SolrReturnFields childReturnFields;
  private final String[] extraRequestedFields;

  ChildDocTransformer(
      String name,
      BitSetProducer parentsFilter,
      DocSet childDocSet,
      SolrReturnFields returnFields,
      boolean isNestedSchema,
      int limit,
      String uniqueKeyField) {
    this.name = name;
    this.parentsFilter = parentsFilter;
    this.childDocSet = childDocSet;
    this.limit = limit;
    this.isNestedSchema = isNestedSchema;
    this.childReturnFields = returnFields != null ? returnFields : new SolrReturnFields();
    this.extraRequestedFields = parentsFilter == null ? new String[] {uniqueKeyField} : null;
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public boolean needsSolrIndexSearcher() {
    return true;
  }

  @Override
  public String[] getExtraRequestFields() {
    return extraRequestedFields;
  }

  private int getPrevRootGivenFilter(LeafReaderContext leafReaderContext, int segRootId)
      throws IOException {
    final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
    if (segParentsBitSet != null) {
      return segRootId == 0 ? -1 : segParentsBitSet.prevSetBit(segRootId - 1);
    }
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Parent filter '" + parentsFilter + "' doesn't match any parent documents");
  }

  private int getPrevRootGivenId(
      LeafReaderContext leafReaderContext, int segRootId, BytesRef idBytes) throws IOException {
    final LeafReader reader = leafReaderContext.reader();
    final Terms terms = reader.terms(IndexSchema.ROOT_FIELD_NAME); // never returns null here
    final TermsEnum iterator = terms.iterator();
    if (iterator.seekExact(idBytes)) {
      PostingsEnum docs = iterator.postings(null, PostingsEnum.NONE);
      docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs());
      int id = docs.nextDoc();
      if (id != DocIdSetIterator.NO_MORE_DOCS) {
        assert id <= segRootId : "integrity violation";
        return id - 1;
      }
    }
    return segRootId - 1; // thus no child docs
  }

  @Override
  public void transform(SolrDocument rootDoc, int rootDocId) {
    // note: this algorithm works if both if we have have _nest_path_  and also if we don't!

    try {

      // lookup what the *previous* rootDocId is, and figure which segment this is
      final SolrIndexSearcher searcher = context.getSearcher();
      final List leaves = searcher.getIndexReader().leaves();
      final int seg = ReaderUtil.subIndex(rootDocId, leaves);
      final LeafReaderContext leafReaderContext = leaves.get(seg);
      final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
      final int segBaseId = leafReaderContext.docBase;
      final int segRootId = rootDocId - segBaseId;

      // can return be -1 and that's okay  (happens for very first block)
      final int segPrevRootId;
      if (parentsFilter != null) {
        segPrevRootId = getPrevRootGivenFilter(leafReaderContext, segRootId);
      } else {
        final IndexSchema schema = searcher.getSchema();
        final String idStr = schema.printableUniqueKey(rootDoc);
        if (idStr == null) {
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "[child] requires fl to include the ID");
        }
        final BytesRef idBytes = schema.indexableUniqueKey(idStr);
        segPrevRootId = getPrevRootGivenId(leafReaderContext, rootDocId, idBytes);
      }

      if (segPrevRootId == (segRootId - 1)) {
        // doc has no children, return fast
        return;
      }

      // we'll need this soon...
      final SortedDocValues segPathDocValues =
          DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME);
      // passing a different SortedDocValues obj since the child documents which come after are of
      // smaller docIDs, and the iterator can not be reversed. The root doc is the input document to
      // be transformed, and is not necessarily the root doc of the block of docs.
      final String rootDocPath =
          getPathByDocId(
              segRootId, DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME));

      // the key in the Map is the document's ancestors key (one above the parent), while the key in
      // the intermediate Map is the direct child document's key(of the parent document)
      final Map>> pendingParentPathsToChildren =
          new HashMap<>();

      final int firstChildId = segBaseId + segPrevRootId + 1;
      int matches = 0;
      // Loop each child ID up to the parent (exclusive).
      for (int docId = firstChildId; docId < rootDocId; ++docId) {
        final int segDocId = docId - segBaseId;

        // check whether doc is "live"
        if (liveDocs != null && !liveDocs.get(segDocId)) {
          // doc is not "live"; return fast
          continue;
        }

        // get the path.  (note will default to ANON_CHILD_KEY if schema is not nested or empty
        // string if blank)
        final String fullDocPath = getPathByDocId(segDocId, segPathDocValues);

        if (isNestedSchema && !fullDocPath.startsWith(rootDocPath)) {
          // is not a descendant of the transformed doc; return fast.
          continue;
        }

        // Is this doc a direct ancestor of another doc we've seen?
        boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath);

        // Do we need to do anything with this doc (either ancestor or matched the child query)
        if (isAncestor || childDocSet == null || childDocSet.exists(docId)) {

          // If we reached the limit, only add if it's an ancestor
          if (limit != -1 && matches >= limit && !isAncestor) {
            continue;
          }
          ++matches; // note: includes ancestors that are not necessarily in childDocSet

          // load the doc
          SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, childReturnFields);
          if (childReturnFields.getTransformer() != null) {
            if (childReturnFields.getTransformer().context == null) {
              childReturnFields.getTransformer().setContext(context);
            }
            childReturnFields.getTransformer().transform(doc, docId);
          }

          if (isAncestor) {
            // if this path has pending child docs, add them.
            addChildrenToParent(
                doc, pendingParentPathsToChildren.remove(fullDocPath)); // no longer pending
          }

          // get parent path
          String parentDocPath = getParentPath(fullDocPath);
          String lastPath = getLastPath(fullDocPath);
          // put into pending:
          // trim path if the doc was inside array, see trimPathIfArrayDoc()
          // e.g. toppings#1/ingredients#1 -> outer map key toppings#1
          // -> inner Map key ingredients
          // or lonely#/lonelyGrandChild# -> outer map key lonely#
          // -> inner Map key lonelyGrandChild#
          pendingParentPathsToChildren
              .computeIfAbsent(parentDocPath, x -> new HashMap<>())
              .computeIfAbsent(trimLastPoundIfArray(lastPath), k -> new ArrayList<>())
              .add(doc);
        }
      }

      if (pendingParentPathsToChildren.isEmpty()) {
        // no child docs matched the child filter; return fast.
        return;
      }

      // only children of parent remain
      assert pendingParentPathsToChildren.keySet().size() == 1;

      // size == 1, so get the last remaining entry
      addChildrenToParent(rootDoc, pendingParentPathsToChildren.values().iterator().next());

    } catch (IOException e) {
      // TODO DWS: reconsider this unusual error handling approach; shouldn't we rethrow?
      log.warn("Could not fetch child documents", e);
      rootDoc.put(getName(), "Could not fetch child documents");
    }
  }

  private static void addChildrenToParent(
      SolrDocument parent, Map> children) {
    for (Map.Entry> entry : children.entrySet()) {
      addChildrenToParent(parent, entry.getValue(), entry.getKey());
    }
  }

  private static void addChildrenToParent(
      SolrDocument parent, List children, String cDocsPath) {
    // if no paths; we do not need to add the child document's relation to its parent document.
    if (cDocsPath.equals(ANON_CHILD_KEY)) {
      parent.addChildDocuments(children);
      return;
    }
    // lookup leaf key for these children using path
    // depending on the label, add to the parent at the right key/label
    String trimmedPath = trimLastPound(cDocsPath);
    // if the child doc's path does not end with #, it is an array(same string is returned by
    // ChildDocTransformer#trimLastPound)
    if (!parent.containsKey(trimmedPath) && (trimmedPath.equals(cDocsPath))) {
      List list = new ArrayList<>(children);
      parent.setField(trimmedPath, list);
      return;
    }
    // is single value
    parent.setField(trimmedPath, children.get(0));
  }

  private static String getLastPath(String path) {
    int lastIndexOfPathSepChar = path.lastIndexOf(PATH_SEP_CHAR);
    if (lastIndexOfPathSepChar == -1) {
      return path;
    }
    return path.substring(lastIndexOfPathSepChar + 1);
  }

  private static String trimLastPoundIfArray(String path) {
    // remove index after last pound sign and if there is an array index e.g. toppings#1 -> toppings
    // or return original string if child doc is not in an array ingredients# -> ingredients#
    final int indexOfSepChar = path.lastIndexOf(NUM_SEP_CHAR);
    if (indexOfSepChar == -1) {
      return path;
    }
    int lastIndex = path.length() - 1;
    boolean singleDocVal = indexOfSepChar == lastIndex;
    return singleDocVal ? path : path.substring(0, indexOfSepChar);
  }

  private static String trimLastPound(String path) {
    // remove index after last pound sign and index from e.g. toppings#1 -> toppings
    int lastIndex = path.lastIndexOf('#');
    return lastIndex == -1 ? path : path.substring(0, lastIndex);
  }

  /** Returns the *parent* path for this document. Children of the root will yield null. */
  private static String getParentPath(String currDocPath) {
    // chop off leaf (after last '/')
    // if child of leaf then return null (special value)
    int lastPathIndex = currDocPath.lastIndexOf(PATH_SEP_CHAR);
    return lastPathIndex == -1 ? null : currDocPath.substring(0, lastPathIndex);
  }

  /** Looks up the nest path. If there is none, returns {@link #ANON_CHILD_KEY}. */
  private String getPathByDocId(int segDocId, SortedDocValues segPathDocValues) throws IOException {
    if (!isNestedSchema) {
      return ANON_CHILD_KEY;
    }
    int numToAdvance =
        segPathDocValues.docID() == -1 ? segDocId : segDocId - (segPathDocValues.docID());
    assert numToAdvance >= 0;
    boolean advanced = segPathDocValues.advanceExact(segDocId);
    return advanced ? segPathDocValues.lookupOrd(segPathDocValues.ordValue()).utf8ToString() : "";
  }
}