All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.SoftDeletesRetentionMergePolicy Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;


import java.io.IOException;
import java.util.Objects;
import java.util.function.Supplier;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;

/**
 * A {@link MergePolicy} that carries soft-deleted documents across merges. The policy wraps the merge
 * reader and marks as "live" those documents that have a value in the soft-deletes field and match the
 * provided retention query. This makes it possible, for instance, to keep documents alive based on time
 * or on any other constraint that can be expressed as a query against the index.
 * The main purpose of this merge policy is to implement retention policies that control when document
 * modifications vanish from the index, i.e. when soft deletes are reclaimed by merges.
 * @lucene.experimental
 */
public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMergePolicy {
  /** Name of the doc-values field that marks a document as soft deleted. */
  private final String field;
  /** Supplies the retention query; it is invoked anew each time the policy needs the query. */
  private final Supplier<Query> retentionQuerySupplier;

  /**
   * Creates a new {@link SoftDeletesRetentionMergePolicy}
   * @param field the soft deletes field
   * @param retentionQuerySupplier a query supplier for the retention query
   * @param in the wrapped MergePolicy
   * @throws NullPointerException if {@code field} or {@code retentionQuerySupplier} is {@code null}
   */
  public SoftDeletesRetentionMergePolicy(String field, Supplier<Query> retentionQuerySupplier, MergePolicy in) {
    // The lambda captures the constructor parameters (not the instance fields), because instance state
    // is not accessible while the super constructor call is being evaluated.
    super(in, toWrap -> new MergePolicy.OneMerge(toWrap.segments) {
      @Override
      public CodecReader wrapForMerge(CodecReader reader) throws IOException {
        CodecReader wrapped = toWrap.wrapForMerge(reader);
        Bits liveDocs = reader.getLiveDocs();
        if (liveDocs == null) { // no deletes - just keep going
          return wrapped;
        }
        return applyRetentionQuery(field, retentionQuerySupplier.get(), wrapped);
      }
    });
    // The super call has to come first, so the arguments can only be validated afterwards.
    Objects.requireNonNull(field, "field must not be null");
    Objects.requireNonNull(retentionQuerySupplier, "retentionQuerySupplier must not be null");
    this.field = field;
    this.retentionQuerySupplier = retentionQuerySupplier;
  }

  /**
   * Keeps a fully deleted segment if at least one of its documents matches the retention query.
   * When no scorer can be obtained for the segment the decision is delegated to the super class.
   *
   * @param readerIOSupplier supplies the reader of the fully deleted segment
   * @return {@code true} if the segment should be kept
   * @throws IOException if reading the segment fails
   */
  @Override
  public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) throws IOException {
    CodecReader reader = readerIOSupplier.get();
    /* we only need a single hit to keep it no need for soft deletes to be checked*/
    // The reader is re-wrapped with null live docs so that the deletes of the segment are ignored.
    Scorer scorer = getScorer(retentionQuerySupplier.get(), FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc()));
    if (scorer != null) {
      DocIdSetIterator iterator = scorer.iterator();
      return iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
    }
    return super.keepFullyDeletedSegment(readerIOSupplier);
  }

  /**
   * Returns a view of {@code reader} in which deleted documents that carry a value in
   * {@code softDeleteField} and match {@code retentionQuery} are marked as live again.
   * The given reader is returned unchanged if it has no deletes or if no scorer can be obtained
   * for the combined query.
   *
   * @param softDeleteField the soft deletes field
   * @param retentionQuery the query selecting the soft-deleted documents to retain
   * @param reader the reader to wrap
   * @return the wrapped reader, or {@code reader} itself if nothing has to be retained
   * @throws IOException if executing the query fails
   */
  // pkg private for testing
  static CodecReader applyRetentionQuery(String softDeleteField, Query retentionQuery, CodecReader reader) throws IOException {
    Bits liveDocs = reader.getLiveDocs();
    if (liveDocs == null) { // no deletes - just keep going
      return reader;
    }
    // Invert the live docs so that exactly the deleted documents are exposed as live.
    CodecReader wrappedReader = FilterCodecReader.wrapLiveDocs(reader, new Bits() { // only search deleted
      @Override
      public boolean get(int index) {
        return liveDocs.get(index) == false;
      }

      @Override
      public int length() {
        return liveDocs.length();
      }
    }, reader.maxDoc() - reader.numDocs());
    Scorer scorer = getScorer(retainedSoftDeletesQuery(softDeleteField, retentionQuery), wrappedReader);
    if (scorer == null) {
      return reader;
    }
    // Work on a copy so the live docs of the incoming reader are never modified.
    FixedBitSet cloneLiveDocs = FixedBitSet.copyOf(liveDocs);
    DocIdSetIterator iterator = scorer.iterator();
    int numExtraLiveDocs = 0;
    while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      if (cloneLiveDocs.getAndSet(iterator.docID()) == false) {
        // if we bring one back to live we need to account for it
        numExtraLiveDocs++;
      }
    }
    assert reader.numDocs() + numExtraLiveDocs <= reader.maxDoc() : "numDocs: " + reader.numDocs() + " numExtraLiveDocs: " + numExtraLiveDocs + " maxDoc: " + reader.maxDoc();
    return FilterCodecReader.wrapLiveDocs(reader, cloneLiveDocs, reader.numDocs() + numExtraLiveDocs);
  }

  /**
   * Builds the query that matches documents which have a value in {@code softDeleteField} and
   * also match {@code retentionQuery}. Both clauses are added as non-scoring filters.
   */
  private static Query retainedSoftDeletesQuery(String softDeleteField, Query retentionQuery) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new DocValuesFieldExistsQuery(softDeleteField), BooleanClause.Occur.FILTER);
    builder.add(retentionQuery, BooleanClause.Occur.FILTER);
    return builder.build();
  }

  /**
   * Creates a non-scoring {@link Scorer} for {@code query} over {@code reader}, bypassing the query cache.
   *
   * @return the scorer as returned by {@link Weight#scorer}; callers must handle {@code null}
   */
  private static Scorer getScorer(Query query, CodecReader reader) throws IOException {
    IndexSearcher s = new IndexSearcher(reader);
    s.setQueryCache(null);
    Weight weight = s.createWeight(s.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    return weight.scorer(reader.getContext());
  }

  /**
   * Returns the number of deleted documents of a segment that a merge would actually reclaim.
   * Starting from the value computed by the super class, deleted documents that have a value in the
   * soft deletes field and match the retention query are subtracted from the reader's deleted-document
   * count, since this policy keeps them alive.
   *
   * @param info the segment to inspect
   * @param delCount the delete count passed on to the super class
   * @param readerSupplier supplies a reader for the segment; only invoked if the segment has soft deletes
   * @return the number of deletes that would be merged away
   * @throws IOException if reading the segment fails
   */
  @Override
  public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
    final int numDeletesToMerge = super.numDeletesToMerge(info, delCount, readerSupplier);
    if (numDeletesToMerge != 0 && info.getSoftDelCount() > 0) {
      final CodecReader reader = readerSupplier.get();
      final Bits liveDocs = reader.getLiveDocs();
      if (liveDocs != null) {
        // Search with null live docs so that deleted documents are visible to the query.
        Scorer scorer = getScorer(retainedSoftDeletesQuery(field, retentionQuerySupplier.get()),
            FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc()));
        if (scorer != null) {
          DocIdSetIterator iterator = scorer.iterator();
          int numDeletedDocs = reader.numDeletedDocs();
          while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // Only hits on deleted documents reduce the count; live hits were never counted as deleted.
            if (liveDocs.get(iterator.docID()) == false) {
              numDeletedDocs--;
            }
          }
          return numDeletedDocs;
        }
      }
    }
    assert numDeletesToMerge >= 0 : "numDeletesToMerge: " + numDeletesToMerge;
    assert numDeletesToMerge <= info.info.maxDoc() : "numDeletesToMerge: " + numDeletesToMerge + " maxDoc:" + info.info.maxDoc();
    return numDeletesToMerge;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy