org.apache.solr.search.HashQParserPlugin Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.DoublePredicate;
import java.util.stream.Collectors;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionMatchQuery;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Hash;
import org.apache.solr.request.SolrQueryRequest;

/**
 * Syntax: {@code fq={!hash workers=11 worker=4}&partitionKeys=field1,field2} (partitionKeys is
 * read from the request parameters, not from the local params).
 */
public class HashQParserPlugin extends QParserPlugin {

  public static final String NAME = "hash";

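  // Example usage (illustrative; the field name is hypothetical): with 4 parallel workers, each
  // worker sends the same query with its own 'worker' index, e.g. worker 0 adds
  //   fq={!hash workers=4 worker=0}&partitionKeys=id
  // Each document hashes to exactly one partition, so the workers cover the index without overlap.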
  @Override
  public QParser createParser(
      String query, SolrParams localParams, SolrParams params, SolrQueryRequest request) {
    return new HashQParser(query, localParams, params, request);
  }

  private static class HashQParser extends QParser {

    public HashQParser(
        String query, SolrParams localParams, SolrParams params, SolrQueryRequest request) {
      super(query, localParams, params, request);
    }

    @Override
    public Query parse() {
      int workers = localParams.getInt("workers", 0);
      if (workers < 2) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "workers needs to be more than 1");
      }
      int worker = localParams.getInt("worker", 0);
      String keyParam = params.get("partitionKeys");
      if (keyParam == null || keyParam.trim().isEmpty()) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "partitionKeys param is required");
      }
      String[] keys = keyParam.replace(" ", "").split(",");
      // validate all fields exist in the schema; getField throws if a field is unknown
      Arrays.stream(keys).forEach(field -> req.getSchema().getField(field));

      // TODO wish to provide matchCost on FunctionMatchQuery's TwoPhaseIterator -- LUCENE-9373
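      // FunctionMatchQuery evaluates the hash per document (as a DoubleValuesSource) and keeps
      // only the documents for which the predicate (abs(hash) % workers == worker) matches.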
      return new FunctionMatchQuery(
          new HashCodeValuesSource(keys).toDoubleValuesSource(),
          new HashPartitionPredicate(workers, worker));
    }
  }

  /**
   * A {@link LongValuesSource} that computes a per-document hash of one or more fields' values,
   * fetched via DocValues.
   */
  private static class HashCodeValuesSource extends LongValuesSource {

    private final String[] fields;

    private HashCodeValuesSource(String[] fields) {
      this.fields = fields;
    }

    @Override
    public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      // produce an array of the LongValues of the hash of each field
      final LongValues[] resultValues = new LongValues[fields.length];
      for (int i = 0; i < fields.length; i++) {
        final String field = fields[i];
        final NumericDocValues numericDocValues = ctx.reader().getNumericDocValues(field);
        if (numericDocValues != null) {
          // Numeric
          resultValues[i] =
              new LongValues() {
                // Works even if this isn't a long field (it could be int, double, or float)
                // because numeric DocValues are always stored as longs.
                final NumericDocValues values = numericDocValues;
                boolean atDoc = false;

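                // Always report a value as present: if this doc has no value for the field,
                // longValue() below returns 0 so missing values still hash deterministically.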
                @Override
                public boolean advanceExact(int doc) throws IOException {
                  atDoc = values.advanceExact(doc);
                  return true;
                }

                @Override
                public long longValue() throws IOException {
                  return atDoc ? Long.hashCode(values.longValue()) : 0;
                }
              };
          continue;
        }
        final SortedDocValues sortedDocValues = ctx.reader().getSortedDocValues(field);
        if (sortedDocValues != null) {
          // String
          resultValues[i] =
              new LongValues() {
                final SortedDocValues values = sortedDocValues;
                boolean atDoc = false;

                @Override
                public boolean advanceExact(int doc) throws IOException {
                  atDoc = values.advanceExact(doc);
                  return true;
                }

                @Override
                public long longValue() throws IOException {
                  // TODO: maybe cache hashCode if same ord as prev doc to save lookupOrd?
                  return atDoc ? hashCode(values.lookupOrd(values.ordValue())) : 0;
                }

                private int hashCode(BytesRef bytesRef) {
                  // Use deterministic hashCode.  BytesRef.hashCode() varies!
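                  // (BytesRef.hashCode() is seeded per JVM run, so different nodes could
                  // disagree; murmurhash3 with a fixed seed of 0 is stable across nodes.)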
                  return Hash.murmurhash3_x86_32(
                      bytesRef.bytes, bytesRef.offset, bytesRef.length, 0);
                }
              };
          continue;
        }
        // fail if some other DocValuesType is present
        final FieldInfo fieldInfo = ctx.reader().getFieldInfos().fieldInfo(field);
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.NONE) {
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "Can't compute hash on field " + field);
        }
        // no DocValues for this field in this segment: hash it as a constant 0
        resultValues[i] = LongValuesSource.constant(0).getValues(ctx, scores);
      }

      // combine the per-field hashes into a single LongValues
      if (resultValues.length == 1) {
        return resultValues[0];
      } else {
        // Combine
        return new LongValues() {
          private long result;

          @Override
          public boolean advanceExact(int doc) throws IOException {
            // compute the hash here.
            // algorithm borrowed from Arrays.hashCode(Object[]) but without needing to call
            // hashCode redundantly
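            // e.g. for two fields the combined hash is 31 * (31 * 1 + h1) + h2, where h1/h2 are
            // each field's per-doc hash (or 0 when the field has no value for this doc)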
            result = 1;
            for (LongValues longValues : resultValues) {
              boolean present = longValues.advanceExact(doc);
              result = 31 * result + (present ? longValues.longValue() : 0);
            }
            return true; // we always have a hash value
          }

          @Override
          public long longValue() throws IOException {
            return result;
          }
        };
      }
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (!(o instanceof HashCodeValuesSource)) return false;
      HashCodeValuesSource that = (HashCodeValuesSource) o;
      return Arrays.equals(fields, that.fields);
    }

    @Override
    public int hashCode() {
      return Arrays.hashCode(fields);
    }

    @Override
    public String toString() {
      return Arrays.stream(fields).collect(Collectors.joining(",", "hash(", ")"));
    }

    @Override
    public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return this;
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return DocValues.isCacheable(ctx, fields);
    }
  }

  /**
   * Simple modulus check: a document belongs to this partition when the absolute value of its
   * hash, modulo the number of workers, equals this worker's index (standard hash partitioning).
   * Not a lambda because equals/hashCode are needed (e.g. for query caching).
   */
  private static class HashPartitionPredicate implements DoublePredicate {
    final int workers;
    final int worker;

    private HashPartitionPredicate(int workers, int worker) {
      this.workers = workers;
      this.worker = worker;
    }

    @Override
    public boolean test(double hashAsDouble) {
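      // The hash arrives as a double because FunctionMatchQuery operates on DoubleValuesSources;
      // cast it back to a long and take the absolute value so the modulus is non-negative.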
      return Math.abs((long) hashAsDouble) % workers == worker;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (!(o instanceof HashPartitionPredicate)) return false;
      HashPartitionPredicate that = (HashPartitionPredicate) o;
      return workers == that.workers && worker == that.worker;
    }

    @Override
    public int hashCode() {
      return Objects.hash(workers, worker);
    }
  }
}