All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.demo.facet.CustomFacetSetExample Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.demo.facet;

import java.io.IOException;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.Arrays;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.facetset.DimRange;
import org.apache.lucene.facet.facetset.ExactFacetSetMatcher;
import org.apache.lucene.facet.facetset.FacetSet;
import org.apache.lucene.facet.facetset.FacetSetDecoder;
import org.apache.lucene.facet.facetset.FacetSetMatcher;
import org.apache.lucene.facet.facetset.FacetSetsField;
import org.apache.lucene.facet.facetset.MatchingFacetSetsCounts;
import org.apache.lucene.facet.facetset.RangeFacetSetMatcher;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

/**
 * Shows usage of indexing and searching {@link FacetSetsField} with a custom {@link FacetSet}
 * implementation. Unlike the out of the box {@link FacetSet} implementations, this example shows
 * how to mix and match dimensions of different types, as well as implementing a custom {@link
 * FacetSetMatcher}.
 */
public class CustomFacetSetExample {

  // Reading dates, as epoch milliseconds at the start of the given day in UTC (see date(String)).
  private static final long MAY_SECOND_2022 = date("2022-05-02");
  private static final long JUNE_SECOND_2022 = date("2022-06-02");
  private static final long JULY_SECOND_2022 = date("2022-07-02");
  // Temperatures are named after their Fahrenheit value, but the constants hold the value
  // converted to Celsius (see fahrenheitToCelsius(int)).
  private static final float HUNDRED_TWENTY_DEGREES = fahrenheitToCelsius(120);
  private static final float HUNDRED_DEGREES = fahrenheitToCelsius(100);
  private static final float EIGHTY_DEGREES = fahrenheitToCelsius(80);

  // Directory holding the example index; written by index() and read by every example method.
  private final Directory indexDir = new ByteBuffersDirectory();

  /** Creates the example. No index is built here; each {@code run*} method indexes on demand. */
  public CustomFacetSetExample() {}

  /**
   * Builds the example index in {@code indexDir}, replacing whatever was indexed there before
   * (the writer is opened with {@link OpenMode#CREATE}).
   *
   * <p>Two documents are indexed, one per city. Each holds three temperature readings (one per
   * date) in the {@code temperature} facet sets field, plus the fast match fields added by {@link
   * #addFastMatchFields(Document)}.
   *
   * @throws IOException if the index cannot be written
   */
  private void index() throws IOException {
    IndexWriterConfig config =
        new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);

    // try-with-resources closes the writer even when adding a document throws, so a failed run
    // does not leave the writer open on the shared directory.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, config)) {

      // Every document holds the temperature measures for a City by Date

      Document doc = new Document();
      doc.add(new StringField("city", "city1", Field.Store.YES));
      doc.add(
          FacetSetsField.create(
              "temperature",
              new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES),
              new TemperatureReadingFacetSet(JUNE_SECOND_2022, EIGHTY_DEGREES),
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
      addFastMatchFields(doc);
      indexWriter.addDocument(doc);

      doc = new Document();
      doc.add(new StringField("city", "city2", Field.Store.YES));
      doc.add(
          FacetSetsField.create(
              "temperature",
              new TemperatureReadingFacetSet(MAY_SECOND_2022, EIGHTY_DEGREES),
              new TemperatureReadingFacetSet(JUNE_SECOND_2022, HUNDRED_DEGREES),
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES)));
      addFastMatchFields(doc);
      indexWriter.addDocument(doc);
    }
  }

  /**
   * Adds the un-stored {@code day} and {@code temp} string fields which the fast match query
   * searches: one {@code day} value per reading date and one {@code temp} value per temperature.
   */
  private void addFastMatchFields(Document doc) {
    // day field
    final long[] days = {MAY_SECOND_2022, JUNE_SECOND_2022, JULY_SECOND_2022};
    for (long day : days) {
      doc.add(new StringField("day", String.valueOf(day), Field.Store.NO));
    }

    // temp field
    final float[] temperatures = {EIGHTY_DEGREES, HUNDRED_DEGREES, HUNDRED_TWENTY_DEGREES};
    for (float temperature : temperatures) {
      doc.add(new StringField("temp", String.valueOf(temperature), Field.Store.NO));
    }
  }

  /**
   * Counts the documents holding a reading which exactly matches one of two given {@link
   * FacetSet}s.
   */
  private FacetResult exactMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      // Collect every non-deleted document ("browsing" the index) by searching with a
      // MatchAllDocsQuery; a real application would normally run a "normal" query here.
      IndexSearcher searcher = new IndexSearcher(indexReader);
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // One exact matcher per counted reading: "May 2022, 100 degrees" and
      // "July 2022, 120 degrees"
      FacetSetMatcher mayHundred =
          new ExactFacetSetMatcher(
              "May 2022 (100f)", new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES));
      FacetSetMatcher julyHundredTwenty =
          new ExactFacetSetMatcher(
              "July 2022 (120f)",
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES));

      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              mayHundred,
              julyHundredTwenty);

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Counts the documents holding a reading which exactly matches one of two given {@link
   * FacetSet}s, like {@link #exactMatching()}. In addition, a fast match query is passed to the
   * counting so that documents which cannot possibly match a set are skipped, improving the
   * counting efficiency.
   */
  private FacetResult exactMatchingWithFastMatchQuery() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      // Collect every non-deleted document ("browsing" the index) by searching with a
      // MatchAllDocsQuery; a real application would normally run a "normal" query here.
      IndexSearcher searcher = new IndexSearcher(indexReader);
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Match documents whose "day" field is either "May 2022" or "July 2022"
      Query dateQuery =
          new TermInSetQuery(
              "day",
              Arrays.asList(
                  new BytesRef(String.valueOf(MAY_SECOND_2022)),
                  new BytesRef(String.valueOf(JULY_SECOND_2022))));

      // Match documents whose "temp" field is either the "100" or the "120" degrees value
      Query temperatureQuery =
          new TermInSetQuery(
              "temp",
              Arrays.asList(
                  new BytesRef(String.valueOf(HUNDRED_DEGREES)),
                  new BytesRef(String.valueOf(HUNDRED_TWENTY_DEGREES))));

      // Documents must match both clauses
      BooleanQuery.Builder fastMatchBuilder = new BooleanQuery.Builder();
      fastMatchBuilder.add(dateQuery, BooleanClause.Occur.MUST);
      fastMatchBuilder.add(temperatureQuery, BooleanClause.Occur.MUST);
      Query fastMatchQuery = fastMatchBuilder.build();

      // One exact matcher per counted reading: "May 2022, 100 degrees" and
      // "July 2022, 120 degrees"
      FacetSetMatcher mayHundred =
          new ExactFacetSetMatcher(
              "May 2022 (100f)", new TemperatureReadingFacetSet(MAY_SECOND_2022, HUNDRED_DEGREES));
      FacetSetMatcher julyHundredTwenty =
          new ExactFacetSetMatcher(
              "July 2022 (120f)",
              new TemperatureReadingFacetSet(JULY_SECOND_2022, HUNDRED_TWENTY_DEGREES));

      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              fastMatchQuery,
              mayHundred,
              julyHundredTwenty);

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Counts the documents holding a reading whose degrees fall within a given range, whatever the
   * date of the reading is.
   */
  private FacetResult rangeMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      // Collect every non-deleted document ("browsing" the index) by searching with a
      // MatchAllDocsQuery; a real application would normally run a "normal" query here.
      IndexSearcher searcher = new IndexSearcher(indexReader);
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // The date dimension accepts every long value, so only the degrees restrict the match
      DimRange anyDate = DimRange.fromLongs(Long.MIN_VALUE, true, Long.MAX_VALUE, true);
      // Count 80-100 degrees (both ends inclusive)
      DimRange eightyToHundred = DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true);

      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              new RangeFacetSetMatcher("Eighty to Hundred Degrees", anyDate, eightyToHundred));

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Same counting as {@link #rangeMatching()}, but done with a custom {@link FacetSetMatcher}
   * that evaluates only some of the dimensions (here: only the temperature one).
   */
  private FacetResult customRangeMatching() throws IOException {
    try (DirectoryReader indexReader = DirectoryReader.open(indexDir)) {
      // Collect every non-deleted document ("browsing" the index) by searching with a
      // MatchAllDocsQuery; a real application would normally run a "normal" query here.
      IndexSearcher searcher = new IndexSearcher(indexReader);
      FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());

      // Count 80-100 degrees (both ends inclusive)
      DimRange eightyToHundred = DimRange.fromFloats(EIGHTY_DEGREES, true, HUNDRED_DEGREES, true);
      FacetSetMatcher temperatureMatcher =
          new TemperatureOnlyFacetSetMatcher("Eighty to Hundred Degrees", eightyToHundred);

      Facets facets =
          new MatchingFacetSetsCounts(
              "temperature",
              fc,
              TemperatureReadingFacetSet::decodeTemperatureReading,
              temperatureMatcher);

      // Retrieve results
      return facets.getAllChildren("temperature");
    }
  }

  /**
   * Parses an ISO local date (e.g. {@code 2022-05-02}) and returns the epoch milliseconds of the
   * start of that day in UTC.
   */
  private static long date(String dateString) {
    final LocalDate day = LocalDate.parse(dateString);
    return day.atStartOfDay(ZoneOffset.UTC).toInstant().toEpochMilli();
  }

  /** Converts a temperature given in whole degrees Fahrenheit to degrees Celsius. */
  private static float fahrenheitToCelsius(int degrees) {
    final float aboveFreezing = degrees - 32.0f;
    return aboveFreezing * 5.f / 9.f;
  }

  /**
   * Builds the example index and then runs the exact matching example on it.
   *
   * @return the result of the exact matching example
   * @throws IOException if indexing or searching fails
   */
  public FacetResult runExactMatching() throws IOException {
    index();
    final FacetResult counts = exactMatching();
    return counts;
  }

  /**
   * Builds the example index and then runs the exact matching with fast match query example on
   * it.
   *
   * @return the result of the exact matching with fast match query example
   * @throws IOException if indexing or searching fails
   */
  public FacetResult runExactMatchingWithFastMatchQuery() throws IOException {
    index();
    final FacetResult counts = exactMatchingWithFastMatchQuery();
    return counts;
  }

  /**
   * Builds the example index and then runs the range matching example on it.
   *
   * @return the result of the range matching example
   * @throws IOException if indexing or searching fails
   */
  public FacetResult runRangeMatching() throws IOException {
    index();
    final FacetResult counts = rangeMatching();
    return counts;
  }

  /**
   * Builds the example index and then runs the custom range matching example on it.
   *
   * @return the result of the custom range matching example
   * @throws IOException if indexing or searching fails
   */
  public FacetResult runCustomRangeMatching() throws IOException {
    index();
    final FacetResult counts = customRangeMatching();
    return counts;
  }

  /**
   * Runs the four facet set matching examples one after the other, printing a title, a separator
   * line and the result of each.
   */
  public static void main(String[] args) throws Exception {
    final String separator = "-----------------------";
    final String resultPrefix = "Temperature Reading: ";
    CustomFacetSetExample example = new CustomFacetSetExample();

    // The title and separator are printed before the example runs, the result afterwards.
    System.out.println("Exact Facet Set matching example:");
    System.out.println(separator);
    System.out.println(resultPrefix + example.runExactMatching());

    System.out.println("Exact Facet Set matching with fast match query example:");
    System.out.println(separator);
    System.out.println(resultPrefix + example.runExactMatchingWithFastMatchQuery());

    System.out.println("Range Facet Set matching example:");
    System.out.println(separator);
    System.out.println(resultPrefix + example.runRangeMatching());

    System.out.println("Custom Range Facet Set matching example:");
    System.out.println(separator);
    System.out.println(resultPrefix + example.runCustomRangeMatching());
  }

  /**
   * A two-dimensional {@link FacetSet} describing a single temperature reading: the date of the
   * reading (long) and the measured degrees (celsius; float).
   */
  public static class TemperatureReadingFacetSet extends FacetSet {

    /** Packed size of one reading: the date bytes followed by the degrees bytes. */
    private static final int SIZE_PACKED_BYTES = Long.BYTES + Float.BYTES;

    private final long date;
    private final float degrees;

    /**
     * Creates a temperature reading.
     *
     * @param date the date of the reading
     * @param degrees the measured degrees
     */
    public TemperatureReadingFacetSet(long date, float degrees) {
      super(2); // Two dimensions are encoded: date and degrees

      this.date = date;
      this.degrees = degrees;
    }

    @Override
    public long[] getComparableValues() {
      final long[] comparable = new long[2];
      comparable[0] = date;
      // Floats are compared through their sortable integer form.
      comparable[1] = NumericUtils.floatToSortableInt(degrees);
      return comparable;
    }

    @Override
    public int packValues(byte[] buf, int start) {
      // The date comes first, immediately followed by the degrees.
      final int degreesOffset = start + Long.BYTES;
      LongPoint.encodeDimension(date, buf, start);
      // Encode 'degrees' as a sortable integer.
      FloatPoint.encodeDimension(degrees, buf, degreesOffset);
      return sizePackedBytes();
    }

    @Override
    public int sizePackedBytes() {
      return SIZE_PACKED_BYTES;
    }

    /**
     * Decodes one packed {@link TemperatureReadingFacetSet} into comparable long values; an
     * implementation of {@link FacetSetDecoder#decode(BytesRef, int, long[])}.
     *
     * @param bytesRef holds the packed bytes
     * @param start index in {@code bytesRef.bytes} at which the packed reading starts
     * @param dest receives the date at index 0 and the degrees (sortable integer) at index 1
     * @return the number of bytes which one packed reading occupies
     */
    public static int decodeTemperatureReading(BytesRef bytesRef, int start, long[] dest) {
      final byte[] packed = bytesRef.bytes;
      dest[0] = LongPoint.decodeDimension(packed, start);
      // Decode the degrees as a sortable integer.
      dest[1] = IntPoint.decodeDimension(packed, start + Long.BYTES);
      return SIZE_PACKED_BYTES;
    }
  }

  /**
   * A {@link FacetSetMatcher} which looks only at the temperature dimension of a facet set; the
   * date dimension is ignored.
   */
  public static class TemperatureOnlyFacetSetMatcher extends FacetSetMatcher {

    /** Inclusive range which the temperature value must fall in. */
    private final DimRange temperatureRange;

    /**
     * Creates a matcher.
     *
     * @param label the label passed on to {@link FacetSetMatcher}
     * @param temperatureRange the accepted range of the temperature dimension
     */
    protected TemperatureOnlyFacetSetMatcher(String label, DimRange temperatureRange) {
      super(label, 1); // Only one dimension is evaluated

      this.temperatureRange = temperatureRange;
    }

    @Override
    public boolean matches(long[] dimValues) {
      // The temperature is the second dimension value (index 1); index 0 is never read.
      if (temperatureRange.min() > dimValues[1]) {
        return false;
      }
      return temperatureRange.max() >= dimValues[1];
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy