All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.lucene.facet.enhancements.association.AssociationsPayloadIterator Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
package org.apache.lucene.facet.enhancements.association;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;

import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.PayloadIntDecodingIterator;
import org.apache.lucene.util.collections.IntIterator;
import org.apache.lucene.util.collections.IntToIntMap;
import org.apache.lucene.util.encoding.SimpleIntDecoder;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Allows easy iteration over the associations payload, decoding and breaking it
 * to (ordinal, value) pairs, stored in a hash.
 * 
 * @lucene.experimental
 */
public class AssociationsPayloadIterator {

  /**
   * Default Term for associations
   */
  public static final Term ASSOCIATION_POSTING_TERM = new Term(
      CategoryListParams.DEFAULT_TERM.field(),
      AssociationEnhancement.CATEGORY_LIST_TERM_TEXT);

  /**
   * Hash mapping to ordinals to the associated int value
   */
  private IntToIntMap ordinalToAssociationMap;

  /**
   * An inner payload decoder which actually goes through the posting and
   * decode the ints representing the ordinals and the values
   */
  private PayloadIntDecodingIterator associationPayloadIter;

  /**
   * Marking whether there are associations (at all) in the given index
   */
  private boolean hasAssociations = false;

  /**
   * The long-special-value returned for ordinals which have no associated int
   * value. It is not in the int range of values making it a valid mark.
   */
  public final static long NO_ASSOCIATION = Integer.MAX_VALUE + 1;

  /**
   * Construct a new association-iterator, initializing the inner payload
   * iterator, with the supplied term and checking whether there are any
   * associations within the given index
   * 
   * @param reader
   *            a reader containing the postings to be iterated
   * @param field
   *            the field containing the relevant associations list term
   */
  public AssociationsPayloadIterator(IndexReader reader, String field)
      throws IOException {
    // Initialize the payloadDecodingIterator
    associationPayloadIter = new PayloadIntDecodingIterator(
        reader,
        // TODO (Facet): should consolidate with AssociationListTokenizer which
        // uses AssociationEnhancement.getCatTermText()
        new Term(field, AssociationEnhancement.CATEGORY_LIST_TERM_TEXT),
        new SimpleIntDecoder());

    // Check whether there are any associations
    hasAssociations = associationPayloadIter.init();

    ordinalToAssociationMap = new IntToIntMap();
  }

  /**
   * Skipping to the next document, fetching its associations & populating the
   * map.
   * 
   * @param docId
   *            document id to be skipped to
   * @return true if the document contains associations and they were fetched
   *         correctly. false otherwise.
   * @throws IOException
   *             on error
   */
  public boolean setNextDoc(int docId) throws IOException {
    ordinalToAssociationMap.clear();
    boolean docContainsAssociations = false;
    try {
      docContainsAssociations = fetchAssociations(docId);
    } catch (IOException e) {
      IOException ioe = new IOException(
          "An Error occured while reading a document's associations payload (docId="
              + docId + ")");
      ioe.initCause(e);
      throw ioe;
    }

    return docContainsAssociations;
  }

  /**
   * Get int association value for the given ordinal. 
* The return is either an int value casted as long if the ordinal has an * associated value. Otherwise the returned value would be * {@link #NO_ASSOCIATION} which is 'pure long' value (e.g not in the int * range of values) * * @param ordinal * for which the association value is requested * @return the associated int value (encapsulated in a long) if the ordinal * had an associated value, or {@link #NO_ASSOCIATION} otherwise */ public long getAssociation(int ordinal) { if (ordinalToAssociationMap.containsKey(ordinal)) { return ordinalToAssociationMap.get(ordinal); } return NO_ASSOCIATION; } /** * Get an iterator over the ordinals which has an association for the * document set by {@link #setNextDoc(int)}. */ public IntIterator getAssociatedOrdinals() { return ordinalToAssociationMap.keyIterator(); } /** * Skips to the given docId, getting the values in pairs of (ordinal, value) * and populating the map * * @param docId * document id owning the associations * @return true if associations were fetched successfully, false otherwise * @throws IOException * on error */ private boolean fetchAssociations(int docId) throws IOException { // No associations at all? don't bother trying to seek the docID in the // posting if (!hasAssociations) { return false; } // No associations for this document? well, nothing to decode than, // return false if (!associationPayloadIter.skipTo(docId)) { return false; } // loop over all the values decoded from the payload in pairs. for (;;) { // Get the ordinal long ordinal = associationPayloadIter.nextCategory(); // if no ordinal - it's the end of data, break the loop if (ordinal > Integer.MAX_VALUE) { break; } // get the associated value long association = associationPayloadIter.nextCategory(); // If we're at this step - it means we have an ordinal, do we have // an association for it? if (association > Integer.MAX_VALUE) { // No association!!! A Broken Pair!! PANIC! throw new IOException( "ERROR! 
Associations should come in pairs of (ordinal, value), yet this payload has an odd number of values! (docId=" + docId + ")"); } // Populate the map with the given ordinal and association pair ordinalToAssociationMap.put((int) ordinal, (int) association); } return true; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((associationPayloadIter == null) ? 0 : associationPayloadIter.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } AssociationsPayloadIterator other = (AssociationsPayloadIterator) obj; if (associationPayloadIter == null) { if (other.associationPayloadIter != null) { return false; } } else if (!associationPayloadIter.equals(other.associationPayloadIter)) { return false; } return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy