All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.facet.index.streaming.CountingListTokenizer Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
package org.apache.lucene.facet.index.streaming;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.facet.index.CategoryListPayloadStream;
import org.apache.lucene.facet.index.attributes.OrdinalProperty;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.util.encoding.IntEncoder;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * {@link CategoryListTokenizer} for facet counting
 * 
 * @lucene.experimental
 */
public class CountingListTokenizer extends CategoryListTokenizer {

  /** A table for retrieving payload streams by category-list name. */
  protected HashMap payloadStreamsByName = 
    new HashMap();

  /** An iterator over the payload streams */
  protected Iterator> payloadStreamIterator;

  public CountingListTokenizer(TokenStream input,
      FacetIndexingParams indexingParams) {
    super(input, indexingParams);
    this.payloadStreamsByName = new HashMap();
  }

  @Override
  protected void handleStartOfInput() throws IOException {
    payloadStreamsByName.clear();
    payloadStreamIterator = null;
  }

  @Override
  public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      if (this.categoryAttribute != null) {
        OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute
            .getProperty(OrdinalProperty.class);
        if (ordinalProperty != null && legalCategory()) {
          CategoryPath categoryPath = this.categoryAttribute
              .getCategoryPath();
          int ordinal = ordinalProperty.getOrdinal();
          CategoryListPayloadStream payloadStream = getPayloadStream(
              categoryPath, ordinal);
          int partitionSize = indexingParams.getPartitionSize();
          payloadStream.appendIntToStream(ordinal % partitionSize);
        }
      }
      return true;
    }
    if (this.payloadStreamIterator == null) {
      this.handleEndOfInput();
      this.payloadStreamIterator = this.payloadStreamsByName.entrySet()
          .iterator();
    }
    if (this.payloadStreamIterator.hasNext()) {
      Entry entry = this.payloadStreamIterator
          .next();
      String countingListName = entry.getKey();
      int length = countingListName.length();
      this.termAttribute.resizeBuffer(length);
      countingListName.getChars(0, length, termAttribute.buffer(), 0);
      this.termAttribute.setLength(length);
      CategoryListPayloadStream payloadStream = entry.getValue();
      payload.setData(payloadStream.convertStreamToByteArray());
      this.payloadAttribute.setPayload(payload);
      return true;
    }
    return false;
  }

  /**
   * A method which allows extending classes to filter the categories going
   * into the counting list.
   * 
   * @return By default returns {@code true}, meaning the current category is
   *         to be part of the counting list. For categories that should be
   *         filtered, return {@code false}.
   */
  protected boolean legalCategory() {
    return true;
  }

  protected CategoryListPayloadStream getPayloadStream(
      CategoryPath categoryPath, int ordinal) throws IOException {
    CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath);
    String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal); 
    CategoryListPayloadStream fps = payloadStreamsByName.get(name);
    if (fps == null) {
      IntEncoder encoder = clParams.createEncoder();
      fps = new CategoryListPayloadStream(encoder);
      payloadStreamsByName.put(name, fps);
    }
    return fps;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy