All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.facet.taxonomy.writercache.UTF8TaxonomyWriterCache Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.taxonomy.writercache;

import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;

/**
 * A "cache" that never frees memory, and stores labels in a BytesRefHash (utf-8 encoding).
 *
 * <p>Each label is serialized to UTF-8 (components separated by the byte {@code 0x1F}) and added
 * to a {@link BytesRefHash}. The id assigned by the hash is then used as an index into a paged
 * {@code int} table ({@code PAGE_SIZE} entries per page) that holds the ordinal recorded for that
 * label.
 *
 * <p>All access to the hash, the ordinal pages and the counters happens while holding this
 * instance's monitor.
 *
 * @deprecated Use {@link LruTaxonomyWriterCache} instead.
 */
@Deprecated
public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
  /**
   * Per-thread scratch buffer reused by {@link #toBytes(FacetLabel)} so that serializing a label
   * does not allocate a new builder on every call.
   */
  private final ThreadLocal<BytesRefBuilder> bytes =
      ThreadLocal.withInitial(BytesRefBuilder::new);

  /** Byte counter handed to the tracking allocator that backs {@link #map}. */
  private final Counter bytesUsed = Counter.newCounter();

  /** Maps the UTF-8 form of a label to a dense id. */
  private final BytesRefHash map =
      new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)));

  private static final int PAGE_BITS = 16;
  private static final int PAGE_SIZE = 1 << PAGE_BITS;
  private static final int PAGE_MASK = PAGE_SIZE - 1;

  /** Ordinal table: {@code ordinals[id >>> PAGE_BITS][id & PAGE_MASK]} is the ordinal for id. */
  private volatile int[][] ordinals;

  // How many labels we are storing:
  private int count;

  // How many pages in ordinals we've allocated:
  private int pageCount;

  /** Sole constructor. */
  public UTF8TaxonomyWriterCache() {
    ordinals = new int[1][];
    ordinals[0] = new int[PAGE_SIZE];
  }

  /**
   * Looks up the ordinal previously stored for {@code label}.
   *
   * @param label the label to look up
   * @return the stored ordinal, or {@link LabelToOrdinal#INVALID_ORDINAL} if the label is not
   *     cached
   */
  @Override
  public int get(FacetLabel label) {
    BytesRef key = toBytes(label);
    // Resolve the id AND read its ordinal under the same lock: clear() replaces the page table,
    // so reading the table after releasing the lock could index into a table that no longer
    // contains the page for this id.
    synchronized (this) {
      int id = map.find(key);
      if (id == -1) {
        return LabelToOrdinal.INVALID_ORDINAL;
      }
      return ordinals[id >>> PAGE_BITS][id & PAGE_MASK];
    }
  }

  /**
   * Verifies that a label which is already cached is being re-added with the same ordinal.
   *
   * <p>Called only from assert.
   *
   * @param label the label being re-added (used for the error message only)
   * @param id the negative value returned by {@code map.add} for an already-present key
   * @param ord the ordinal the caller is trying to store
   * @return always {@code true}, so the call can be used as an assert condition
   * @throws IllegalArgumentException if the stored ordinal differs from {@code ord}
   */
  private boolean assertSameOrdinal(FacetLabel label, int id, int ord) {
    // Decode the negative "already present" value back into the existing entry's id.
    id = -id - 1;
    int page = id >>> PAGE_BITS;
    int offset = id & PAGE_MASK;
    int oldOrd = ordinals[page][offset];
    if (oldOrd != ord) {
      throw new IllegalArgumentException(
          "label "
              + label
              + " was already cached, with old ord="
              + oldOrd
              + " versus new ord="
              + ord);
    }
    return true;
  }

  /**
   * Stores {@code ord} for {@code label} if the label is not cached yet. If the label is already
   * cached the existing entry is left untouched (with assertions enabled, a differing ordinal
   * raises {@link IllegalArgumentException}).
   *
   * @param label the label to cache
   * @param ord the ordinal to associate with the label
   * @return always {@code false}: this cache never prunes entries
   */
  @Override
  public boolean put(FacetLabel label, int ord) {
    BytesRef key = toBytes(label);
    synchronized (this) {
      int id = map.add(key);
      if (id < 0) {
        // Negative id: the key was already in the hash.
        assert assertSameOrdinal(label, id, ord);
        return false;
      }
      assert id == count;
      int page = id >>> PAGE_BITS;
      int offset = id & PAGE_MASK;
      if (page == pageCount) {
        // First id that falls on this page: make room in the outer table, then allocate it.
        if (page == ordinals.length) {
          int[][] newOrdinals =
              new int[ArrayUtil.oversize(page + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
          System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length);
          ordinals = newOrdinals;
        }
        ordinals[page] = new int[PAGE_SIZE];
        pageCount++;
      }
      ordinals[page][offset] = ord;
      count++;

      // we never prune from the cache
      return false;
    }
  }

  /**
   * Reports whether the cache is full.
   *
   * @return always {@code false}
   */
  @Override
  public boolean isFull() {
    // we are never full
    return false;
  }

  /** Removes every cached label and resets the ordinal table to a single empty page. */
  @Override
  public synchronized void clear() {
    map.clear();
    map.reinit();
    ordinals = new int[1][];
    ordinals[0] = new int[PAGE_SIZE];
    count = 0;
    pageCount = 0;
    assert bytesUsed.get() == 0;
  }

  /** How many labels are currently stored in the cache. */
  @Override
  public synchronized int size() {
    // Synchronized so the value reflects writes made by put()/clear() on other threads.
    return count;
  }

  /**
   * Returns the bytes tracked by the hash's allocator plus the size of the ordinal pages counted
   * in {@code pageCount}.
   */
  @Override
  public synchronized long ramBytesUsed() {
    return bytesUsed.get() + pageCount * (long) PAGE_SIZE * Integer.BYTES;
  }

  /** No-op: there is nothing to release. */
  @Override
  public void close() {}

  /** Separator byte written between consecutive label components. */
  private static final byte DELIM_CHAR = (byte) 0x1F;

  /**
   * Serializes {@code label} into the calling thread's scratch builder: each component is encoded
   * as UTF-8 and components are separated by {@link #DELIM_CHAR}.
   *
   * <p>The builder is cleared and reused on every call, so the result should be consumed before
   * the same thread calls this method again. NOTE(review): presumably the returned {@link
   * BytesRef} shares the builder's backing array; confirm against {@code BytesRefBuilder#get}.
   *
   * @param label the label to serialize
   * @return the UTF-8 bytes of the label
   */
  private BytesRef toBytes(FacetLabel label) {
    BytesRefBuilder scratch = this.bytes.get();
    scratch.clear();
    for (int i = 0; i < label.length; i++) {
      String part = label.components[i];
      if (i > 0) {
        scratch.append(DELIM_CHAR);
      }
      // Reserve the worst-case UTF-8 size for this component before encoding in place.
      scratch.grow(scratch.length() + UnicodeUtil.maxUTF8Length(part.length()));
      scratch.setLength(
          UnicodeUtil.UTF16toUTF8(part, 0, part.length(), scratch.bytes(), scratch.length()));
    }
    return scratch.get();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy