// org.apache.lucene.facet.taxonomy.TaxonomyReader (from the lucene-facet artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.AlreadyClosedException;
/**
* TaxonomyReader is the read-only interface with which the faceted-search library uses the taxonomy
* during search time.
*
* <p>A TaxonomyReader holds a list of categories. Each category has a serial number which we call
* an "ordinal", and a hierarchical "path" name:
*
* <ul>
* <li>The ordinal is an integer that starts at 0 for the first category (which is always the root
* category), and grows contiguously as more categories are added; note that once a category
* is added, it can never be deleted.
* <li>The path is a {@link FacetLabel} object specifying the category's position in the hierarchy.
* </ul>
*
* <p><b>Notes about concurrent access to the taxonomy:</b>
*
* <p>An implementation must allow multiple readers to be active concurrently with a single writer.
* Readers follow so-called "point in time" semantics, i.e., a TaxonomyReader object will only see
* taxonomy entries which were available at the time it was created. What the writer writes is only
* available to (new) readers after the writer's commit() is called.
*
*
* <p>In faceted search, two separate indices are used: the main Lucene index, and the taxonomy.
* Because the main index refers to the categories listed in the taxonomy, it is important to open
* the taxonomy *after* opening the main index, and it is also necessary to reopen() the taxonomy
* after reopen()ing the main index.
*
*
* <p>This order is important, otherwise it would be possible for the main index to refer to a
* category which is not yet visible in the old snapshot of the taxonomy. Note that it is indeed
* fine for the taxonomy to be opened after the main index - even a long time after. The reason is
* that once a category is added to the taxonomy, it can never be changed or deleted, so there is no
* danger of a "too new" taxonomy being inconsistent with an older index.
*
* @lucene.experimental
*/
public abstract class TaxonomyReader implements Closeable {

  /**
   * An iterator over a category's children.
   *
   * <p>Iteration starts at the ordinal handed to the constructor and then follows the {@code
   * siblings} array from one ordinal to the next until {@link TaxonomyReader#INVALID_ORDINAL} is
   * reached.
   */
  public static class ChildrenIterator {

    /** Lookup table: {@code siblings[ord]} is the ordinal returned after {@code ord}. */
    private final int[] siblings;

    /** The ordinal that the next call to {@link #next()} will return. */
    private int child;

    /**
     * Creates an iterator positioned on {@code child}.
     *
     * @param child the first ordinal to return, or {@link TaxonomyReader#INVALID_ORDINAL} for an
     *     iterator that has nothing to return
     * @param siblings array mapping each ordinal to the ordinal that follows it
     */
    ChildrenIterator(int child, int[] siblings) {
      this.siblings = siblings;
      this.child = child;
    }

    /**
     * Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL} if no more children.
     */
    public int next() {
      int res = child;
      if (child != TaxonomyReader.INVALID_ORDINAL) {
        // Only advance while something is left, so an exhausted iterator keeps returning
        // INVALID_ORDINAL instead of indexing the array with -1.
        child = siblings[child];
      }
      return res;
    }
  }

  /** Sole constructor. */
  public TaxonomyReader() {}

  /**
   * The root category (the category with the empty path) always has the ordinal 0, to which we give
   * a name ROOT_ORDINAL. {@link #getOrdinal(FacetLabel)} of an empty path will always return {@code
   * ROOT_ORDINAL}, and {@link #getPath(int)} with {@code ROOT_ORDINAL} will return the empty path.
   */
  public static final int ROOT_ORDINAL = 0;

  /**
   * Ordinals are always non-negative, so a negative ordinal can be used to signify an error.
   * Methods here return INVALID_ORDINAL (-1) in this case.
   */
  public static final int INVALID_ORDINAL = -1;

  /**
   * If the taxonomy has changed since the provided reader was opened, open and return a new {@link
   * TaxonomyReader}; else, return {@code null}. The new reader, if not {@code null}, will be the
   * same type of reader as the one given to this method.
   *
   * <p>This method is typically far less costly than opening a fully new {@link TaxonomyReader} as
   * it shares resources with the provided {@link TaxonomyReader}, when possible.
   *
   * @param <T> the concrete reader type of {@code oldTaxoReader}
   * @param oldTaxoReader the reader to refresh; the work is delegated to its {@link
   *     #doOpenIfChanged()}
   * @return whatever {@link #doOpenIfChanged()} returned, cast to {@code T}
   * @throws IOException if {@link #doOpenIfChanged()} throws it
   */
  public static <T extends TaxonomyReader> T openIfChanged(T oldTaxoReader) throws IOException {
    // doOpenIfChanged() is declared to return TaxonomyReader; the cast relies on the documented
    // contract that the new reader has the same type as the one passed in.
    @SuppressWarnings("unchecked")
    final T newTaxoReader = (T) oldTaxoReader.doOpenIfChanged();
    assert newTaxoReader != oldTaxoReader;
    return newTaxoReader;
  }

  /** Set by {@link #close()}, and by {@link #decRef()} after {@link #doClose()} succeeded. */
  private volatile boolean closed = false;

  // set refCount to 1 at start
  private final AtomicInteger refCount = new AtomicInteger(1);

  /** performs the actual task of closing the resources that are used by the taxonomy reader. */
  protected abstract void doClose() throws IOException;

  /**
   * Implements the actual opening of a new {@link TaxonomyReader} instance if the taxonomy has
   * changed.
   *
   * @see #openIfChanged(TaxonomyReader)
   */
  protected abstract TaxonomyReader doOpenIfChanged() throws IOException;

  /**
   * Throws {@link AlreadyClosedException} if this TaxonomyReader is closed, i.e. if its reference
   * count is zero or less.
   */
  protected final void ensureOpen() throws AlreadyClosedException {
    if (getRefCount() <= 0) {
      throw new AlreadyClosedException("this TaxonomyReader is closed");
    }
  }

  /**
   * Releases one reference through {@link #decRef()} and marks this reader as closed. Once a call
   * has completed, later calls return without doing anything. {@link #doClose()} only runs when
   * the reference count reaches zero.
   *
   * <p>If {@link #decRef()} throws, the reader is not marked closed, so {@code close()} may be
   * called again.
   */
  @Override
  public final void close() throws IOException {
    // The flag is checked once without the lock as a fast path and again while holding it, so
    // that concurrent callers cannot both release a reference.
    if (!closed) {
      synchronized (this) {
        if (!closed) {
          decRef();
          closed = true;
        }
      }
    }
  }

  /**
   * Expert: decreases the refCount of this TaxonomyReader instance. If the refCount drops to 0 this
   * taxonomy reader is closed.
   *
   * @throws AlreadyClosedException if the refCount is already zero or less when called
   * @throws IllegalStateException if the refCount is negative after the decrement
   * @throws IOException if {@link #doClose()} fails; the reference is put back in that case
   */
  public final void decRef() throws IOException {
    ensureOpen();
    final int rc = refCount.decrementAndGet();
    if (rc == 0) {
      boolean success = false;
      try {
        doClose();
        closed = true;
        success = true;
      } finally {
        if (!success) {
          // Put reference back on failure
          refCount.incrementAndGet();
        }
      }
    } else if (rc < 0) {
      throw new IllegalStateException(
          "too many decRef calls: refCount is " + rc + " after decrement");
    }
  }

  /**
   * Returns a {@link ParallelTaxonomyArrays} object which can be used to efficiently traverse the
   * taxonomy tree.
   */
  public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;

  /**
   * Returns an iterator over the children of the given ordinal.
   *
   * <p>The iterator starts at {@code children()[ordinal]} and follows {@code siblings()} of the
   * arrays returned by {@link #getParallelTaxonomyArrays()}. For a negative {@code ordinal} the
   * iterator's first {@link ChildrenIterator#next()} returns {@link #INVALID_ORDINAL}.
   */
  public ChildrenIterator getChildren(final int ordinal) throws IOException {
    ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
    int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL;
    return new ChildrenIterator(child, arrays.siblings());
  }

  /**
   * Retrieve user committed data.
   *
   * @see TaxonomyWriter#setLiveCommitData(Iterable)
   */
  public abstract Map<String, String> getCommitUserData() throws IOException;

  /**
   * Returns the ordinal of the category given as a path. The ordinal is the category's serial
   * number, an integer which starts with 0 and grows as more categories are added (note that once a
   * category is added, it can never be deleted).
   *
   * @return the category's ordinal or {@link #INVALID_ORDINAL} if the category wasn't found.
   */
  public abstract int getOrdinal(FacetLabel categoryPath) throws IOException;

  /**
   * Returns the ordinals of the categories given as a path. The ordinal is the category's serial
   * number, an integer which starts with 0 and grows as more categories are added (note that once a
   * category is added, it can never be deleted).
   *
   * <p>The implementation in {@link
   * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
   * iteratively calling {@link #getOrdinal(FacetLabel)}
   *
   * @return array with one ordinal per given path, in the same order; an entry is {@link
   *     #INVALID_ORDINAL} if the corresponding category wasn't found.
   */
  public int[] getBulkOrdinals(FacetLabel... categoryPath) throws IOException {
    // This is a slow default implementation. DirectoryTaxonomyReader overrides this method to make
    // it faster.
    int[] ords = new int[categoryPath.length];
    for (int i = 0; i < categoryPath.length; i++) {
      ords[i] = getOrdinal(categoryPath[i]);
    }
    return ords;
  }

  /**
   * Returns ordinal for the dim + path.
   *
   * <p>Builds a {@link FacetLabel} whose first component is {@code dim}, followed by the
   * components of {@code path}, and delegates to {@link #getOrdinal(FacetLabel)}.
   */
  public int getOrdinal(String dim, String... path) throws IOException {
    String[] fullPath = new String[path.length + 1];
    fullPath[0] = dim;
    System.arraycopy(path, 0, fullPath, 1, path.length);
    return getOrdinal(new FacetLabel(fullPath));
  }

  /** Returns the path name of the category with the given ordinal. */
  public abstract FacetLabel getPath(int ordinal) throws IOException;

  /**
   * Returns the path names of the list of ordinals associated with different categories.
   *
   * <p>The implementation in {@link
   * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
   * the default implementation which iteratively calls {@link #getPath(int)}
   *
   * <p>Note: this method may change (reorder elements) its parameter, you should avoid reusing the
   * parameter after the method is called.
   */
  public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
    FacetLabel[] facetLabels = new FacetLabel[ordinals.length];
    for (int i = 0; i < ordinals.length; i++) {
      facetLabels[i] = getPath(ordinals[i]);
    }
    return facetLabels;
  }

  /** Returns the current refCount for this taxonomy reader. */
  public final int getRefCount() {
    return refCount.get();
  }

  /**
   * Returns the number of categories in the taxonomy. Note that the number of categories returned
   * is often slightly higher than the number of categories inserted into the taxonomy; This is
   * because when a category is added to the taxonomy, its ancestors are also added automatically
   * (including the root, which always get ordinal 0).
   */
  public abstract int getSize();

  /**
   * Expert: increments the refCount of this TaxonomyReader instance. RefCounts can be used to
   * determine when a taxonomy reader can be closed safely, i.e. as soon as there are no more
   * references. Be sure to always call a corresponding decRef(), in a finally clause; otherwise the
   * reader may never be closed.
   *
   * @throws AlreadyClosedException if the refCount is already zero or less
   */
  public final void incRef() {
    ensureOpen();
    refCount.incrementAndGet();
  }

  /**
   * Expert: increments the refCount of this TaxonomyReader instance only if it has not been closed
   * yet. Returns true on success.
   */
  public final boolean tryIncRef() {
    int count;
    // Retry the compare-and-set until it succeeds or the count is seen to be zero or less.
    while ((count = refCount.get()) > 0) {
      if (refCount.compareAndSet(count, count + 1)) {
        return true;
      }
    }
    return false;
  }
}