
solutions.siren.join.action.terms.collector.BytesRefTermStream Maven / Gradle / Ivy
/**
* Copyright (c) 2016, SIREn Solutions. All Rights Reserved.
*
* This file is part of the SIREn project.
*
* SIREn is a free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* SIREn is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package solutions.siren.join.action.terms.collector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
/**
 * A stream of terms coming for a given document and field. A {@link BytesRefTermStream} is a reusable object
 * used in combination with {@link HitStream#getTermStream(TermStream)}.
 * <p>
 * Terms are exposed as {@link BytesRef}s. Use {@link #get(IndexReader, IndexFieldData)} to obtain the
 * implementation matching the type of the field data.
 */
abstract class BytesRefTermStream extends TermStream {

  protected final IndexFieldData fieldData;

  protected BytesRefTermStream(IndexReader reader, IndexFieldData indexFieldData) {
    super(reader);
    this.fieldData = indexFieldData;
  }

  /**
   * Returns true if there is at least one remaining term in the stream.
   */
  public abstract boolean hasNext();

  /**
   * Moves to the next term in the stream and returns it as a {@link BytesRef}. For numeric fields,
   * the returned bytes are the prefix-coded form (shift 0) of the numeric value.
   */
  public abstract BytesRef next();

  /**
   * Set the stream to the given document.
   *
   * @param atomicReaderId index of the leaf in {@code reader.leaves()}
   * @param atomicDocId document id passed to the doc values of that leaf
   * @see HitStream#getTermStream(TermStream)
   */
  protected abstract void set(int atomicReaderId, int atomicDocId);

  /**
   * Term stream over the bytes values of a field; values are returned as provided by the field data.
   */
  private static class BytesBytesRefTermStream extends BytesRefTermStream {

    private int lastAtomicReaderId = -1;
    private SortedBinaryDocValues values;
    // index of the next value to return for the current document
    private int count;

    protected BytesBytesRefTermStream(IndexReader reader, IndexFieldData indexFieldData) {
      super(reader, indexFieldData);
    }

    @Override
    public boolean hasNext() {
      return this.count < this.values.count();
    }

    @Override
    public BytesRef next() {
      return values.valueAt(this.count++);
    }

    @Override
    protected void set(int atomicReaderId, int atomicDocId) {
      // loading values from field data cache is costly,
      // therefore we load values from cache only if new atomic reader id
      if (lastAtomicReaderId != atomicReaderId) {
        LeafReaderContext leafReader = reader.leaves().get(atomicReaderId);
        this.values = this.fieldData.load(leafReader).getBytesValues();
      }
      this.values.setDocument(atomicDocId);
      this.count = 0;
      this.lastAtomicReaderId = atomicReaderId;
    }

  }

  /**
   * Base class for term streams over numeric field data. It handles the positioning on a document
   * and the iteration state; subclasses only encode the current numeric value in {@link #next()}.
   */
  private static abstract class NumericBytesRefTermStream extends BytesRefTermStream {

    private int lastAtomicReaderId = -1;
    protected SortedNumericDocValues values;
    // index of the next value to return for the current document
    protected int count;

    protected NumericBytesRefTermStream(IndexReader reader, IndexFieldData indexFieldData) {
      super(reader, indexFieldData);
    }

    @Override
    public boolean hasNext() {
      return this.count < this.values.count();
    }

    @Override
    protected void set(int atomicReaderId, int atomicDocId) {
      // loading values from field data cache is costly,
      // therefore we load values from cache only if new atomic reader id
      if (lastAtomicReaderId != atomicReaderId) {
        LeafReaderContext leafReader = reader.leaves().get(atomicReaderId);
        this.values = ((IndexNumericFieldData) this.fieldData).load(leafReader).getLongValues();
      }
      this.values.setDocument(atomicDocId);
      this.count = 0;
      this.lastAtomicReaderId = atomicReaderId;
    }

  }

  /**
   * Numeric term stream that encodes each value as a prefix-coded int.
   */
  private static class IntegerBytesRefTermStream extends NumericBytesRefTermStream {

    protected IntegerBytesRefTermStream(IndexReader reader, IndexFieldData indexFieldData) {
      super(reader, indexFieldData);
    }

    @Override
    public BytesRef next() {
      BytesRefBuilder b = new BytesRefBuilder();
      // doc values are exposed as long; narrow to int before encoding
      NumericUtils.intToPrefixCoded((int) values.valueAt(this.count++), 0, b);
      return b.toBytesRef();
    }

  }

  /**
   * Numeric term stream that encodes each value as a prefix-coded long.
   */
  private static class LongBytesRefTermStream extends NumericBytesRefTermStream {

    protected LongBytesRefTermStream(IndexReader reader, IndexFieldData indexFieldData) {
      super(reader, indexFieldData);
    }

    @Override
    public BytesRef next() {
      BytesRefBuilder b = new BytesRefBuilder();
      // encode the full 64-bit value: narrowing it to int would truncate
      // any value outside the 32-bit range and produce a wrong term
      NumericUtils.longToPrefixCoded(values.valueAt(this.count++), 0, b);
      return b.toBytesRef();
    }

  }

  /**
   * Instantiates a new reusable {@link BytesRefTermStream} based on the field type.
   *
   * @param reader the index reader whose leaves are addressed by the stream
   * @param indexFieldData the field data of the field to stream
   * @return an integer or long stream for numeric field data of type INT or LONG, a bytes stream
   *         for any non-numeric field data
   * @throws UnsupportedOperationException if the field data is numeric but neither INT nor LONG
   */
  public static BytesRefTermStream get(IndexReader reader, IndexFieldData indexFieldData) {
    if (indexFieldData instanceof IndexNumericFieldData) {
      IndexNumericFieldData numFieldData = (IndexNumericFieldData) indexFieldData;
      switch (numFieldData.getNumericType()) {

        case INT:
          return new IntegerBytesRefTermStream(reader, numFieldData);

        case LONG:
          return new LongBytesRefTermStream(reader, numFieldData);

        default:
          throw new UnsupportedOperationException("Streaming numeric type '" + numFieldData.getNumericType().name() + "' is unsupported");

      }
    }
    else {
      return new BytesBytesRefTermStream(reader, indexFieldData);
    }
  }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy