
solutions.siren.join.action.terms.collector.NumericTermStream Maven / Gradle / Ivy
/**
* Copyright (c) 2016, SIREn Solutions. All Rights Reserved.
*
* This file is part of the SIREn project.
*
* SIREn is a free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* SIREn is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package solutions.siren.join.action.terms.collector;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
/**
* A stream of numeric terms for a given document and field. A {@link NumericTermStream} is a reusable object
* used in combination with {@link HitStream#getTermStream(TermStream)}.
*/
abstract class NumericTermStream extends TermStream {

  /**
   * Factory for a reusable {@link NumericTermStream}; the implementation is chosen from the
   * kind of field data supplied.
   * <ul>
   *   <li>Field data that is not an {@link IndexNumericFieldData} yields a stream of term hashes.</li>
   *   <li>Numeric, non floating point field data yields a stream of the long values themselves.</li>
   * </ul>
   *
   * @param reader the index reader whose leaves are addressed by the stream
   * @param indexFieldData the field data the terms are read from
   * @return a new stream bound to the given reader and field data
   * @throws UnsupportedOperationException if the field data is numeric and floating point
   */
  public static NumericTermStream get(IndexReader reader, IndexFieldData indexFieldData) {
    // Anything that is not numeric field data is streamed as hashed terms.
    if (!(indexFieldData instanceof IndexNumericFieldData)) {
      return new HashTermStream(reader, indexFieldData);
    }

    final IndexNumericFieldData numeric = (IndexNumericFieldData) indexFieldData;
    if (numeric.getNumericType().isFloatingPoint()) {
      throw new UnsupportedOperationException("Streaming floating points is unsupported");
    }
    return new LongTermStream(reader, numeric);
  }

  /**
   * Passes the reader on to the parent {@link TermStream}.
   */
  protected NumericTermStream(IndexReader reader) {
    super(reader);
  }

  /**
   * Tells whether the stream still holds at least one term for the current document.
   */
  public abstract boolean hasNext();

  /**
   * Advances to the next term of the stream and returns its long value (i.e., the hash of the
   * term for a string field type).
   */
  public abstract long next();

  /**
   * Positions the stream on the given document.
   *
   * @param atomicReaderId index of the leaf in the reader's list of leaves
   * @param atomicDocId document id passed to the leaf's doc values
   * @see HitStream#getTermStream(TermStream)
   */
  protected abstract void set(int atomicReaderId, int atomicDocId);

  /**
   * Stream implementation that returns the long values of a numeric field unchanged.
   */
  private static class LongTermStream extends NumericTermStream {

    private final IndexNumericFieldData fieldData;

    /** Leaf id the doc values were last loaded for; -1 until the first call to set. */
    private int loadedLeafId = -1;

    /** Doc values of the leaf identified by {@link #loadedLeafId}. */
    private SortedNumericDocValues docValues;

    /** Index of the next value to return for the current document. */
    private int cursor;

    protected LongTermStream(IndexReader reader, IndexNumericFieldData fieldData) {
      super(reader);
      this.fieldData = fieldData;
    }

    @Override
    protected void set(int atomicReaderId, int atomicDocId) {
      // Fetching values from the field data cache is expensive, so it is done again
      // only when the requested leaf differs from the one loaded last.
      final boolean leafChanged = atomicReaderId != loadedLeafId;
      if (leafChanged) {
        final LeafReaderContext leaf = reader.leaves().get(atomicReaderId);
        docValues = fieldData.load(leaf).getLongValues();
      }

      docValues.setDocument(atomicDocId);
      cursor = 0;
      loadedLeafId = atomicReaderId;
    }

    @Override
    public boolean hasNext() {
      final int consumed = cursor;
      return consumed < docValues.count();
    }

    @Override
    public long next() {
      // The cursor moves forward before the value is read.
      final int index = cursor++;
      return docValues.valueAt(index);
    }

  }

  /**
   * Stream implementation for string values: each term is returned as its Murmur hash.
   */
  private static class HashTermStream extends NumericTermStream {

    private final IndexFieldData fieldData;

    /** Leaf id the doc values were last loaded for; -1 until the first call to set. */
    private int loadedLeafId = -1;

    /** Doc values of the leaf identified by {@link #loadedLeafId}. */
    private SortedBinaryDocValues docValues;

    /** Index of the next value to return for the current document. */
    private int cursor;

    protected HashTermStream(IndexReader reader, IndexFieldData fieldData) {
      super(reader);
      this.fieldData = fieldData;
    }

    @Override
    protected void set(int atomicReaderId, int atomicDocId) {
      // Fetching values from the field data cache is expensive, so it is done again
      // only when the requested leaf differs from the one loaded last.
      final boolean leafChanged = atomicReaderId != loadedLeafId;
      if (leafChanged) {
        final LeafReaderContext leaf = reader.leaves().get(atomicReaderId);
        docValues = fieldData.load(leaf).getBytesValues();
      }

      docValues.setDocument(atomicDocId);
      cursor = 0;
      loadedLeafId = atomicReaderId;
    }

    @Override
    public boolean hasNext() {
      final int consumed = cursor;
      return consumed < docValues.count();
    }

    @Override
    public long next() {
      // The cursor moves forward before the term is read.
      final int index = cursor++;
      final BytesRef bytes = docValues.valueAt(index);
      return LongBloomFilter.hash3_x64_128(bytes.bytes, bytes.offset, bytes.length, 0);
    }

  }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy