// org.apache.lucene.index.BinaryDocValuesFieldUpdates Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;
import org.apache.lucene.util.packed.PagedMutable;
/**
* A {@link DocValuesFieldUpdates} which holds updates of documents, of a single
* {@link BinaryDocValuesField}.
*
* @lucene.experimental
*/
class BinaryDocValuesFieldUpdates extends DocValuesFieldUpdates {

  /**
   * Cursor over the buffered (doc, value) entries. Entries that share a doc id are
   * collapsed: {@link #nextDoc()} advances past the whole run and exposes only the
   * last entry of that run through {@link #value()}.
   */
  static final class Iterator extends DocValuesFieldUpdates.Iterator {
    private final int size;
    private final PagedMutable docs;
    private final PagedGrowableWriter offsets;
    private final PagedGrowableWriter lengths;
    // Shallow copy of the shared bytes; only its offset/length are rewritten per doc.
    private final BytesRef value;

    // Index of the next unread entry; kept as a long so that stepping past
    // size == Integer.MAX_VALUE cannot overflow.
    private long cursor = 0;
    private int doc = -1;
    private int offset;
    private int length;

    /**
     * @param size    number of valid entries in the three parallel structures
     * @param offsets start offset of each entry's bytes inside {@code values}
     * @param lengths byte length of each entry
     * @param docs    doc id of each entry
     * @param values  the concatenated bytes of all entries; cloned here so that this
     *                iterator can adjust offset/length without touching the caller's
     *                instance
     */
    Iterator(int size, PagedGrowableWriter offsets, PagedGrowableWriter lengths,
        PagedMutable docs, BytesRef values) {
      this.size = size;
      this.docs = docs;
      this.offsets = offsets;
      this.lengths = lengths;
      this.value = values.clone();
    }

    /**
     * Returns the reusable {@link BytesRef}, positioned on the slice selected by the
     * most recent {@link #nextDoc()} call. The same instance is returned every time.
     */
    @Override
    BytesRef value() {
      // The slice is applied lazily, here, rather than in nextDoc().
      value.offset = offset;
      value.length = length;
      return value;
    }

    /**
     * Moves to the next distinct doc id.
     *
     * @return the doc id, or {@link DocIdSetIterator#NO_MORE_DOCS} once every entry
     *         has been consumed
     */
    @Override
    int nextDoc() {
      if (cursor >= size) {
        offset = -1;
        doc = DocIdSetIterator.NO_MORE_DOCS;
        return doc;
      }

      doc = (int) docs.get(cursor);

      // Walk to the last entry of the run of identical doc ids.
      // NOTE(review): this is presumably the most recently added value for the doc,
      // which relies on the sort in iterator() keeping equal docs in insertion
      // order - confirm InPlaceMergeSorter is stable.
      long last = cursor;
      while (last + 1 < size && docs.get(last + 1) == doc) {
        last++;
      }
      cursor = last + 1; // first entry of the following run

      // cannot change 'value' here because nextDoc is called before the
      // value is used, and it's a waste to clone the BytesRef when we
      // obtain the value
      offset = (int) offsets.get(last);
      length = (int) lengths.get(last);
      return doc;
    }

    /** Returns the doc id the iterator is currently positioned on. */
    @Override
    int doc() {
      return doc;
    }

    /** Rewinds the iterator so that the next {@link #nextDoc()} starts from the first entry. */
    @Override
    void reset() {
      idxReset();
      doc = -1;
      offset = -1;
    }

    /** Puts the read cursor back on the first entry. */
    private void idxReset() {
      cursor = 0;
    }
  }

  // Three parallel structures indexed by entry number, plus the concatenated bytes.
  private PagedMutable docs;
  private PagedGrowableWriter offsets, lengths;
  private BytesRefBuilder values;
  private int size;
  private final int bitsPerValue;

  /**
   * Creates an empty update buffer for {@code field}.
   *
   * @param field  name of the field the updates apply to
   * @param maxDoc used to size the doc-id storage: each doc id is stored with the
   *               number of bits {@link PackedInts#bitsRequired(long)} reports for
   *               {@code maxDoc - 1}
   */
  public BinaryDocValuesFieldUpdates(String field, int maxDoc) {
    super(field, DocValuesType.BINARY);
    this.bitsPerValue = PackedInts.bitsRequired(maxDoc - 1);
    this.docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
    this.offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
    this.lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
    this.values = new BytesRefBuilder();
    this.size = 0;
  }

  /**
   * Buffers one update.
   *
   * @param doc   the document being updated
   * @param value the new value; must be a {@link BytesRef}
   * @throws IllegalStateException if {@code Integer.MAX_VALUE} entries are already buffered
   */
  @Override
  public void add(int doc, Object value) {
    // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
    if (size == Integer.MAX_VALUE) {
      throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries");
    }
    final BytesRef bytes = (BytesRef) value;

    // All three structures are grown together, so checking docs is enough.
    if (docs.size() == size) {
      final int required = size + 1;
      docs = docs.grow(required);
      offsets = offsets.grow(required);
      lengths = lengths.grow(required);
    }

    final int slot = size;
    final int start = values.length(); // the new bytes are appended at the current end
    docs.set(slot, doc);
    offsets.set(slot, start);
    lengths.set(slot, bytes.length);
    values.append(bytes);
    size = slot + 1;
  }

  /**
   * Sorts the buffered entries in place by doc id and returns an iterator over them.
   * The sort reorders this instance's own structures, not copies.
   */
  @Override
  public Iterator iterator() {
    final PagedMutable docs = this.docs;
    final PagedGrowableWriter offsets = this.offsets;
    final PagedGrowableWriter lengths = this.lengths;
    final BytesRef values = this.values.get();

    final InPlaceMergeSorter byDoc = new InPlaceMergeSorter() {
      @Override
      protected int compare(int i, int j) {
        return Integer.compare((int) docs.get(i), (int) docs.get(j));
      }

      @Override
      protected void swap(int i, int j) {
        // Keep the three parallel structures aligned by exchanging the same
        // pair of slots in each of them.
        final long docAtI = docs.get(i);
        docs.set(i, docs.get(j));
        docs.set(j, docAtI);

        final long offsetAtI = offsets.get(i);
        offsets.set(i, offsets.get(j));
        offsets.set(j, offsetAtI);

        final long lengthAtI = lengths.get(i);
        lengths.set(i, lengths.get(j));
        lengths.set(j, lengthAtI);
      }
    };
    byDoc.sort(0, size);

    return new Iterator(size, offsets, lengths, docs, values);
  }

  /**
   * Appends every entry of {@code other} after the entries already buffered here.
   *
   * @param other the updates to absorb; must be a {@link BinaryDocValuesFieldUpdates}
   * @throws IllegalStateException if the combined entry count would exceed
   *                               {@code Integer.MAX_VALUE}
   */
  @Override
  public void merge(DocValuesFieldUpdates other) {
    final BinaryDocValuesFieldUpdates otherUpdates = (BinaryDocValuesFieldUpdates) other;
    if ((long) size + otherUpdates.size > Integer.MAX_VALUE) {
      throw new IllegalStateException(
          "cannot support more than Integer.MAX_VALUE doc/value entries; size="
              + size + " other.size=" + otherUpdates.size);
    }

    final int newSize = size + otherUpdates.size;
    docs = docs.grow(newSize);
    offsets = offsets.grow(newSize);
    lengths = lengths.grow(newSize);

    // The other instance's bytes are appended after ours (below), so each of its
    // offsets has to be shifted by the length our bytes have right now.
    final int base = values.length();
    int from = 0;
    while (from < otherUpdates.size) {
      final int target = size;
      docs.set(target, (int) otherUpdates.docs.get(from));
      offsets.set(target, base + otherUpdates.offsets.get(from)); // correct relative offset
      lengths.set(target, otherUpdates.lengths.get(from));
      size = target + 1;
      from++;
    }
    values.append(otherUpdates.values);
  }

  /** Returns {@code true} if at least one update has been buffered. */
  @Override
  public boolean any() {
    return size > 0;
  }

  /**
   * Estimates the RAM cost of one buffered entry as the sum of four rounded-up parts:
   * the packed doc id, the per-entry share of the offsets and lengths structures,
   * and the average number of value bytes per entry.
   */
  @Override
  public long ramBytesPerDoc() {
    final long docBytes = (long) Math.ceil((double) bitsPerValue / 8); // docs
    final int capacity = estimateCapacity(size);
    final long offsetBytes = (long) Math.ceil((double) offsets.ramBytesUsed() / capacity); // offsets
    final long lengthBytes = (long) Math.ceil((double) lengths.ramBytesUsed() / capacity); // lengths
    // Floating-point division: with size == 0 this does not throw.
    // NOTE(review): behaviour for an empty buffer also depends on what
    // estimateCapacity(0) returns - confirm against the superclass.
    final long valueBytes = (long) Math.ceil((double) values.length() / size); // values
    return docBytes + offsetBytes + lengthBytes + valueBytes;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy