/*
* @notice
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Modifications copyright (C) 2022 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.bloomfilter;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ByteArray;
import org.elasticsearch.core.IOUtils;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
/**
 * This implementation is forked from Lucene's {@code BloomFilteringPostingsFormat} to support on-disk bloom filters.
 *
 * A {@link PostingsFormat} useful for low doc-frequency fields such as primary keys. Bloom filters
 * offer "fast-fail" for reads in segments known to have no record of the key.
*/
public class ES85BloomFilterPostingsFormat extends PostingsFormat {
static final String BLOOM_CODEC_NAME = "ES85BloomFilter";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final String BLOOM_FILTER_META_FILE = "bfm";
static final String BLOOM_FILTER_INDEX_FILE = "bfi";
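// On-disk layout (as written by FieldsWriter below): a meta file ("bfm") holds the codec header,
// the per-format field groups, the per-field bloom filter descriptors, and a footer; an index
// file ("bfi") holds the raw bloom filter bit sets, one contiguous run of bytes per field.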
private Function<String, PostingsFormat> postingsFormats;
private BigArrays bigArrays;
public ES85BloomFilterPostingsFormat(BigArrays bigArrays, Function<String, PostingsFormat> postingsFormats) {
this();
this.bigArrays = Objects.requireNonNull(bigArrays);
this.postingsFormats = Objects.requireNonNull(postingsFormats);
}
public ES85BloomFilterPostingsFormat() {
super(BLOOM_CODEC_NAME);
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
if (postingsFormats == null || bigArrays == null) {
assert false : BLOOM_CODEC_NAME + " was initialized with a wrong constructor";
throw new UnsupportedOperationException(BLOOM_CODEC_NAME + " was initialized with a wrong constructor");
}
return new FieldsWriter(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return new FieldsReader(state);
}
@Override
public String toString() {
return BLOOM_CODEC_NAME;
}
private static String metaFile(SegmentInfo si, String segmentSuffix) {
return IndexFileNames.segmentFileName(si.name, segmentSuffix, BLOOM_FILTER_META_FILE);
}
private static String indexFile(SegmentInfo si, String segmentSuffix) {
return IndexFileNames.segmentFileName(si.name, segmentSuffix, BLOOM_FILTER_INDEX_FILE);
}
final class FieldsWriter extends FieldsConsumer {
private final SegmentWriteState state;
private final IndexOutput indexOut;
private final List<BloomFilter> bloomFilters = new ArrayList<>();
private final List<FieldsGroup> fieldsGroups = new ArrayList<>();
private final List<Closeable> toCloses = new ArrayList<>();
private boolean closed;
FieldsWriter(SegmentWriteState state) throws IOException {
this.state = state;
boolean success = false;
try {
indexOut = state.directory.createOutput(indexFile(state.segmentInfo, state.segmentSuffix), state.context);
toCloses.add(indexOut);
CodecUtil.writeIndexHeader(indexOut, BLOOM_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(toCloses);
}
}
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
writePostings(fields, norms);
writeBloomFilters(fields);
}
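// Delegate the actual postings to the per-field formats: fields sharing the same delegate
// format are grouped under one numeric suffix, so each delegate writes its own segment files.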
private void writePostings(Fields fields, NormsProducer norms) throws IOException {
final Map<PostingsFormat, FieldsGroup> currentGroups = new HashMap<>();
for (String field : fields) {
final PostingsFormat postingsFormat = postingsFormats.apply(field);
if (postingsFormat == null) {
throw new IllegalStateException("PostingsFormat for field [" + field + "] wasn't specified");
}
FieldsGroup group = currentGroups.get(postingsFormat);
if (group == null) {
group = new FieldsGroup(postingsFormat, Integer.toString(fieldsGroups.size()), new ArrayList<>());
currentGroups.put(postingsFormat, group);
fieldsGroups.add(group);
}
group.fields.add(field);
}
for (FieldsGroup group : currentGroups.values()) {
final FieldsConsumer writer = group.postingsFormat.fieldsConsumer(new SegmentWriteState(state, group.suffix));
toCloses.add(writer);
final Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
@Override
public Iterator<String> iterator() {
return group.fields.iterator();
}
};
writer.write(maskedFields, norms);
}
}
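// For each field, build an in-memory bit set sized from the segment's maxDoc, set one bit per
// term (single murmur3 hash), then append the bytes to the index file and remember the offset.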
private void writeBloomFilters(Fields fields) throws IOException {
for (String field : fields) {
final Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
final int bloomFilterSize = bloomFilterSize(state.segmentInfo.maxDoc());
final int numBytes = numBytesForBloomFilter(bloomFilterSize);
try (ByteArray buffer = bigArrays.newByteArray(numBytes)) {
final TermsEnum termsEnum = terms.iterator();
while (true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final int hash = hashTerm(term) % bloomFilterSize;
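// Set bit number `hash`: byte index is hash / 8, the mask selects bit hash % 8 within it.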
final int pos = hash >> 3;
final int mask = 1 << (hash & 0x7);
final byte val = (byte) (buffer.get(pos) | mask);
buffer.set(pos, val);
}
bloomFilters.add(new BloomFilter(field, indexOut.getFilePointer(), bloomFilterSize));
final BytesReference bytes = BytesReference.fromByteArray(buffer, numBytes);
bytes.writeTo(new IndexOutputOutputStream(indexOut));
}
}
}
@Override
public void close() throws IOException {
if (closed) {
return;
}
closed = true;
try {
CodecUtil.writeFooter(indexOut);
} finally {
IOUtils.close(toCloses);
}
try (IndexOutput metaOut = state.directory.createOutput(metaFile(state.segmentInfo, state.segmentSuffix), state.context)) {
CodecUtil.writeIndexHeader(metaOut, BLOOM_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
// write postings formats
metaOut.writeVInt(fieldsGroups.size());
for (FieldsGroup group : fieldsGroups) {
group.writeTo(metaOut, state.fieldInfos);
}
// Write bloom filters
metaOut.writeVInt(bloomFilters.size());
for (BloomFilter bloomFilter : bloomFilters) {
bloomFilter.writeTo(metaOut, state.fieldInfos);
}
CodecUtil.writeFooter(metaOut);
}
}
}
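// Metadata for one field's bloom filter: where its bits start in the index file and how many
// bits the filter contains.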
private record BloomFilter(String field, long startFilePointer, int bloomFilterSize) {
void writeTo(IndexOutput out, FieldInfos fieldInfos) throws IOException {
out.writeVInt(fieldInfos.fieldInfo(field).number);
out.writeVLong(startFilePointer);
out.writeVInt(bloomFilterSize);
}
static BloomFilter readFrom(IndexInput in, FieldInfos fieldInfos) throws IOException {
final String fieldName = fieldInfos.fieldInfo(in.readVInt()).name;
final long startFilePointer = in.readVLong();
final int bloomFilterSize = in.readVInt();
return new BloomFilter(fieldName, startFilePointer, bloomFilterSize);
}
}
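// A set of fields sharing one delegate postings format; fields are serialized by field number
// to keep the meta file compact.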
private record FieldsGroup(PostingsFormat postingsFormat, String suffix, List<String> fields) {
void writeTo(IndexOutput out, FieldInfos fieldInfos) throws IOException {
out.writeString(postingsFormat.getName());
out.writeString(suffix);
out.writeVInt(fields.size());
for (String field : fields) {
out.writeVInt(fieldInfos.fieldInfo(field).number);
}
}
static FieldsGroup readFrom(IndexInput in, FieldInfos fieldInfos) throws IOException {
final PostingsFormat postingsFormat = forName(in.readString());
final String suffix = in.readString();
final int numFields = in.readVInt();
final List<String> fields = new ArrayList<>();
for (int i = 0; i < numFields; i++) {
fields.add(fieldInfos.fieldInfo(in.readVInt()).name);
}
return new FieldsGroup(postingsFormat, suffix, fields);
}
}
static final class FieldsReader extends FieldsProducer {
private final Map<String, BloomFilter> bloomFilters;
private final List<Closeable> toCloses = new ArrayList<>();
private final Map<String, FieldsProducer> readerMap = new HashMap<>();
private final IndexInput indexIn;
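// Read path: check and consume the meta file in one pass (READONCE), open one delegate
// FieldsProducer per group, then open the index file and verify its header and checksum.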
FieldsReader(SegmentReadState state) throws IOException {
boolean success = false;
try (
ChecksumIndexInput metaIn = state.directory.openChecksumInput(
metaFile(state.segmentInfo, state.segmentSuffix),
IOContext.READONCE
)
) {
CodecUtil.checkIndexHeader(
metaIn,
BLOOM_CODEC_NAME,
VERSION_START,
VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix
);
// read postings formats
final int numFieldsGroups = metaIn.readVInt();
for (int i = 0; i < numFieldsGroups; i++) {
final FieldsGroup group = FieldsGroup.readFrom(metaIn, state.fieldInfos);
final FieldsProducer reader = group.postingsFormat.fieldsProducer(new SegmentReadState(state, group.suffix));
toCloses.add(reader);
for (String field : group.fields) {
readerMap.put(field, reader);
}
}
// read bloom filters
final int numBloomFilters = metaIn.readVInt();
bloomFilters = new HashMap<>(numBloomFilters);
for (int i = 0; i < numBloomFilters; i++) {
final BloomFilter bloomFilter = BloomFilter.readFrom(metaIn, state.fieldInfos);
bloomFilters.put(bloomFilter.field, bloomFilter);
}
CodecUtil.checkFooter(metaIn);
indexIn = state.directory.openInput(indexFile(state.segmentInfo, state.segmentSuffix), state.context);
toCloses.add(indexIn);
CodecUtil.checkIndexHeader(
indexIn,
BLOOM_CODEC_NAME,
VERSION_START,
VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix
);
CodecUtil.retrieveChecksum(indexIn);
assert assertBloomFilterSizes(state.segmentInfo);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(toCloses);
}
}
}
private boolean assertBloomFilterSizes(SegmentInfo segmentInfo) {
for (BloomFilter bloomFilter : bloomFilters.values()) {
assert bloomFilter.bloomFilterSize == bloomFilterSize(segmentInfo.maxDoc())
: "bloom_filter=" + bloomFilter + ", max_docs=" + segmentInfo.maxDoc();
}
return true;
}
@Override
public Iterator<String> iterator() {
return readerMap.keySet().iterator();
}
@Override
public void close() throws IOException {
IOUtils.close(toCloses);
}
@Override
public Terms terms(String field) throws IOException {
final FieldsProducer reader = readerMap.get(field);
if (reader == null) {
return null;
}
final Terms terms = reader.terms(field);
if (terms == null) {
return null;
}
final BloomFilter bloomFilter = bloomFilters.get(field);
if (bloomFilter != null) {
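// Expose the filter's bytes as a random-access slice, so lookups read single bytes on
// demand instead of loading the whole bit set onto the heap.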
final RandomAccessInput data = indexIn.randomAccessSlice(
bloomFilter.startFilePointer(),
numBytesForBloomFilter(bloomFilter.bloomFilterSize)
);
return new BloomFilterTerms(terms, data, bloomFilter.bloomFilterSize);
} else {
return terms;
}
}
@Override
public int size() {
return readerMap.size();
}
@Override
public void checkIntegrity() throws IOException {
// the meta file was already fully checked when it was read; now fully check the index file.
CodecUtil.checksumEntireFile(indexIn);
// multiple fields can share the same reader
final Set<FieldsProducer> seenReaders = new HashSet<>();
for (FieldsProducer reader : readerMap.values()) {
if (seenReaders.add(reader)) {
reader.checkIntegrity();
}
}
}
}
private static class BloomFilterTerms extends FilterLeafReader.FilterTerms {
private final RandomAccessInput data;
private final int bloomFilterSize;
BloomFilterTerms(Terms in, RandomAccessInput data, int bloomFilterSize) {
super(in);
this.data = data;
this.bloomFilterSize = bloomFilterSize;
}
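// Mirror of the write-side bit math: a clear bit proves the term is absent; a set bit only
// means the term may be present (bloom filters give false positives, never false negatives).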
private boolean mayContainTerm(BytesRef term) throws IOException {
final int hash = hashTerm(term) % bloomFilterSize;
final int pos = hash >> 3;
final int mask = 1 << (hash & 0x7);
final byte bits = data.readByte(pos);
return (bits & mask) != 0;
}
@Override
public TermsEnum iterator() throws IOException {
return new LazyFilterTermsEnum() {
private TermsEnum delegate;
@Override
TermsEnum getDelegate() throws IOException {
if (delegate == null) {
delegate = in.iterator();
}
return delegate;
}
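// The fast-fail path: only seekExact(BytesRef) consults the bloom filter; if the bit is
// clear we answer "not found" without ever creating or touching the delegate TermsEnum.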
@Override
public boolean seekExact(BytesRef term) throws IOException {
if (mayContainTerm(term)) {
return getDelegate().seekExact(term);
} else {
return false;
}
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
getDelegate().seekExact(term, state);
}
@Override
public TermState termState() throws IOException {
// TODO: return TermState that includes BloomFilter and fix _disk_usage API
return getDelegate().termState();
}
};
}
}
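// A TermsEnum that defers creating its delegate until an operation actually needs one, so a
// bloom-filter miss in seekExact above never pays for opening the underlying terms dictionary.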
private abstract static class LazyFilterTermsEnum extends BaseTermsEnum {
abstract TermsEnum getDelegate() throws IOException;
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
return getDelegate().seekCeil(text);
}
@Override
public void seekExact(long ord) throws IOException {
getDelegate().seekExact(ord);
}
@Override
public BytesRef term() throws IOException {
return getDelegate().term();
}
@Override
public long ord() throws IOException {
return getDelegate().ord();
}
@Override
public int docFreq() throws IOException {
return getDelegate().docFreq();
}
@Override
public long totalTermFreq() throws IOException {
return getDelegate().totalTermFreq();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return getDelegate().postings(reuse, flags);
}
@Override
public ImpactsEnum impacts(int flags) throws IOException {
return getDelegate().impacts(flags);
}
@Override
public BytesRef next() throws IOException {
return getDelegate().next();
}
@Override
public AttributeSource attributes() {
try {
return getDelegate().attributes();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}
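// Sizing: ~10 bits per document, capped at Integer.MAX_VALUE bits. Assuming roughly one unique
// term per document (the primary-key case), a single hash function gives an expected
// false-positive rate of about 1 - e^(-1/10) ≈ 9.5%; e.g. a 1,000,000-doc segment gets a
// 10,000,000-bit (~1.2 MB) filter.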
static int bloomFilterSize(int maxDocs) {
// 10% saturation (i.e., 10 bits for each term)
final long numBits = maxDocs * 10L;
if (numBits > Integer.MAX_VALUE) {
return Integer.MAX_VALUE;
} else {
return (int) numBits;
}
}
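// Round the bit count up to whole bytes, e.g. 10 bits -> 2 bytes.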
static int numBytesForBloomFilter(int bloomFilterSize) {
return Math.toIntExact((bloomFilterSize + 7L) / 8L);
}
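// Masking with 0x7FFF_FFFF clears the sign bit, so the modulo performed by callers is always
// non-negative and stays a valid bit index.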
static int hashTerm(BytesRef br) {
final int hash = murmurhash3_x86_32(br.bytes, br.offset, br.length, 0x9747b28c);
return hash & 0x7FFF_FFFF;
}
/**
* Forked from Lucene's StringHelper#murmurhash3_x86_32 so that changes to the Lucene implementation
* do not break the compatibility of this format.
*/
@SuppressWarnings("fallthrough")
private static int murmurhash3_x86_32(byte[] data, int offset, int len, int seed) {
final int c1 = 0xcc9e2d51;
final int c2 = 0x1b873593;
int h1 = seed;
int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
for (int i = offset; i < roundedEnd; i += 4) {
// little endian load order
int k1 = (int) BitUtil.VH_LE_INT.get(data, i);
k1 *= c1;
k1 = Integer.rotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = Integer.rotateLeft(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
// tail
int k1 = 0;
switch (len & 0x03) {
case 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16;
// fallthrough
case 2:
k1 |= (data[roundedEnd + 1] & 0xff) << 8;
// fallthrough
case 1:
k1 |= (data[roundedEnd] & 0xff);
k1 *= c1;
k1 = Integer.rotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
}
// finalization
h1 ^= len;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
}