All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.elasticsearch.index.cache.bloom.simple.SimpleBloomCache Maven / Gradle / Ivy

There is a newer version: 7.10.2_1
Show newest version
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.cache.bloom.simple;

import org.apache.lucene.index.*;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.bloom.BloomFilter;
import org.elasticsearch.common.bloom.BloomFilterFactory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeUnit;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.cache.bloom.BloomCache;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.threadpool.ThreadPool;

import java.nio.channels.ClosedChannelException;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 *
 */
public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, SegmentReader.CoreClosedListener {

    private final ThreadPool threadPool;

    private final long maxSize;

    private final ConcurrentMap> cache;

    private final Object creationMutex = new Object();

    @Inject
    public SimpleBloomCache(Index index, @IndexSettings Settings indexSettings, ThreadPool threadPool) {
        super(index, indexSettings);
        this.threadPool = threadPool;

        this.maxSize = indexSettings.getAsSize("index.cache.bloom.max_size", new SizeValue(500, SizeUnit.MEGA)).singles();
        this.cache = ConcurrentCollections.newConcurrentMap();
    }

    @Override
    public void close() throws ElasticSearchException {
        clear();
    }

    @Override
    public void clear() {
        cache.clear();
    }

    @Override
    public void onClose(SegmentReader owner) {
        clear(owner);
    }

    @Override
    public void clear(IndexReader reader) {
        ConcurrentMap map = cache.remove(reader.getCoreCacheKey());
        // help soft/weak handling GC
        if (map != null) {
            map.clear();
        }
    }

    @Override
    public long sizeInBytes() {
        // the overhead of the map is not really relevant...
        long sizeInBytes = 0;
        for (ConcurrentMap map : cache.values()) {
            for (BloomFilterEntry filter : map.values()) {
                sizeInBytes += filter.filter.sizeInBytes();
            }
        }
        return sizeInBytes;
    }

    @Override
    public long sizeInBytes(String fieldName) {
        long sizeInBytes = 0;
        for (ConcurrentMap map : cache.values()) {
            BloomFilterEntry filter = map.get(fieldName);
            if (filter != null) {
                sizeInBytes += filter.filter.sizeInBytes();
            }
        }
        return sizeInBytes;
    }

    @Override
    public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) {
        int currentNumDocs = reader.numDocs();
        if (currentNumDocs == 0) {
            return BloomFilter.EMPTY;
        }
        ConcurrentMap fieldCache = cache.get(reader.getCoreCacheKey());
        if (fieldCache == null) {
            synchronized (creationMutex) {
                fieldCache = cache.get(reader.getCoreCacheKey());
                if (fieldCache == null) {
                    if (reader instanceof SegmentReader) {
                        ((SegmentReader) reader).addCoreClosedListener(this);
                    }
                    fieldCache = ConcurrentCollections.newConcurrentMap();
                    cache.put(reader.getCoreCacheKey(), fieldCache);
                }
            }
        }
        BloomFilterEntry filter = fieldCache.get(fieldName);
        if (filter == null) {
            synchronized (fieldCache) {
                filter = fieldCache.get(fieldName);
                if (filter == null) {
                    filter = new BloomFilterEntry(currentNumDocs, BloomFilter.NONE);
                    fieldCache.put(fieldName, filter);
                    // now, do the async load of it...
                    if (currentNumDocs < maxSize) {
                        filter.loading.set(true);
                        BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                        if (asyncLoad) {
                            threadPool.executor(ThreadPool.Names.CACHE).execute(loader);
                        } else {
                            loader.run();
                            filter = fieldCache.get(fieldName);
                        }
                    }
                }
            }
        }
        // if we too many deletes, we need to reload the bloom filter so it will be more effective
        if (filter.numDocs > 1000 && filter.numDocs < maxSize && (currentNumDocs / filter.numDocs) < 0.6) {
            if (filter.loading.compareAndSet(false, true)) {
                // do the async loading
                BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                if (asyncLoad) {
                    threadPool.executor(ThreadPool.Names.CACHE).execute(loader);
                } else {
                    loader.run();
                    filter = fieldCache.get(fieldName);
                }
            }
        }
        return filter.filter;
    }

    class BloomFilterLoader implements Runnable {
        private final IndexReader reader;
        private final String field;

        BloomFilterLoader(IndexReader reader, String field) {
            this.reader = reader;
            this.field = StringHelper.intern(field);
        }

        @SuppressWarnings({"StringEquality"})
        @Override
        public void run() {
            TermDocs termDocs = null;
            TermEnum termEnum = null;
            try {
                UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
                BloomFilter filter = BloomFilterFactory.getFilter(reader.numDocs(), 15);
                termDocs = reader.termDocs();
                termEnum = reader.terms(new Term(field));
                do {
                    Term term = termEnum.term();
                    if (term == null || term.field() != field) break;

                    // LUCENE MONITOR: 4.0, move to use bytes!
                    Unicode.fromStringAsUtf8(term.text(), utf8Result);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        // when traversing, make sure to ignore deleted docs, so the key->docId will be correct
                        if (!reader.isDeleted(termDocs.doc())) {
                            filter.add(utf8Result.result, 0, utf8Result.length);
                        }
                    }
                } while (termEnum.next());
                ConcurrentMap fieldCache = cache.get(reader.getCoreCacheKey());
                if (fieldCache != null) {
                    if (fieldCache.containsKey(field)) {
                        BloomFilterEntry filterEntry = new BloomFilterEntry(reader.numDocs(), filter);
                        filterEntry.loading.set(false);
                        fieldCache.put(field, filterEntry);
                    }
                }
            } catch (AlreadyClosedException e) {
                // ignore, we are getting closed
            } catch (ClosedChannelException e) {
                // ignore, we are getting closed
            } catch (Exception e) {
                // ignore failures that result from a closed reader...
                if (reader.getRefCount() > 0) {
                    logger.warn("failed to load bloom filter for [{}]", e, field);
                }
            } finally {
                try {
                    if (termDocs != null) {
                        termDocs.close();
                    }
                } catch (Exception e) {
                    // ignore
                }
                try {
                    if (termEnum != null) {
                        termEnum.close();
                    }
                } catch (Exception e) {
                    // ignore
                }
            }
        }
    }

    static class BloomFilterEntry {
        final int numDocs;
        final BloomFilter filter;
        final AtomicBoolean loading = new AtomicBoolean();

        public BloomFilterEntry(int numDocs, BloomFilter filter) {
            this.numDocs = numDocs;
            this.filter = filter;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy