/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.engine;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
/** Maps _uid value to its version information. */
class LiveVersionMap implements ReferenceManager.RefreshListener, Accountable {
private static class Maps {
// All writes (adds and deletes) go into here:
final Map<BytesRef,VersionValue> current;
// Used while refresh is running, and to hold adds/deletes until refresh finishes. We read from both current and old on lookup:
final Map<BytesRef,VersionValue> old;
public Maps(Map<BytesRef,VersionValue> current, Map<BytesRef,VersionValue> old) {
this.current = current;
this.old = old;
}
public Maps() {
this(ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(),
ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency());
}
}
// All deletes also go here, and delete "tombstones" are retained after refresh:
private final Map<BytesRef,VersionValue> tombstones = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
private volatile Maps maps = new Maps();
private ReferenceManager<?> mgr;
/** Bytes consumed for each BytesRef UID:
* In this base value, we account for the {@link BytesRef} object itself as
* well as the header of the byte[] array it holds, and some lost bytes due
* to object alignment. So consumers of this constant just have to add the
* length of the byte[] (assuming it is not shared between multiple
* instances). */
private static final long BASE_BYTES_PER_BYTESREF =
// shallow memory usage of the BytesRef object
RamUsageEstimator.shallowSizeOfInstance(BytesRef.class) +
// header of the byte[] array
RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
// with an alignment size (-XX:ObjectAlignmentInBytes) of 8 (default),
// there could be between 0 and 7 lost bytes, so we account for 3
// lost bytes on average
3;
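// Illustrative arithmetic (assuming a typical 64-bit JVM with compressed oops; the exact constants depend on the JVM and
// Lucene version): with a BytesRef shallow size of 24 bytes and an array header of 16 bytes, the base comes to
// 24 + 16 + 3 = 43 bytes, so a 20-byte _uid would be accounted as roughly 43 + 20 = 63 bytes.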
/** Bytes used by having CHM point to a key/value. */
private static final long BASE_BYTES_PER_CHM_ENTRY;
static {
// use the same impl as the Maps does
Map<Integer, Integer> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
map.put(0, 0);
long chmEntryShallowSize = RamUsageEstimator.shallowSizeOf(map.entrySet().iterator().next());
// assume a load factor of 50%
// for each entry, we need two object refs, one for the entry itself
// and one for the free space that is due to the fact hash tables can
// not be fully loaded
BASE_BYTES_PER_CHM_ENTRY = chmEntryShallowSize + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
}
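// Putting the two constants together (see putUnderLock below), a live (non-delete) entry in the current map is accounted as roughly
// BASE_BYTES_PER_CHM_ENTRY + BASE_BYTES_PER_BYTESREF + uid.bytes.length + versionValue.ramBytesUsed(),
// while for a delete only the CHM entry is charged to ramBytesUsedCurrent and the BytesRef/VersionValue bytes are charged to
// ramBytesUsedTombstones.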
/** Tracks bytes used by current map, i.e. what is freed on refresh. For deletes, which are also added to tombstones, we only account
* for the CHM entry here, and account for BytesRef/VersionValue against the tombstones, since refresh would not clear this RAM. */
final AtomicLong ramBytesUsedCurrent = new AtomicLong();
/** Tracks bytes used by tombstones (deletes) */
final AtomicLong ramBytesUsedTombstones = new AtomicLong();
/** Sync'd because we replace old mgr. */
synchronized void setManager(ReferenceManager<?> newMgr) {
if (mgr != null) {
mgr.removeListener(this);
}
mgr = newMgr;
// In case InternalEngine closes & opens a new IndexWriter/SearcherManager, all deletes are made visible, so we clear old and
// current here. This is safe because the caller holds the writeLock here (so no concurrent adds/deletes can be happening):
maps = new Maps();
// So we are notified when reopen starts and finishes
mgr.addListener(this);
}
@Override
public void beforeRefresh() throws IOException {
// Start sending all updates after this point to the new
// map. While reopen is running, any lookup will first
// try this new map, then fallback to old, then to the
// current searcher:
maps = new Maps(ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(), maps.current);
// This is not 100% correct, since concurrent indexing ops can change these counters in between our execution of the previous
// line and this one, but that should be minor, and the error won't accumulate over time:
ramBytesUsedCurrent.set(0);
}
@Override
public void afterRefresh(boolean didRefresh) throws IOException {
// We can now drop old because these operations are now visible via the newly opened searcher. Even if didRefresh is false, which
// means Lucene did not actually open a new reader because it detected no changes, it's possible old has some entries in it, which
// is fine: it means they were actually already included in the previously opened reader, so we can still safely drop them in that
// case. This is because we assign new maps (in beforeRefresh) slightly before Lucene actually flushes any segments for the
// reopen, and so any concurrent indexing requests can still sneak in a few additions to that current map that are in fact reflected
// in the previous reader. We don't touch tombstones here: they expire on their own index.gc_deletes timeframe:
maps = new Maps(maps.current, ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency());
}
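// Sketch of how the two maps rotate across one refresh cycle (illustrative, derived from the two listener methods above):
//
//   steady state:    current = A (receives writes),      old = empty
//   beforeRefresh(): current = B (new, receives writes), old = A      -> lookups consult B, then A
//   afterRefresh():  current = B,                        old = empty  -> A's entries are now visible via the refreshed searcher
//
// The tombstones map is unaffected by this rotation; its entries are only removed via removeTombstoneUnderLock.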
/** Returns the live version (add or delete) for this uid. */
VersionValue getUnderLock(final Term uid) {
Maps currentMaps = maps;
// First try to get the "live" value:
VersionValue value = currentMaps.current.get(uid.bytes());
if (value != null) {
return value;
}
value = currentMaps.old.get(uid.bytes());
if (value != null) {
return value;
}
return tombstones.get(uid.bytes());
}
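// Note: reading the volatile maps field once into a local (currentMaps above) gives a consistent snapshot of the current/old pair,
// so a concurrent beforeRefresh/afterRefresh swap cannot make this lookup see a mismatched combination of the two maps.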
/** Adds this uid/version to the pending adds map. */
void putUnderLock(BytesRef uid, VersionValue version) {
assert uid.bytes.length == uid.length : "Oversized _uid! UID length: " + uid.length + ", bytes length: " + uid.bytes.length;
long uidRAMBytesUsed = BASE_BYTES_PER_BYTESREF + uid.bytes.length;
final VersionValue prev = maps.current.put(uid, version);
if (prev != null) {
// Deduct RAM for the version we just replaced:
long prevBytes = BASE_BYTES_PER_CHM_ENTRY;
if (prev.delete() == false) {
prevBytes += prev.ramBytesUsed() + uidRAMBytesUsed;
}
ramBytesUsedCurrent.addAndGet(-prevBytes);
}
// Add RAM for the new version:
long newBytes = BASE_BYTES_PER_CHM_ENTRY;
if (version.delete() == false) {
newBytes += version.ramBytesUsed() + uidRAMBytesUsed;
}
ramBytesUsedCurrent.addAndGet(newBytes);
final VersionValue prevTombstone;
if (version.delete()) {
// Also enroll the delete into tombstones, and account for its RAM too:
prevTombstone = tombstones.put(uid, version);
// We initially account for BytesRef/VersionValue RAM for a delete against the tombstones, because this RAM will not be freed up
// on refresh. Later, in removeTombstoneUnderLock, if we clear the tombstone entry but the delete remains in current, we shift
// the accounting to current:
ramBytesUsedTombstones.addAndGet(BASE_BYTES_PER_CHM_ENTRY + version.ramBytesUsed() + uidRAMBytesUsed);
if (prevTombstone == null && prev != null && prev.delete()) {
// If prev was a delete that had already been removed from tombstones, then current was already accounting for the
// BytesRef/VersionValue RAM, so we now deduct that as well:
ramBytesUsedCurrent.addAndGet(-(prev.ramBytesUsed() + uidRAMBytesUsed));
}
} else {
// UID came back to life so we remove the tombstone:
prevTombstone = tombstones.remove(uid);
}
// Deduct tombstones bytes used for the version we just removed or replaced:
if (prevTombstone != null) {
long v = ramBytesUsedTombstones.addAndGet(-(BASE_BYTES_PER_CHM_ENTRY + prevTombstone.ramBytesUsed() + uidRAMBytesUsed));
assert v >= 0: "bytes=" + v;
}
}
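// Worked example with hypothetical numbers: suppose uid.bytes.length == 20 and each VersionValue reports ramBytesUsed() == 40,
// so uidRAMBytesUsed == BASE_BYTES_PER_BYTESREF + 20. Then:
//  - indexing a live doc:  ramBytesUsedCurrent    += BASE_BYTES_PER_CHM_ENTRY + 40 + uidRAMBytesUsed
//  - deleting that doc:    ramBytesUsedCurrent    += BASE_BYTES_PER_CHM_ENTRY                          (the CHM entry only),
//                          ramBytesUsedTombstones += BASE_BYTES_PER_CHM_ENTRY + 40 + uidRAMBytesUsed,
//                          and the replaced live entry's full footprint is deducted from ramBytesUsedCurrent.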
/** Removes this uid from the pending deletes map. */
void removeTombstoneUnderLock(BytesRef uid) {
long uidRAMBytesUsed = BASE_BYTES_PER_BYTESREF + uid.bytes.length;
final VersionValue prev = tombstones.remove(uid);
if (prev != null) {
assert prev.delete();
long v = ramBytesUsedTombstones.addAndGet(-(BASE_BYTES_PER_CHM_ENTRY + prev.ramBytesUsed() + uidRAMBytesUsed));
assert v >= 0: "bytes=" + v;
}
final VersionValue curVersion = maps.current.get(uid);
if (curVersion != null && curVersion.delete()) {
// We now shift accounting of the BytesRef from tombstones to current, because a refresh would clear this RAM. This should be
// uncommon, because with the default refresh=1s and gc_deletes=60s, deletes should be cleared from current long before we drop
// them from tombstones:
ramBytesUsedCurrent.addAndGet(curVersion.ramBytesUsed() + uidRAMBytesUsed);
}
}
/** Caller has a lock, so that this uid will not be concurrently added/deleted by another thread. */
VersionValue getTombstoneUnderLock(BytesRef uid) {
return tombstones.get(uid);
}
/** Iterates over all deleted versions, including new ones (not yet exposed via reader) and old ones (exposed via reader but not yet GC'd). */
Iterable<Map.Entry<BytesRef,VersionValue>> getAllTombstones() {
return tombstones.entrySet();
}
/** Called when this index is closed. */
synchronized void clear() {
maps = new Maps();
tombstones.clear();
ramBytesUsedCurrent.set(0);
// NOTE: we can't zero this here, because a refresh thread could be calling InternalEngine.pruneDeletedTombstones at the same time,
// and this will lead to an assert trip. Presumably it's fine if our ramBytesUsedTombstones is non-zero after clear since the index
// is being closed:
//ramBytesUsedTombstones.set(0);
if (mgr != null) {
mgr.removeListener(this);
mgr = null;
}
}
@Override
public long ramBytesUsed() {
return ramBytesUsedCurrent.get() + ramBytesUsedTombstones.get();
}
/** Returns how much RAM would be freed up by refreshing. This is the same as {@link #ramBytesUsed} except that it does not include
* tombstones, because they are not cleared on refresh. */
long ramBytesUsedForRefresh() {
return ramBytesUsedCurrent.get();
}
@Override
public Collection<Accountable> getChildResources() {
// TODO: useful to break down RAM usage here?
return Collections.emptyList();
}
}
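// Usage sketch (a simplified, assumed flow; not a literal excerpt from InternalEngine, and the searcherManager, uidTerm, uidBytes
// and newVersionValue names below are placeholders):
//
//   LiveVersionMap versionMap = new LiveVersionMap();
//   versionMap.setManager(searcherManager);               // registers this map as a refresh listener
//
//   // under the engine's per-uid lock:
//   VersionValue v = versionMap.getUnderLock(uidTerm);    // resolve the live version or tombstone, if any
//   versionMap.putUnderLock(uidBytes, newVersionValue);   // record the add or delete
//
//   // later, once gc_deletes has elapsed for a tombstone entry:
//   versionMap.removeTombstoneUnderLock(uidBytes);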