org.elasticsearch.repositories.blobstore.ChecksumBlobStoreFormat Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.13.4
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.repositories.blobstore;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamIndexOutput;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.common.CheckedBiFunction;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressorFactory;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.gateway.CorruptStateException;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.zip.CRC32;

/**
 * Snapshot metadata file format used in v2.0 and above.
 * <p>
 * A blob handled by this class consists of a Lucene codec header (codec name and {@link #VERSION}), a SMILE encoded body that is
 * optionally compressed, and a Lucene codec footer whose checksum covers every byte of the blob except the trailing 8 checksum bytes.
 *
 * @param <T> type of the object stored in the blob
 */
public final class ChecksumBlobStoreFormat<T extends ToXContent> {

    // Serialization parameters to specify correct context for metadata serialization.
    // When metadata is serialized certain elements of the metadata shouldn't be included into snapshot
    // exclusion of these elements is done by setting Metadata.CONTEXT_MODE_PARAM to Metadata.CONTEXT_MODE_SNAPSHOT
    public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS = new ToXContent.MapParams(
        Collections.singletonMap(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_SNAPSHOT)
    );

    // The format version
    public static final int VERSION = 1;

    // Buffer size handed to the OutputStreamIndexOutput used when serializing a blob
    private static final int BUFFER_SIZE = 4096;

    // Codec name written to and expected in the blob header
    private final String codec;

    // String#format pattern that turns a logical name into a blob name, see #blobName
    private final String blobNameFormat;

    // Parses an instance of T from (repository name, parser positioned on the blob body)
    private final CheckedBiFunction<String, XContentParser, T, IOException> reader;

    /**
     * @param codec          codec name
     * @param blobNameFormat format of the blobname in {@link String#format} format
     * @param reader         function that deserializes a {@code T} from the repository name and an {@link XContentParser}
     */
    public ChecksumBlobStoreFormat(String codec, String blobNameFormat, CheckedBiFunction<String, XContentParser, T, IOException> reader) {
        this.reader = reader;
        this.blobNameFormat = blobNameFormat;
        this.codec = codec;
    }

    /**
     * Reads and parses the blob with given name, applying name translation using the {@link #blobName} method
     *
     * @param repoName              repository name, passed through to the reader function
     * @param blobContainer         blob container
     * @param name                  name to be translated into
     * @param namedXContentRegistry registry used to create the parser for the blob body
     * @return parsed blob object
     * @throws IOException if reading the blob fails or its header, body or footer cannot be verified
     */
    public T read(String repoName, BlobContainer blobContainer, String name, NamedXContentRegistry namedXContentRegistry)
        throws IOException {
        String blobName = blobName(name);
        try (InputStream in = blobContainer.readBlob(blobName)) {
            return deserialize(repoName, namedXContentRegistry, in);
        }
    }

    /**
     * Translates the given name into a blob name by applying the blob name format this instance was created with.
     *
     * @param name name to be translated
     * @return the formatted blob name
     */
    public String blobName(String name) {
        return String.format(Locale.ROOT, blobNameFormat, name);
    }

    /**
     * Deserializes an object from the given stream: checks the codec header, parses the (optionally compressed) SMILE body with the
     * reader function and finally verifies the footer. The given stream is not closed by this method.
     *
     * @param repoName              repository name, passed through to the reader function
     * @param namedXContentRegistry registry used to create the parser for the blob body
     * @param input                 stream positioned at the start of the blob
     * @return parsed blob object
     * @throws CorruptStateException if the header or the footer of the blob is found to be corrupted
     * @throws IOException           on any other failure to read or parse the blob
     */
    public T deserialize(String repoName, NamedXContentRegistry namedXContentRegistry, InputStream input) throws IOException {
        final DeserializeMetaBlobInputStream deserializeMetaBlobInputStream = new DeserializeMetaBlobInputStream(input);
        try {
            CodecUtil.checkHeader(new InputStreamDataInput(deserializeMetaBlobInputStream), codec, VERSION, VERSION);
            final InputStream wrappedStream;
            if (deserializeMetaBlobInputStream.nextBytesCompressed()) {
                wrappedStream = CompressorFactory.COMPRESSOR.threadLocalInputStream(deserializeMetaBlobInputStream);
            } else {
                wrappedStream = deserializeMetaBlobInputStream;
            }
            final T result;
            try (
                XContentParser parser = XContentType.SMILE.xContent()
                    .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, wrappedStream)
            ) {
                result = reader.apply(repoName, parser);
                // the body must be fully consumed by the reader, i.e. no further token may follow
                XContentParserUtils.ensureExpectedToken(null, parser.nextToken(), parser);
            }
            deserializeMetaBlobInputStream.verifyFooter();
            return result;
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // we trick this into a dedicated exception with the original stacktrace
            throw new CorruptStateException(ex);
        } catch (Exception e) {
            try {
                // drain stream fully and check whether the footer is corrupted
                Streams.consumeFully(deserializeMetaBlobInputStream);
                deserializeMetaBlobInputStream.verifyFooter();
            } catch (CorruptStateException cse) {
                // corruption takes precedence over the original failure, which is kept as a suppressed exception
                cse.addSuppressed(e);
                throw cse;
            } catch (Exception ex) {
                e.addSuppressed(ex);
            }
            throw e;
        }
    }

    /**
     * Wrapper input stream for deserializing blobs that come with a Lucene header and footer in a streaming manner. It manually manages
     * a read buffer to enable not reading into the last 16 bytes (the footer length) of the buffer via the standard read methods so that
     * a parser backed by this stream will only see the blob's body.
     */
    private static final class DeserializeMetaBlobInputStream extends FilterInputStream {

        // checksum updated with all but the last 8 bytes read from the wrapped stream
        private final CRC32 crc32 = new CRC32();

        // Only the first buffer.length - 16 bytes are exposed by the read() methods; once the read position reaches 16 bytes from the end
        // of the buffer the remaining 16 bytes are moved to the start of the buffer and the rest of the buffer is filled from the stream.
        private final byte[] buffer = new byte[1024 * 8];

        // the number of bytes in the buffer, in [0, buffer.length], equal to buffer.length unless the last fill hit EOF
        private int bufferCount;

        // the current read position within the buffer, in [0, bufferCount - 16]
        private int bufferPos;

        DeserializeMetaBlobInputStream(InputStream in) {
            super(in);
        }

        /**
         * Reads a single byte of the blob's header or body.
         *
         * @return the next byte as an unsigned value in [0, 255] or -1 if no more bytes outside of the potential footer are available
         */
        @Override
        public int read() throws IOException {
            if (getAvailable() <= 0) {
                return -1;
            }
            // mask to an unsigned value as required by InputStream#read, otherwise the byte 0xFF would be reported as EOF (-1) and all
            // other bytes with the high bit set would be returned as negative numbers
            return buffer[bufferPos++] & 0xFF;
        }

        /**
         * Reads up to {@code len} bytes, refilling the internal buffer as often as needed until either {@code len} bytes were copied or
         * nothing but the potential footer is left.
         *
         * @return the number of bytes copied into {@code b}, or -1 if {@code len > 0} and nothing could be read
         */
        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int remaining = len;
            int read = 0;
            while (remaining > 0) {
                final int r = doRead(b, off + read, remaining);
                if (r <= 0) {
                    break;
                }
                read += r;
                remaining -= r;
            }
            if (len > 0 && remaining == len) {
                // nothing to read, EOF
                return -1;
            }
            return read;
        }

        @Override
        public void close() throws IOException {
            // not closing the wrapped stream
        }

        /**
         * Copies at most {@code len} of the currently available bytes into {@code b}.
         *
         * @return the number of bytes copied (possibly 0), or -1 if the buffer holds fewer bytes than the footer length
         */
        private int doRead(byte[] b, int off, int len) throws IOException {
            final int available = getAvailable();
            if (available < 0) {
                return -1;
            }
            final int read = Math.min(available, len);
            System.arraycopy(buffer, bufferPos, b, off, read);
            bufferPos += read;
            return read;
        }

        /**
         * Verify footer of the bytes read by this stream the same way {@link CodecUtil#checkFooter(ChecksumIndexInput)} would.
         * <p>
         * NOTE: once the length check has passed this method folds the not yet checksummed bytes of the buffer (including the first 8
         * footer bytes) into the running checksum, so invoking it again afterwards compares a different checksum value against the footer.
         *
         * @throws CorruptStateException if footer is found to be corrupted
         */
        void verifyFooter() throws CorruptStateException {
            if (bufferCount - bufferPos != CodecUtil.footerLength()) {
                throw new CorruptStateException(
                    "should have consumed all but 16 bytes from the buffer but saw buffer pos ["
                        + bufferPos
                        + "] and count ["
                        + bufferCount
                        + "]"
                );
            }
            // bytes before bufferPos were not checksummed yet, and the footer's magic and algorithm id (8 bytes) are part of the checksum
            crc32.update(buffer, 0, bufferPos + 8);
            final int magicFound = Numbers.bytesToInt(buffer, bufferPos);
            if (magicFound != CodecUtil.FOOTER_MAGIC) {
                throw new CorruptStateException("unexpected footer magic [" + magicFound + "]");
            }
            final int algorithmFound = Numbers.bytesToInt(buffer, bufferPos + 4);
            if (algorithmFound != 0) {
                throw new CorruptStateException("unexpected algorithm [" + algorithmFound + "]");
            }
            final long checksum = crc32.getValue();
            final long checksumInFooter = Numbers.bytesToLong(buffer, bufferPos + 8);
            if (checksum != checksumInFooter) {
                throw new CorruptStateException("checksums do not match read [" + checksum + "] but expected [" + checksumInFooter + "]");
            }
        }

        /**
         * @return true if the next bytes in this stream are compressed
         */
        boolean nextBytesCompressed() {
            // we already have bytes buffered here because we verify the blob's header (far less than the 8k buffer size) before calling
            // this method
            assert bufferPos > 0 : "buffer position must be greater than 0 but was [" + bufferPos + "]";
            return CompressorFactory.COMPRESSOR.isCompressed(new BytesArray(buffer, bufferPos, bufferCount - bufferPos));
        }

        /**
         * @return the number of bytes available in the buffer, possibly refilling the buffer if needed; negative if the buffer holds
         *         fewer bytes than the footer length
         */
        private int getAvailable() throws IOException {
            final int footerLen = CodecUtil.footerLength();
            if (bufferCount == 0) {
                // first read, fill the buffer
                bufferCount = Streams.readFully(in, buffer, 0, buffer.length);
            } else if (bufferPos == bufferCount - footerLen) {
                // crc and discard all but the last 16 bytes in the buffer that might be the footer bytes
                assert bufferCount >= footerLen;
                crc32.update(buffer, 0, bufferPos);
                System.arraycopy(buffer, bufferPos, buffer, 0, footerLen);
                bufferCount = footerLen + Streams.readFully(in, buffer, footerLen, buffer.length - footerLen);
                bufferPos = 0;
            }
            // bytes in the buffer minus 16 bytes that could be the footer
            return bufferCount - bufferPos - footerLen;
        }
    }

    /**
     * Writes blob with resolving the blob name using {@link #blobName} method.
     * <p>
     * The blob will optionally be compressed.
     *
     * @param obj                 object to be serialized
     * @param blobContainer       blob container
     * @param name                blob name
     * @param compress            whether to use compression
     */
    public void write(T obj, BlobContainer blobContainer, String name, boolean compress) throws IOException {
        write(obj, blobContainer, name, compress, Collections.emptyMap());
    }

    /**
     * Writes blob with resolving the blob name using {@link #blobName} method.
     * <p>
     * The blob will optionally be compressed.
     *
     * @param obj                            object to be serialized
     * @param blobContainer                  blob container
     * @param name                           blob name
     * @param compress                       whether to use compression
     * @param serializationParams            extra serialization parameters
     */
    public void write(T obj, BlobContainer blobContainer, String name, boolean compress, Map<String, String> serializationParams)
        throws IOException {
        final String blobName = blobName(name);
        blobContainer.writeBlob(blobName, false, false, out -> serialize(obj, blobName, compress, serializationParams, out));
    }

    /**
     * Serializes the given object to the given stream without any extra serialization parameters.
     *
     * @param obj          object to be serialized
     * @param blobName     blob name, only used to describe the output
     * @param compress     whether to use compression
     * @param outputStream stream to write to, not closed by this method
     */
    public void serialize(final T obj, final String blobName, final boolean compress, final OutputStream outputStream) throws IOException {
        serialize(obj, blobName, compress, Collections.emptyMap(), outputStream);
    }

    /**
     * Serializes the given object to the given stream as codec header, SMILE body (optionally compressed) and codec footer.
     *
     * @param obj          object to be serialized
     * @param blobName     blob name, only used to describe the output
     * @param compress     whether to use compression
     * @param extraParams  extra serialization parameters, applied on top of {@link #SNAPSHOT_ONLY_FORMAT_PARAMS} if not empty
     * @param outputStream stream to write to, not closed by this method
     */
    public void serialize(
        final T obj,
        final String blobName,
        final boolean compress,
        final Map<String, String> extraParams,
        final OutputStream outputStream
    ) throws IOException {
        try (
            OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput(
                "ChecksumBlobStoreFormat.serialize(blob=\"" + blobName + "\")",
                blobName,
                Streams.noCloseStream(outputStream),
                BUFFER_SIZE
            )
        ) {
            CodecUtil.writeHeader(indexOutput, codec, VERSION);
            try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) {
                @Override
                public void close() {
                    // this is important since some of the XContentBuilders write bytes on close.
                    // in order to write the footer we need to prevent closing the actual index input.
                }
            };
                XContentBuilder builder = XContentFactory.contentBuilder(
                    XContentType.SMILE,
                    compress ? CompressorFactory.COMPRESSOR.threadLocalOutputStream(indexOutputOutputStream) : indexOutputOutputStream
                )
            ) {
                ToXContent.Params params = extraParams.isEmpty()
                    ? SNAPSHOT_ONLY_FORMAT_PARAMS
                    : new ToXContent.DelegatingMapParams(extraParams, SNAPSHOT_ONLY_FORMAT_PARAMS);

                builder.startObject();
                obj.toXContent(builder, params);
                builder.endObject();
            }
            // the footer must only be written once the builder has been closed and flushed all of the body
            CodecUtil.writeFooter(indexOutput);
        }
    }
}