All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.common.compress.CompressedXContent Maven / Gradle / Ivy

There is a newer version: 8.14.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.common.compress;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.hash.MessageDigests;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.json.JsonXContent;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.Base64;
import java.util.Map;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

/**
 * Similar class to the {@link String} class except that it internally stores
 * data using a compressed representation in order to require less permanent
 * memory. Note that the compressed string might still sometimes need to be
 * decompressed in order to perform equality checks or to compute hash codes.
 */
public final class CompressedXContent {

    private static final ThreadLocal inflater = ThreadLocal.withInitial(InflaterAndBuffer::new);

    private static final ThreadLocal baos = ThreadLocal.withInitial(BytesStreamOutput::new);

    private static String sha256(BytesReference data) {
        MessageDigest messageDigest = MessageDigests.sha256();
        try {
            data.writeTo(new DigestOutputStream(Streams.NULL_OUTPUT_STREAM, messageDigest));
        } catch (IOException bogus) {
            // cannot happen
            throw new Error(bogus);
        }
        return Base64.getEncoder().encodeToString(messageDigest.digest());
    }

    private static String sha256FromCompressed(byte[] compressed) {
        MessageDigest messageDigest = MessageDigests.sha256();
        try (InflaterAndBuffer inflaterAndBuffer = inflater.get()) {
            final Inflater inflater = inflaterAndBuffer.inflater;
            final ByteBuffer buffer = inflaterAndBuffer.buffer;
            assert assertBufferIsCleared(buffer);
            setInflaterInput(compressed, inflater);
            do {
                if (inflater.inflate(buffer) > 0) {
                    messageDigest.update(buffer.flip());
                }
                buffer.clear();
            } while (inflater.finished() == false);
            return Base64.getEncoder().encodeToString(messageDigest.digest());
        } catch (DataFormatException e) {
            throw new ElasticsearchException(e);
        }
    }

    private final byte[] bytes;
    private final String sha256;

    // Used for serialization
    private CompressedXContent(byte[] compressed, String sha256) {
        this.bytes = compressed;
        this.sha256 = sha256;
        assertConsistent();
    }

    public CompressedXContent(Map map) throws IOException {
        this(((builder, params) -> builder.mapContents(map)), ToXContent.EMPTY_PARAMS);
    }

    public CompressedXContent(ToXContent xcontent) throws IOException {
        this(xcontent, ToXContent.EMPTY_PARAMS);
    }

    /**
     * Create a {@link CompressedXContent} out of a {@link ToXContent} instance.
     */
    public CompressedXContent(ToXContent xcontent, ToXContent.Params params) throws IOException {
        MessageDigest messageDigest = MessageDigests.sha256();
        BytesStreamOutput bStream = baos.get();
        try {
            OutputStream checkedStream = new DigestOutputStream(
                CompressorFactory.COMPRESSOR.threadLocalOutputStream(bStream),
                messageDigest
            );
            try (XContentBuilder builder = XContentFactory.jsonBuilder(checkedStream)) {
                if (xcontent.isFragment()) {
                    builder.startObject();
                }
                xcontent.toXContent(builder, params);
                if (xcontent.isFragment()) {
                    builder.endObject();
                }
            }
            this.bytes = bStream.copyBytes().array();
        } finally {
            bStream.reset();
        }
        this.sha256 = Base64.getEncoder().encodeToString(messageDigest.digest());
        assertConsistent();
    }

    /**
     * Create a {@link CompressedXContent} out of a serialized {@link ToXContent}
     * that may already be compressed.
     */
    public CompressedXContent(BytesReference data) throws IOException {
        Compressor compressor = CompressorFactory.compressor(data);
        if (compressor != null) {
            // already compressed...
            this.bytes = BytesReference.toBytes(data);
            this.sha256 = sha256FromCompressed(this.bytes);
        } else {
            this.bytes = BytesReference.toBytes(CompressorFactory.COMPRESSOR.compress(data));
            this.sha256 = sha256(data);
        }
        assertConsistent();
    }

    private void assertConsistent() {
        assert CompressorFactory.compressor(new BytesArray(bytes)) != null;
        assert this.sha256.equals(sha256(uncompressed()));
        assert this.sha256.equals(sha256FromCompressed(bytes));
    }

    public CompressedXContent(byte[] data) throws IOException {
        this(new BytesArray(data));
    }

    /**
     * Parses the given JSON string and then serializes it back in compressed form without any whitespaces. This is used to normalize
     * mapping json strings for deduplication.
     *
     * @param json string containing valid JSON
     * @return compressed x-content normalized to not contain any whitespaces
     */
    public static CompressedXContent fromJSON(String json) throws IOException {
        return new CompressedXContent(new ToXContent() {
            @Override
            public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
                return builder.copyCurrentStructure(JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json));
            }

            @Override
            public boolean isFragment() {
                return false;
            }
        });
    }

    public CompressedXContent(String str) throws IOException {
        this(new BytesArray(str.getBytes(StandardCharsets.UTF_8)));
    }

    /** Return the compressed bytes. */
    public byte[] compressed() {
        return this.bytes;
    }

    /** Return the compressed bytes as a {@link BytesReference}. */
    public BytesReference compressedReference() {
        return new BytesArray(bytes);
    }

    /** Return the uncompressed bytes. */
    public BytesReference uncompressed() {
        try {
            return CompressorFactory.uncompress(new BytesArray(bytes));
        } catch (IOException e) {
            throw new IllegalStateException("Cannot decompress compressed string", e);
        }
    }

    public String string() {
        return uncompressed().utf8ToString();
    }

    public String getSha256() {
        return sha256;
    }

    public static CompressedXContent readCompressedString(StreamInput in) throws IOException {
        final String sha256;
        final byte[] compressedData;
        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
            sha256 = in.readString();
            compressedData = in.readByteArray();
        } else {
            int crc32 = in.readInt();
            compressedData = in.readByteArray();
            sha256 = sha256FromCompressed(compressedData);
        }
        return new CompressedXContent(compressedData, sha256);
    }

    public void writeTo(StreamOutput out) throws IOException {
        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
            out.writeString(sha256);
        } else {
            int crc32 = crc32FromCompressed(bytes);
            out.writeInt(crc32);
        }
        out.writeByteArray(bytes);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        CompressedXContent that = (CompressedXContent) o;
        return sha256.equals(that.sha256);
    }

    /**
     * Copies the x-content in this instance to the given builder token by token. This operation is equivalent to parsing the contents
     * of this instance into a map and then writing the map to the given {@link XContentBuilder} functionally but is much more efficient.
     *
     * @param builder builder to copy to
     * @throws IOException on failure
     */
    public void copyTo(XContentBuilder builder) throws IOException {
        try (
            InputStream decompressed = CompressorFactory.COMPRESSOR.threadLocalInputStream(new ByteArrayInputStream(bytes));
            XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, decompressed)
        ) {
            builder.copyCurrentStructure(parser);
        }
    }

    @Override
    public int hashCode() {
        return sha256.hashCode();
    }

    @Override
    public String toString() {
        return string();
    }

    private static int crc32FromCompressed(byte[] compressed) {
        CRC32 crc32 = new CRC32();
        try (InflaterAndBuffer inflaterAndBuffer = inflater.get()) {
            final Inflater inflater = inflaterAndBuffer.inflater;
            final ByteBuffer buffer = inflaterAndBuffer.buffer;
            assert assertBufferIsCleared(buffer);
            setInflaterInput(compressed, inflater);
            do {
                if (inflater.inflate(buffer) > 0) {
                    crc32.update(buffer.flip());
                }
                buffer.clear();
            } while (inflater.finished() == false);
            return (int) crc32.getValue();
        } catch (DataFormatException e) {
            throw new ElasticsearchException(e);
        }
    }

    /**
     * Set the given bytes as inflater input, accounting for the fact that they start with our header of size
     * {@link DeflateCompressor#HEADER_SIZE}.
     */
    private static void setInflaterInput(byte[] compressed, Inflater inflater) {
        inflater.setInput(compressed, DeflateCompressor.HEADER_SIZE, compressed.length - DeflateCompressor.HEADER_SIZE);
    }

    private static boolean assertBufferIsCleared(ByteBuffer buffer) {
        assert buffer.limit() == buffer.capacity()
            : "buffer limit != capacity, was [" + buffer.limit() + "] and [" + buffer.capacity() + "]";
        assert buffer.position() == 0 : "buffer position != 0, was [" + buffer.position() + "]";
        return true;
    }

    private static final class InflaterAndBuffer implements Releasable {

        final ByteBuffer buffer = ByteBuffer.allocate(DeflateCompressor.BUFFER_SIZE);

        final Inflater inflater = new Inflater(true);

        @Override
        public void close() {
            inflater.reset();
            buffer.clear();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy