All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.orc.impl.ZstdCodec Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.impl;

import com.github.luben.zstd.Zstd;
import com.github.luben.zstd.ZstdCompressCtx;
import org.apache.orc.CompressionCodec;
import org.apache.orc.CompressionKind;

import java.io.IOException;
import java.nio.ByteBuffer;

/* This file is based on source code from the ORC Project (http://orc.apache.org/), licensed by the Apache
 * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership. */

/**
 * orc ZstdCodec.
 *
 * 

NOTE: The file was copied and modified to support zstd-jni. This feature is only supported in * ORC 2.0, but 2.0 only supports JDK17. We need to support JDK8. */ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { private ZstdOptions zstdOptions = null; private ZstdCompressCtx zstdCompressCtx = null; public ZstdCodec(int level, int windowLog) { this.zstdOptions = new ZstdOptions(level, windowLog); } public ZstdCodec() { this(3, 0); } public ZstdOptions getZstdOptions() { return zstdOptions; } // Thread local buffer private static final ThreadLocal threadBuffer = ThreadLocal.withInitial(() -> null); protected static byte[] getBuffer(int size) { byte[] result = threadBuffer.get(); if (result == null || result.length < size || result.length > size * 2) { result = new byte[size]; threadBuffer.set(result); } return result; } static class ZstdOptions implements Options { private int level; private int windowLog; ZstdOptions(int level, int windowLog) { this.level = level; this.windowLog = windowLog; } @Override public ZstdOptions copy() { return new ZstdOptions(level, windowLog); } @Override public Options setSpeed(SpeedModifier newValue) { return this; } /** * Sets the Zstandard long mode maximum back-reference distance, expressed as a power of 2. * *

The value must be between ZSTD_WINDOWLOG_MIN (10) and ZSTD_WINDOWLOG_MAX (30 and 31 on * 32/64-bit architectures, respectively). * *

A value of 0 is a special value indicating to use the default * ZSTD_WINDOWLOG_LIMIT_DEFAULT of 27, which corresponds to back-reference window size of * 128MiB. * * @param newValue The desired power-of-2 value back-reference distance. * @return ZstdOptions */ public ZstdOptions setWindowLog(int newValue) { if ((newValue < Zstd.windowLogMin() || newValue > Zstd.windowLogMax()) && newValue != 0) { throw new IllegalArgumentException( String.format( "Zstd compression window size should be in the range %d to %d," + " or set to the default value of 0.", Zstd.windowLogMin(), Zstd.windowLogMax())); } windowLog = newValue; return this; } /** * Sets the Zstandard compression codec compression level directly using the integer * setting. This value is typically between 0 and 22, with larger numbers indicating more * aggressive compression and lower speed. * *

This method provides additional granularity beyond the setSpeed method so that users * can select a specific level. * * @param newValue The level value of compression to set. * @return ZstdOptions */ public ZstdOptions setLevel(int newValue) { if (newValue < Zstd.minCompressionLevel() || newValue > Zstd.maxCompressionLevel()) { throw new IllegalArgumentException( String.format( "Zstd compression level should be in the range %d to %d", Zstd.minCompressionLevel(), Zstd.maxCompressionLevel())); } level = newValue; return this; } @Override public ZstdOptions setData(DataKind newValue) { return this; // We don't support setting DataKind in ZstdCodec. } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } ZstdOptions that = (ZstdOptions) o; if (level != that.level) { return false; } return windowLog == that.windowLog; } @Override public int hashCode() { int result = level; result = 31 * result + windowLog; return result; } } private static final ZstdOptions DEFAULT_OPTIONS = new ZstdOptions(3, 0); @Override public Options getDefaultOptions() { return DEFAULT_OPTIONS; } /** * Compresses an input ByteBuffer into an output ByteBuffer using Zstandard compression. If the * maximum bound of the number of output bytes exceeds the output ByteBuffer size, the remaining * bytes are written to the overflow ByteBuffer. * * @param in the bytes to compress * @param out the compressed bytes * @param overflow put any additional bytes here * @param options the options to control compression * @return ZstdOptions */ @Override public boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow, Options options) throws IOException { int inBytes = in.remaining(); // Skip with minimum ZStandard format size: // https://datatracker.ietf.org/doc/html/rfc8878#name-zstandard-frames // Magic Number (4 bytes) + Frame Header (2 bytes) + Data Block Header (3 bytes) if (inBytes < 10) { return false; } ZstdOptions zso = (ZstdOptions) options; zstdCompressCtx = new ZstdCompressCtx(); zstdCompressCtx.setLevel(zso.level); zstdCompressCtx.setLong(zso.windowLog); zstdCompressCtx.setChecksum(false); try { byte[] compressed = getBuffer((int) Zstd.compressBound(inBytes)); int outBytes = zstdCompressCtx.compressByteArray( compressed, 0, compressed.length, in.array(), in.arrayOffset() + in.position(), inBytes); if (outBytes < inBytes) { int remaining = out.remaining(); if (remaining >= outBytes) { System.arraycopy( compressed, 0, out.array(), out.arrayOffset() + out.position(), outBytes); out.position(out.position() + outBytes); } else { System.arraycopy( compressed, 0, out.array(), out.arrayOffset() + out.position(), remaining); out.position(out.limit()); System.arraycopy( compressed, remaining, overflow.array(), overflow.arrayOffset(), outBytes - remaining); overflow.position(outBytes - remaining); } return true; } else { return false; } } finally { zstdCompressCtx.close(); } } @Override public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { if (in.isDirect() && out.isDirect()) { directDecompress(in, out); return; } int srcOffset = in.arrayOffset() + in.position(); int srcSize = in.remaining(); int dstOffset = out.arrayOffset() + out.position(); int dstSize = out.remaining() - dstOffset; long decompressOut = Zstd.decompressByteArray( out.array(), dstOffset, dstSize, in.array(), srcOffset, srcSize); in.position(in.limit()); out.position(dstOffset + (int) decompressOut); out.flip(); } @Override public boolean isAvailable() { return true; } @Override public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException { Zstd.decompress(out, in); out.flip(); } @Override public void reset() {} @Override public void destroy() { if (zstdCompressCtx != null) { zstdCompressCtx.close(); } } @Override public CompressionKind getKind() { return CompressionKind.ZSTD; } @Override public void close() { OrcCodecPool.returnCodec(CompressionKind.ZSTD, this); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy