All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.backward_codecs.compressing.CompressionMode Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.backward_codecs.compressing;

import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.compress.LZ4;

/**
 * A compression mode. Tells how much effort should be spent on compression and decompression of
 * stored fields.
 *
 * @lucene.experimental
 */
public abstract class CompressionMode {

  /**
   * Speed-oriented compression mode: it gives up compression ratio in exchange for speed. The
   * compression ratio might remain high, but both compression and decompression are very fast. A
   * good fit for indices that are updated at a high rate yet must still load documents from disk
   * quickly.
   *
   * <p>Compressors handed out by this mode are {@code LZ4FastCompressor} instances; decompression
   * goes through the LZ4 decompressor that {@link #FAST_DECOMPRESSION} also returns.
   */
  public static final CompressionMode FAST =
      new CompressionMode() {

        @Override
        public String toString() {
          return "FAST";
        }

        @Override
        public Decompressor newDecompressor() {
          // the same LZ4 decompressor object is returned on every call
          return LZ4_DECOMPRESSOR;
        }

        @Override
        public Compressor newCompressor() {
          // a fresh compressor is allocated on every call
          return new LZ4FastCompressor();
        }
      };

  /**
   * Ratio-oriented compression mode: it gives up speed in exchange for compression ratio.
   * Compression and decompression might be slow, but the compression ratio should be good. Worth
   * considering if/when the index is much bigger than the OS cache.
   *
   * <p>Compressors handed out by this mode are {@code DeflateCompressor} instances created with
   * level 6; every call to {@link #newDecompressor()} allocates a new {@code DeflateDecompressor}.
   */
  public static final CompressionMode HIGH_COMPRESSION =
      new CompressionMode() {

        @Override
        public String toString() {
          return "HIGH_COMPRESSION";
        }

        @Override
        public Decompressor newDecompressor() {
          return new DeflateDecompressor();
        }

        @Override
        public Compressor newCompressor() {
          // notes on the level:
          //  - 3 is the highest level that doesn't have lazy match evaluation
          //  - 6 is the default, higher than that is just a waste of cpu
          final int level = 6;
          return new DeflateCompressor(level);
        }
      };

  /**
   * A variant of {@link #FAST} that spends more time on compression in order to improve the
   * compression ratio. Best suited to indices that are updated at a low rate but must still load
   * documents from disk quickly.
   *
   * <p>Compressors handed out by this mode are {@code LZ4HighCompressor} instances; decompression
   * goes through the same LZ4 decompressor that {@link #FAST} returns.
   */
  public static final CompressionMode FAST_DECOMPRESSION =
      new CompressionMode() {

        @Override
        public String toString() {
          return "FAST_DECOMPRESSION";
        }

        @Override
        public Decompressor newDecompressor() {
          // the same LZ4 decompressor object is returned on every call
          return LZ4_DECOMPRESSOR;
        }

        @Override
        public Compressor newCompressor() {
          // a fresh compressor is allocated on every call
          return new LZ4HighCompressor();
        }
      };

  /**
   * Sole constructor. Takes no arguments and does nothing; it is {@code protected}, so it is
   * reachable from subclasses, including the anonymous subclasses behind the constants declared in
   * this class.
   */
  protected CompressionMode() {}

  /**
   * Create a new {@link Compressor} instance.
   *
   * @return a compressor for this mode; the modes declared in this class allocate a fresh instance
   *     on every call
   */
  public abstract Compressor newCompressor();

  /**
   * Create a new {@link Decompressor} instance.
   *
   * @return a decompressor for this mode. The returned object is not necessarily distinct per
   *     call: {@link #FAST} and {@link #FAST_DECOMPRESSION} both return one shared LZ4 decompressor
   *     that declares no fields, whereas {@link #HIGH_COMPRESSION} allocates a new decompressor on
   *     every call.
   */
  public abstract Decompressor newDecompressor();

  /**
   * LZ4 decompressor returned by both {@link #FAST} and {@link #FAST_DECOMPRESSION}. The anonymous
   * class declares no fields, and its {@code clone()} returns the very same instance.
   */
  private static final Decompressor LZ4_DECOMPRESSOR =
      new Decompressor() {

        @Override
        public Decompressor clone() {
          return this;
        }

        @Override
        public void decompress(
            DataInput in, int originalLength, int offset, int length, BytesRef bytes)
            throws IOException {
          final int sliceEnd = offset + length;
          assert sliceEnd <= originalLength;

          // 7 bytes of padding on top of the original length: this is not necessary but can help
          // decompression run faster
          final int paddedLength = originalLength + 7;
          if (bytes.bytes.length < paddedLength) {
            bytes.bytes = new byte[ArrayUtil.oversize(paddedLength, 1)];
          }

          // LZ4 is asked for offset + length bytes, written from index 0 of the destination
          final int produced = LZ4.decompress(in, sliceEnd, bytes.bytes, 0);
          if (produced > originalLength) {
            throw new CorruptIndexException(
                "Corrupted: lengths mismatch: " + produced + " > " + originalLength, in);
          }

          // expose only the requested slice to the caller
          bytes.offset = offset;
          bytes.length = length;
        }
      };

  /**
   * {@link Compressor} that forwards to {@code LZ4.compress}, passing a {@code
   * LZ4.FastCompressionHashTable} owned by this instance.
   */
  private static final class LZ4FastCompressor extends Compressor {

    /** Hash table handed to every {@code LZ4.compress} call made through this instance. */
    private final LZ4.FastCompressionHashTable hashTable = new LZ4.FastCompressionHashTable();

    LZ4FastCompressor() {}

    @Override
    public void close() throws IOException {
      // nothing to release
    }

    @Override
    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
      LZ4.compress(bytes, off, len, out, hashTable);
    }
  }

  /**
   * {@link Compressor} that forwards to {@code LZ4.compress}, passing a {@code
   * LZ4.HighCompressionHashTable} owned by this instance.
   */
  private static final class LZ4HighCompressor extends Compressor {

    /** Hash table handed to every {@code LZ4.compress} call made through this instance. */
    private final LZ4.HighCompressionHashTable hashTable = new LZ4.HighCompressionHashTable();

    LZ4HighCompressor() {}

    @Override
    public void close() throws IOException {
      // nothing to release
    }

    @Override
    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
      LZ4.compress(bytes, off, len, out, hashTable);
    }
  }

  /**
   * {@link Decompressor} based on {@link Inflater} in "nowrap" mode. Each block is read as a vInt
   * compressed length followed by that many compressed bytes.
   */
  private static final class DeflateDecompressor extends Decompressor {

    /** Scratch buffer holding the compressed bytes of the current block; reused across calls. */
    private byte[] compressed;

    DeflateDecompressor() {
      compressed = new byte[0];
    }

    /**
     * Inflates one block and exposes the requested slice of it through {@code bytes}.
     *
     * <p>When {@code length} is 0 the method returns immediately: {@code bytes.length} is set to 0
     * and nothing is read from {@code in}. Otherwise the whole block is inflated (not just the
     * requested slice) into {@code bytes.bytes}, which may be replaced by a larger array.
     *
     * @param in input positioned at the vInt compressed length of the block
     * @param originalLength expected size of the fully inflated block
     * @param offset start of the requested slice within the inflated block
     * @param length size of the requested slice
     * @param bytes receives the inflated data; on return {@code bytes.offset == offset} and {@code
     *     bytes.length == length}
     * @throws CorruptIndexException if the compressed data is malformed, if the inflater has not
     *     reached the end of the stream after inflating, or if the inflated size differs from
     *     {@code originalLength}
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes)
        throws IOException {
      assert offset + length <= originalLength;
      if (length == 0) {
        bytes.length = 0;
        return;
      }
      final int compressedLength = in.readVInt();
      // pad with extra "dummy byte": see javadocs for using Inflater(true)
      // we do it for compliance, although zlib has not needed it for years.
      final int paddedLength = compressedLength + 1;
      compressed = ArrayUtil.growNoCopy(compressed, paddedLength);
      in.readBytes(compressed, 0, compressedLength);
      compressed[compressedLength] = 0; // explicitly set dummy byte to 0

      // a new Inflater per call, always released in the finally block below
      final Inflater decompressor = new Inflater(true);
      try {
        // extra "dummy byte"
        decompressor.setInput(compressed, 0, paddedLength);

        bytes.offset = bytes.length = 0;
        bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, originalLength);
        try {
          bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength);
        } catch (DataFormatException e) {
          // Malformed compressed data is index corruption, like the two checks below: report it
          // the same way, with the input attached and the original exception kept as the cause.
          // CorruptIndexException is an IOException, so existing callers are unaffected.
          throw new CorruptIndexException("Invalid compressed data: " + e.getMessage(), in, e);
        }
        if (!decompressor.finished()) {
          throw new CorruptIndexException(
              "Invalid decoder state: needsInput="
                  + decompressor.needsInput()
                  + ", needsDict="
                  + decompressor.needsDictionary(),
              in);
        }
      } finally {
        decompressor.end();
      }
      if (bytes.length != originalLength) {
        throw new CorruptIndexException(
            "Lengths mismatch: " + bytes.length + " != " + originalLength, in);
      }
      // expose only the requested slice to the caller
      bytes.offset = offset;
      bytes.length = length;
    }

    /** Returns a new, independent decompressor with its own scratch buffer. */
    @Override
    public Decompressor clone() {
      return new DeflateDecompressor();
    }
  }

  /**
   * {@link Compressor} based on {@link Deflater} in "nowrap" mode. Each call to {@link #compress}
   * writes a vInt byte count followed by that many compressed bytes; when the deflater reports
   * that it needs input right after {@code finish()}, only a vInt 0 is written.
   */
  private static class DeflateCompressor extends Compressor {

    /** Deflater reused (via {@code reset()}) for every block; released by {@link #close()}. */
    final Deflater compressor;

    /** Scratch buffer receiving the deflated bytes; grown when a block does not fit. */
    byte[] compressed;

    /** Set once {@link #close()} has released the deflater. */
    boolean closed;

    DeflateCompressor(int level) {
      compressor = new Deflater(level, true);
      compressed = new byte[64];
    }

    @Override
    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
      compressor.reset();
      compressor.setInput(bytes, off, len);
      compressor.finish();

      if (compressor.needsInput()) {
        // no output
        assert len == 0 : len;
        out.writeVInt(0);
        return;
      }

      final int written = deflateIntoScratch();
      out.writeVInt(written);
      out.writeBytes(compressed, written);
    }

    /**
     * Runs the deflater until it reports completion, growing the scratch buffer after every pass
     * that did not finish, and returns the number of bytes stored at the start of the buffer.
     */
    private int deflateIntoScratch() {
      int written = 0;
      while (true) {
        final int room = compressed.length - written;
        written += compressor.deflate(compressed, written, room);
        assert written <= compressed.length;
        if (compressor.finished()) {
          return written;
        }
        compressed = ArrayUtil.grow(compressed);
      }
    }

    @Override
    public void close() throws IOException {
      if (closed) {
        return;
      }
      compressor.end();
      closed = true;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy