All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.encoding.FourFlagsIntEncoder Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
package org.apache.lucene.util.encoding;

import java.io.IOException;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link ChunksIntEncoder} which encodes values in chunks of 4. Every group
 * starts with a single byte (called indicator) which represents 4 - 2 bit
 * flags, where the values:
 * 
    *
  • 1, 2 or 3 mean the encoded value is '1', '2' or '3' respectively. *
  • 0 means the value is encoded using {@link VInt8IntEncoder}, and the * encoded bytes follow the indicator.
    * Since value 0 is illegal, and 1-3 are encoded in the indicator, the actual * value that is encoded is value-4, which saves some more bits. *
* Encoding example: *
    *
  • Original values: 6, 16, 5, 9, 7, 1, 11 *
  • After sorting: 1, 5, 6, 7, 9, 11, 16 *
  • D-Gap computing: 1, 4, 1, 1, 2, 5 (so far - done by * {@link DGapIntEncoder}) *
  • Encoding: 1,0,1,1 as the first indicator, followed by 0 (4-4), than * 2,0,0,0 as the second indicator, followed by 1 (5-4) encoded with. *
  • Binary encode: 01 | 01 | 00 | 01 00000000 00 | 00 | 00 | 10 * 00000001 (indicators are underlined).
    * NOTE: the order of the values in the indicator is lsb ⇒ msb, * which allows for more efficient decoding. *
* * @lucene.experimental */ public class FourFlagsIntEncoder extends ChunksIntEncoder { /** * Holds all combinations of indicator flags for fast encoding (saves * time on bit manipulation @ encode time) */ private static byte[][] encodeTable = new byte[][] { new byte[] { 0x00, 0x00, 0x00, 0x00 }, new byte[] { 0x01, 0x04, 0x10, 0x40 }, new byte[] { 0x02, 0x08, 0x20, (byte) 0x80 }, new byte[] { 0x03, 0x0C, 0x30, (byte) 0xC0 }, }; public FourFlagsIntEncoder() { super(4); } /** * Small values (<=3) are stored in the indicator while larger * values are saved for later encoding in the {@link #encodeQueue}. Since * Vint8 will only encode values larger or equal to 4, the values saves for * encoded are transformed to (value - 4).
* When a chunk is ready (got 4 values), the {@link #encodeChunk()} * takes control. */ @Override public void encode(int data) throws IOException { if (data <= 3) { indicator |= encodeTable[data][ordinal]; } else { encodeQueue[encodeQueueSize++] = data - 4; } ++ordinal; // If 4 values were encoded thus far, 'flush' them including the indicator. if ((ordinal & 0x3) == 0) { encodeChunk(); } } @Override public IntDecoder createMatchingDecoder() { return new FourFlagsIntDecoder(); } @Override public String toString() { return "FourFlags (" + encoder.toString() + ")"; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy