All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.util.packed.DirectWriter Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.packed;


import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.IndexOutput;

/** 
 * Class for writing packed integers to be directly read from Directory.
 * Integers can be read on-the-fly via {@link DirectReader}.
 * 

* Unlike PackedInts, it optimizes for read i/o operations and supports > 2B values. * Example usage: *

 *   int bitsPerValue = DirectWriter.bitsRequired(100); // values up to and including 100
 *   IndexOutput output = dir.createOutput("packed", IOContext.DEFAULT);
 *   DirectWriter writer = DirectWriter.getInstance(output, numberOfValues, bitsPerValue);
 *   for (int i = 0; i < numberOfValues; i++) {
 *     writer.add(value);
 *   }
 *   writer.finish();
 *   output.close();
 * 
* @see DirectReader */ public final class DirectWriter { final int bitsPerValue; final long numValues; final IndexOutput output; long count; boolean finished; // for now, just use the existing writer under the hood int off; final byte[] nextBlocks; final long[] nextValues; final BulkOperation encoder; final int iterations; DirectWriter(IndexOutput output, long numValues, int bitsPerValue) { this.output = output; this.numValues = numValues; this.bitsPerValue = bitsPerValue; encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue); iterations = encoder.computeIterations((int) Math.min(numValues, Integer.MAX_VALUE), PackedInts.DEFAULT_BUFFER_SIZE); nextBlocks = new byte[iterations * encoder.byteBlockCount()]; nextValues = new long[iterations * encoder.byteValueCount()]; } /** Adds a value to this writer */ public void add(long l) throws IOException { assert bitsPerValue == 64 || (l >= 0 && l <= PackedInts.maxValue(bitsPerValue)) : bitsPerValue; assert !finished; if (count >= numValues) { throw new EOFException("Writing past end of stream"); } nextValues[off++] = l; if (off == nextValues.length) { flush(); } count++; } private void flush() throws IOException { encoder.encode(nextValues, 0, nextBlocks, 0, iterations); final int blockCount = (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue); output.writeBytes(nextBlocks, blockCount); Arrays.fill(nextValues, 0L); off = 0; } /** finishes writing */ public void finish() throws IOException { if (count != numValues) { throw new IllegalStateException("Wrong number of values added, expected: " + numValues + ", got: " + count); } assert !finished; flush(); // pad for fast io: we actually only need this for certain BPV, but its just 3 bytes... 
for (int i = 0; i < 3; i++) { output.writeByte((byte) 0); } finished = true; } /** Returns an instance suitable for encoding {@code numValues} using {@code bitsPerValue} */ public static DirectWriter getInstance(IndexOutput output, long numValues, int bitsPerValue) { if (Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) < 0) { throw new IllegalArgumentException("Unsupported bitsPerValue " + bitsPerValue + ". Did you use bitsRequired?"); } return new DirectWriter(output, numValues, bitsPerValue); } /** * Round a number of bits per value to the next amount of bits per value that * is supported by this writer. * * @param bitsRequired the amount of bits required * @return the next number of bits per value that is gte the provided value * and supported by this writer */ private static int roundBits(int bitsRequired) { int index = Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsRequired); if (index < 0) { return SUPPORTED_BITS_PER_VALUE[-index-1]; } else { return bitsRequired; } } /** * Returns how many bits are required to hold values up * to and including maxValue * * @param maxValue the maximum value that should be representable. * @return the amount of bits needed to represent values from 0 to maxValue. * @see PackedInts#bitsRequired(long) */ public static int bitsRequired(long maxValue) { return roundBits(PackedInts.bitsRequired(maxValue)); } /** * Returns how many bits are required to hold values up * to and including maxValue, interpreted as an unsigned value. * * @param maxValue the maximum value that should be representable. * @return the amount of bits needed to represent values from 0 to maxValue. * @see PackedInts#unsignedBitsRequired(long) */ public static int unsignedBitsRequired(long maxValue) { return roundBits(PackedInts.unsignedBitsRequired(maxValue)); } final static int SUPPORTED_BITS_PER_VALUE[] = new int[] { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64 }; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy