org.apache.lucene.util.bkd.OfflinePointReader Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.bkd;

import java.io.EOFException;
import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LongBitSet;

/** Reads points from disk in a fixed-width format, previously written with {@link OfflinePointWriter}.
 *
 * @lucene.internal */
public final class OfflinePointReader extends PointReader {
  // Number of points still to be returned. next() decrements it while it is positive and
  // split() subtracts the points it consumes. A value of -1 is the only case in which next()
  // tolerates hitting end-of-file (see the assert there).
  long countLeft;
  // Input the point records are read from; the constructor opens it either as a checksumming
  // input (whole file requested) or as a plain input (slice of the file).
  final IndexInput in;
  // Buffer holding the packed value of the current point; next() overwrites it in place.
  private final byte[] packedValue;
  // When true, records carry no separate ord and the docID is used as the ord.
  final boolean singleValuePerDoc;
  // Size in bytes of one record: packed value + int docID + (optional) int or long ord.
  final int bytesPerDoc;
  // Ord of the current point, set by next().
  private long ord;
  // docID of the current point, set by next().
  private int docID;
  // true if ords are written as long (8 bytes), else 4 bytes
  private boolean longOrds;
  // Set by close() once the footer checksum has been verified, so it is verified at most once.
  private boolean checked;

  // File name we are reading
  final String name;

  /**
   * Opens {@code tempFileName} in {@code tempDir} and positions the reader on point {@code start}.
   *
   * <p>Each record on disk is the packed value, followed by an int docID and, unless
   * {@code singleValuePerDoc} is true, an ord written as an int or a long.
   *
   * @param tempDir directory holding the file
   * @param tempFileName name of the file to read
   * @param packedBytesLength number of bytes in each point's packed value
   * @param start index of the first point to read
   * @param length number of points to read
   * @param longOrds true if ords were written as 8-byte longs, false for 4-byte ints
   * @param singleValuePerDoc true if records carry no separate ord (the docID is used as the ord)
   * @throws IllegalArgumentException if the requested slice extends past the data in the file
   * @throws IOException if the file cannot be inspected, opened or positioned
   */
  public OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
                     boolean longOrds, boolean singleValuePerDoc) throws IOException {
    this.singleValuePerDoc = singleValuePerDoc;
    int bytesPerDoc = packedBytesLength + Integer.BYTES;
    if (singleValuePerDoc == false) {
      bytesPerDoc += longOrds ? Long.BYTES : Integer.BYTES;
    }
    this.bytesPerDoc = bytesPerDoc;

    // Do everything that can fail without holding a file handle before the file is opened.
    this.longOrds = longOrds;
    countLeft = length;
    name = tempFileName;
    packedValue = new byte[packedBytesLength];

    // Look the length up once, so the check and the error message agree on the same value.
    final long fileLength = tempDir.fileLength(tempFileName);

    if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > fileLength) {
      throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + fileLength + " tempFileName=" + tempFileName);
    }

    // Best-effort checksumming:
    if (start == 0 && length*bytesPerDoc == fileLength - CodecUtil.footerLength()) {
      // If we are going to read the entire file, e.g. because BKDWriter is now
      // partitioning it, we open with checksums:
      in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE);
    } else {
      // Since we are going to seek somewhere in the middle of a possibly huge
      // file, and not read all bytes from there, don't use ChecksumIndexInput here.
      // This is typically fine, because this same file will later be read fully,
      // at another level of the BKDWriter recursion
      in = tempDir.openInput(tempFileName, IOContext.READONCE);
    }

    try {
      in.seek(start * bytesPerDoc);
    } catch (Throwable t) {
      // The caller never receives this instance, so nobody else can close the input.
      try {
        in.close();
      } catch (Throwable closeFailure) {
        t.addSuppressed(closeFailure);
      }
      throw t;
    }
  }

  /**
   * Advances to the next point and loads its packed value, docID and ord.
   *
   * @return true if a point was loaded; false once {@code countLeft} has reached zero or
   *         the end of the file was hit
   * @throws IOException if reading from the underlying input fails
   */
  @Override
  public boolean next() throws IOException {
    if (countLeft == 0) {
      // Every requested point has already been handed out.
      return false;
    }
    if (countLeft > 0) {
      countLeft--;
    }

    try {
      in.readBytes(packedValue, 0, packedValue.length);
    } catch (EOFException eofe) {
      // End-of-file is only tolerated when countLeft is -1.
      assert countLeft == -1;
      return false;
    }

    docID = in.readInt();
    if (singleValuePerDoc) {
      // No separate ord on disk: the docID doubles as the ord.
      ord = docID;
    } else if (longOrds) {
      ord = in.readLong();
    } else {
      ord = in.readInt();
    }
    return true;
  }

  /**
   * Returns the packed value of the current point.
   *
   * @return the reader's internal buffer (not a copy); {@link #next()} overwrites it
   */
  @Override
  public byte[] packedValue() {
    return this.packedValue;
  }

  /**
   * Returns the ord of the current point, as loaded by the last {@link #next()} call.
   *
   * @return the ord; equal to the docID when {@code singleValuePerDoc} is true
   */
  @Override
  public long ord() {
    return this.ord;
  }

  /**
   * Returns the docID of the current point, as loaded by the last {@link #next()} call.
   *
   * @return the docID
   */
  @Override
  public int docID() {
    return this.docID;
  }

  /**
   * Closes the underlying input. If all requested points were consumed and the input was
   * opened with checksumming, the file footer is verified first (at most once).
   *
   * @throws IOException if the footer check fails or the input cannot be closed
   */
  @Override
  public void close() throws IOException {
    try {
      final boolean verifyFooter = checked == false
          && countLeft == 0
          && in instanceof ChecksumIndexInput;
      if (verifyFooter) {
        // Flip the flag before checking so a repeated close() does not check again.
        checked = true;
        CodecUtil.checkFooter((ChecksumIndexInput) in);
      }
    } finally {
      // Always release the input, even when the footer check throws.
      in.close();
    }
  }

  @Override
  public void markOrds(long count, LongBitSet ordBitSet) throws IOException {
    if (countLeft < count) {
      throw new IllegalStateException("only " + countLeft + " points remain, but " + count + " were requested");
    }
    long fp = in.getFilePointer() + packedValue.length;
    if (singleValuePerDoc == false) {
      fp += Integer.BYTES;
    }
    for(long i=0;i offline split since the default impl
    // is somewhat wasteful otherwise (e.g. decoding docID when we don't
    // need to)

    int packedBytesLength = packedValue.length;

    int bytesPerDoc = packedBytesLength + Integer.BYTES;
    if (singleValuePerDoc == false) {
      if (longOrds) {
        bytesPerDoc += Long.BYTES;
      } else {
        bytesPerDoc += Integer.BYTES;
      }
    }

    long rightCount = 0;

    IndexOutput rightOut = ((OfflinePointWriter) right).out;
    IndexOutput leftOut = ((OfflinePointWriter) left).out;

    assert count <= countLeft: "count=" + count + " countLeft=" + countLeft;

    countLeft -= count;

    long countStart = count;

    byte[] buffer = new byte[bytesPerDoc];
    while (count > 0) {
      in.readBytes(buffer, 0, buffer.length);

      long ord;
      if (longOrds) {
        // A long ord, after the docID:
        ord = readLong(buffer, packedBytesLength+Integer.BYTES);
      } else if (singleValuePerDoc) {
        // docID is the ord:
        ord = readInt(buffer, packedBytesLength);
      } else {
        // An int ord, after the docID:
        ord = readInt(buffer, packedBytesLength+Integer.BYTES);
      }

      if (rightTree.get(ord)) {
        rightOut.writeBytes(buffer, 0, bytesPerDoc);
        if (doClearBits) {
          rightTree.clear(ord);
        }
        rightCount++;
      } else {
        leftOut.writeBytes(buffer, 0, bytesPerDoc);
      }

      count--;
    }

    ((OfflinePointWriter) right).count = rightCount;
    ((OfflinePointWriter) left).count = countStart-rightCount;

    return rightCount;
  }

  /**
   * Decodes a big-endian (most significant byte first) 8-byte long from {@code bytes},
   * starting at {@code pos}. Poached from ByteArrayDataInput.
   *
   * @param bytes source array
   * @param pos index of the most significant byte
   * @return the decoded value
   */
  private static long readLong(byte[] bytes, int pos) {
    long value = 0;
    for (int offset = 0; offset < Long.BYTES; offset++) {
      value = (value << 8) | (bytes[pos + offset] & 0xFFL);
    }
    return value;
  }

  /**
   * Decodes a big-endian (most significant byte first) 4-byte int from {@code bytes},
   * starting at {@code pos}. Poached from ByteArrayDataInput.
   *
   * @param bytes source array
   * @param pos index of the most significant byte
   * @return the decoded value
   */
  private static int readInt(byte[] bytes, int pos) {
    int value = 0;
    for (int offset = 0; offset < Integer.BYTES; offset++) {
      value = (value << 8) | (bytes[pos + offset] & 0xFF);
    }
    return value;
  }
}