All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.fst.BytesStore Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.fst;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs

class BytesStore extends DataOutput implements Accountable {

  private static final long BASE_RAM_BYTES_USED =
        RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
      + RamUsageEstimator.shallowSizeOfInstance(ArrayList.class);

  private final List blocks = new ArrayList<>();

  private final int blockSize;
  private final int blockBits;
  private final int blockMask;

  private byte[] current;
  private int nextWrite;

  public BytesStore(int blockBits) {
    this.blockBits = blockBits;
    blockSize = 1 << blockBits;
    blockMask = blockSize-1;
    nextWrite = blockSize;
  }

  /** Pulls bytes from the provided IndexInput.  */
  public BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException {
    int blockSize = 2;
    int blockBits = 1;
    while(blockSize < numBytes && blockSize < maxBlockSize) {
      blockSize *= 2;
      blockBits++;
    }
    this.blockBits = blockBits;
    this.blockSize = blockSize;
    this.blockMask = blockSize-1;
    long left = numBytes;
    while(left > 0) {
      final int chunk = (int) Math.min(blockSize, left);
      byte[] block = new byte[chunk];
      in.readBytes(block, 0, block.length);
      blocks.add(block);
      left -= chunk;
    }

    // So .getPosition still works
    nextWrite = blocks.get(blocks.size()-1).length;
  }

  /** Absolute write byte; you must ensure dest is < max
   *  position written so far. */
  public void writeByte(int dest, byte b) {
    int blockIndex = dest >> blockBits;
    byte[] block = blocks.get(blockIndex);
    block[dest & blockMask] = b;
  }

  @Override
  public void writeByte(byte b) {
    if (nextWrite == blockSize) {
      current = new byte[blockSize];
      blocks.add(current);
      nextWrite = 0;
    }
    current[nextWrite++] = b;
  }

  @Override
  public void writeBytes(byte[] b, int offset, int len) {
    while (len > 0) {
      int chunk = blockSize - nextWrite;
      if (len <= chunk) {
        assert b != null;
        assert current != null;
        System.arraycopy(b, offset, current, nextWrite, len);
        nextWrite += len;
        break;
      } else {
        if (chunk > 0) {
          System.arraycopy(b, offset, current, nextWrite, chunk);
          offset += chunk;
          len -= chunk;
        }
        current = new byte[blockSize];
        blocks.add(current);
        nextWrite = 0;
      }
    }
  }

  int getBlockBits() {
    return blockBits;
  }

  /** Absolute writeBytes without changing the current
   *  position.  Note: this cannot "grow" the bytes, so you
   *  must only call it on already written parts. */
  void writeBytes(long dest, byte[] b, int offset, int len) {
    //System.out.println("  BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len);
    assert dest + len <= getPosition(): "dest=" + dest + " pos=" + getPosition() + " len=" + len;

    // Note: weird: must go "backwards" because copyBytes
    // calls us with overlapping src/dest.  If we
    // go forwards then we overwrite bytes before we can
    // copy them:

    /*
    int blockIndex = dest >> blockBits;
    int upto = dest & blockMask;
    byte[] block = blocks.get(blockIndex);
    while (len > 0) {
      int chunk = blockSize - upto;
      System.out.println("    cycle chunk=" + chunk + " len=" + len);
      if (len <= chunk) {
        System.arraycopy(b, offset, block, upto, len);
        break;
      } else {
        System.arraycopy(b, offset, block, upto, chunk);
        offset += chunk;
        len -= chunk;
        blockIndex++;
        block = blocks.get(blockIndex);
        upto = 0;
      }
    }
    */

    final long end = dest + len;
    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      blockIndex--;
      downTo = blockSize;
    }
    byte[] block = blocks.get(blockIndex);

    while (len > 0) {
      //System.out.println("    cycle downTo=" + downTo + " len=" + len);
      if (len <= downTo) {
        //System.out.println("      final: offset=" + offset + " len=" + len + " dest=" + (downTo-len));
        System.arraycopy(b, offset, block, downTo-len, len);
        break;
      } else {
        len -= downTo;
        //System.out.println("      partial: offset=" + (offset + len) + " len=" + downTo + " dest=0");
        System.arraycopy(b, offset + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /** Absolute copy bytes self to self, without changing the
   *  position. Note: this cannot "grow" the bytes, so must
   *  only call it on already written parts. */
  public void copyBytes(long src, long dest, int len) {
    //System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len);
    assert src < dest;

    // Note: weird: must go "backwards" because copyBytes
    // calls us with overlapping src/dest.  If we
    // go forwards then we overwrite bytes before we can
    // copy them:

    /*
    int blockIndex = src >> blockBits;
    int upto = src & blockMask;
    byte[] block = blocks.get(blockIndex);
    while (len > 0) {
      int chunk = blockSize - upto;
      System.out.println("  cycle: chunk=" + chunk + " len=" + len);
      if (len <= chunk) {
        writeBytes(dest, block, upto, len);
        break;
      } else {
        writeBytes(dest, block, upto, chunk);
        blockIndex++;
        block = blocks.get(blockIndex);
        upto = 0;
        len -= chunk;
        dest += chunk;
      }
    }
    */

    long end = src + len;

    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      blockIndex--;
      downTo = blockSize;
    }
    byte[] block = blocks.get(blockIndex);

    while (len > 0) {
      //System.out.println("  cycle downTo=" + downTo);
      if (len <= downTo) {
        //System.out.println("    finish");
        writeBytes(dest, block, downTo-len, len);
        break;
      } else {
        //System.out.println("    partial");
        len -= downTo;
        writeBytes(dest + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /** Writes an int at the absolute position without
   *  changing the current pointer. */
  public void writeInt(long pos, int value) {
    int blockIndex = (int) (pos >> blockBits);
    int upto = (int) (pos & blockMask);
    byte[] block = blocks.get(blockIndex);
    int shift = 24;
    for(int i=0;i<4;i++) {
      block[upto++] = (byte) (value >> shift);
      shift -= 8;
      if (upto == blockSize) {
        upto = 0;
        blockIndex++;
        block = blocks.get(blockIndex);
      }
    }
  }

  /** Reverse from srcPos, inclusive, to destPos, inclusive. */
  public void reverse(long srcPos, long destPos) {
    assert srcPos < destPos;
    assert destPos < getPosition();
    //System.out.println("reverse src=" + srcPos + " dest=" + destPos);

    int srcBlockIndex = (int) (srcPos >> blockBits);
    int src = (int) (srcPos & blockMask);
    byte[] srcBlock = blocks.get(srcBlockIndex);

    int destBlockIndex = (int) (destPos >> blockBits);
    int dest = (int) (destPos & blockMask);
    byte[] destBlock = blocks.get(destBlockIndex);
    //System.out.println("  srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex);

    int limit = (int) (destPos - srcPos + 1)/2;
    for(int i=0;i= 0;
    int blockIndex = (int) (newLen >> blockBits);
    nextWrite = (int) (newLen & blockMask);
    if (nextWrite == 0) {
      blockIndex--;
      nextWrite = blockSize;
    }
    blocks.subList(blockIndex+1, blocks.size()).clear();
    if (newLen == 0) {
      current = null;
    } else {
      current = blocks.get(blockIndex);
    }
    assert newLen == getPosition();
  }

  public void finish() {
    if (current != null) {
      byte[] lastBuffer = new byte[nextWrite];
      System.arraycopy(current, 0, lastBuffer, 0, nextWrite);
      blocks.set(blocks.size()-1, lastBuffer);
      current = null;
    }
  }

  /** Writes all of our bytes to the target {@link DataOutput}. */
  public void writeTo(DataOutput out) throws IOException {
    for(byte[] block : blocks) {
      out.writeBytes(block, 0, block.length);
    }
  }

  public FST.BytesReader getForwardReader() {
    if (blocks.size() == 1) {
      return new ForwardBytesReader(blocks.get(0));
    }
    return new FST.BytesReader() {
      private byte[] current;
      private int nextBuffer;
      private int nextRead = blockSize;

      @Override
      public byte readByte() {
        if (nextRead == blockSize) {
          current = blocks.get(nextBuffer++);
          nextRead = 0;
        }
        return current[nextRead++];
      }

      @Override
      public void skipBytes(long count) {
        setPosition(getPosition() + count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        while(len > 0) {
          int chunkLeft = blockSize - nextRead;
          if (len <= chunkLeft) {
            System.arraycopy(current, nextRead, b, offset, len);
            nextRead += len;
            break;
          } else {
            if (chunkLeft > 0) {
              System.arraycopy(current, nextRead, b, offset, chunkLeft);
              offset += chunkLeft;
              len -= chunkLeft;
            }
            current = blocks.get(nextBuffer++);
            nextRead = 0;
          }
        }
      }

      @Override
      public long getPosition() {
        return ((long) nextBuffer-1)*blockSize + nextRead;
      }

      @Override
      public void setPosition(long pos) {
        int bufferIndex = (int) (pos >> blockBits);
        nextBuffer = bufferIndex+1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos;
      }

      @Override
      public boolean reversed() {
        return false;
      }
    };
  }

  public FST.BytesReader getReverseReader() {
    return getReverseReader(true);
  }

  FST.BytesReader getReverseReader(boolean allowSingle) {
    if (allowSingle && blocks.size() == 1) {
      return new ReverseBytesReader(blocks.get(0));
    }
    return new FST.BytesReader() {
      private byte[] current = blocks.size() == 0 ? null : blocks.get(0);
      private int nextBuffer = -1;
      private int nextRead = 0;

      @Override
      public byte readByte() {
        if (nextRead == -1) {
          current = blocks.get(nextBuffer--);
          nextRead = blockSize-1;
        }
        return current[nextRead--];
      }

      @Override
      public void skipBytes(long count) {
        setPosition(getPosition() - count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        for(int i=0;i> blockBits);
        nextBuffer = bufferIndex-1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos: "pos=" + pos + " getPos()=" + getPosition();
      }

      @Override
      public boolean reversed() {
        return true;
      }
    };
  }

  @Override
  public long ramBytesUsed() {
    long size = BASE_RAM_BYTES_USED;
    for (byte[] block : blocks) {
      size += RamUsageEstimator.sizeOf(block);
    }
    return size;
  }
  
  @Override
  public Collection getChildResources() {
    return Collections.emptyList();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(numBlocks=" + blocks.size() + ")";
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy