All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.codecs.simpletext.SimpleTextPointsReader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.simpletext;

import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_FP;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BYTES_PER_DIM;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.DOC_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.FIELD_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.FIELD_FP;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.FIELD_FP_NAME;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.INDEX_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MAX_LEAF_POINTS;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MAX_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MIN_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.NUM_DATA_DIMS;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.NUM_INDEX_DIMS;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.POINT_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_DIM;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_VALUE;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;

/**
 * Reads point values written in the plain-text format produced by {@code SimpleTextPointsWriter}.
 *
 * <p>On construction the per-field index file is read to learn where each field's metadata starts
 * in the data file; one {@link SimpleTextBKDReader} per field is then built eagerly and kept in
 * {@link #readers}. The data file stays open until {@link #close()} is called.
 */
class SimpleTextPointsReader extends PointsReader {

  /** Open handle on the points data file; shared with every per-field reader. */
  private final IndexInput dataIn;

  final SegmentReadState readState;

  /** Field name to the reader built for that field. */
  final Map<String, SimpleTextBKDReader> readers = new HashMap<>();

  /** Holds the most recently read line; all {@code parse*}/{@code stripPrefix} helpers read it. */
  final BytesRefBuilder scratch = new BytesRefBuilder();

  /**
   * Opens the index and data files of the segment described by {@code readState} and initializes
   * one reader per indexed field.
   *
   * @param readState segment whose point files should be opened
   * @throws IOException if a file cannot be opened or read, or the index file footer check fails
   */
  public SimpleTextPointsReader(SegmentReadState readState) throws IOException {
    // Initialize readers now:

    // Read index: field name -> file pointer of that field's metadata inside the data file
    Map<String, Long> fieldToFileOffset = new HashMap<>();

    String indexFileName =
        IndexFileNames.segmentFileName(
            readState.segmentInfo.name,
            readState.segmentSuffix,
            SimpleTextPointsFormat.POINT_INDEX_EXTENSION);
    try (ChecksumIndexInput in = readState.directory.openChecksumInput(indexFileName)) {
      readLine(in);
      int count = parseInt(FIELD_COUNT);
      for (int i = 0; i < count; i++) {
        readLine(in);
        String fieldName = stripPrefix(FIELD_FP_NAME);
        readLine(in);
        long fp = parseLong(FIELD_FP);
        fieldToFileOffset.put(fieldName, fp);
      }
      SimpleTextUtil.checkFooter(in);
    }

    boolean success = false;
    String fileName =
        IndexFileNames.segmentFileName(
            readState.segmentInfo.name,
            readState.segmentSuffix,
            SimpleTextPointsFormat.POINT_EXTENSION);
    dataIn = readState.directory.openInput(fileName, IOContext.DEFAULT);
    try {
      for (Map.Entry<String, Long> ent : fieldToFileOffset.entrySet()) {
        readers.put(ent.getKey(), initReader(ent.getValue()));
      }
      success = true;
    } finally {
      if (success == false) {
        // Do not leak the data file handle when a per-field reader fails to initialize.
        IOUtils.closeWhileHandlingException(this);
      }
    }

    this.readState = readState;
  }

  /**
   * Parses one field's metadata from the data file and builds its reader.
   *
   * @param fp file pointer in the data file at which the field's metadata begins
   * @return a reader over the field's points, backed by {@link #dataIn}
   * @throws IOException if seeking or reading the data file fails
   */
  private SimpleTextBKDReader initReader(long fp) throws IOException {
    // NOTE: matches what writeIndex does in SimpleTextPointsWriter
    dataIn.seek(fp);
    readLine(dataIn);
    int numDataDims = parseInt(NUM_DATA_DIMS);

    readLine(dataIn);
    int numIndexDims = parseInt(NUM_INDEX_DIMS);

    readLine(dataIn);
    int bytesPerDim = parseInt(BYTES_PER_DIM);

    readLine(dataIn);
    int maxPointsInLeafNode = parseInt(MAX_LEAF_POINTS);

    readLine(dataIn);
    int count = parseInt(INDEX_COUNT);

    readLine(dataIn);
    assert startsWith(MIN_VALUE);
    BytesRef minValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MIN_VALUE));
    assert minValue.length == numIndexDims * bytesPerDim;

    readLine(dataIn);
    assert startsWith(MAX_VALUE);
    BytesRef maxValue = SimpleTextUtil.fromBytesRefString(stripPrefix(MAX_VALUE));
    assert maxValue.length == numIndexDims * bytesPerDim;

    readLine(dataIn);
    assert startsWith(POINT_COUNT);
    long pointCount = parseLong(POINT_COUNT);

    readLine(dataIn);
    assert startsWith(DOC_COUNT);
    int docCount = parseInt(DOC_COUNT);

    // One file pointer per index entry announced by INDEX_COUNT.
    long[] leafBlockFPs = new long[count];
    for (int i = 0; i < count; i++) {
      readLine(dataIn);
      leafBlockFPs[i] = parseLong(BLOCK_FP);
    }
    readLine(dataIn);
    // "count" is reused from here on for the number of split entries.
    count = parseInt(SPLIT_COUNT);

    // Packed layout per split entry: with a single index dimension only the split value is
    // stored; otherwise one leading byte holds the split dimension, followed by the split value.
    byte[] splitPackedValues;
    int bytesPerIndexEntry;
    if (numIndexDims == 1) {
      bytesPerIndexEntry = bytesPerDim;
    } else {
      bytesPerIndexEntry = 1 + bytesPerDim;
    }
    splitPackedValues = new byte[count * bytesPerIndexEntry];
    for (int i = 0; i < count; i++) {
      readLine(dataIn);
      int address = bytesPerIndexEntry * i;
      // The split-dim line is always present in the file, even when it is not stored below.
      int splitDim = parseInt(SPLIT_DIM);
      if (numIndexDims != 1) {
        splitPackedValues[address++] = (byte) splitDim;
      }
      readLine(dataIn);
      assert startsWith(SPLIT_VALUE);
      BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
      assert br.length == bytesPerDim;
      System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
    }

    return new SimpleTextBKDReader(
        dataIn,
        numDataDims,
        numIndexDims,
        maxPointsInLeafNode,
        bytesPerDim,
        leafBlockFPs,
        splitPackedValues,
        minValue.bytes,
        maxValue.bytes,
        pointCount,
        docCount);
  }

  /** Reads the next line of {@code in} into {@link #scratch}. */
  private void readLine(IndexInput in) throws IOException {
    SimpleTextUtil.readLine(in, scratch);
  }

  /** Returns whether the line currently held in {@link #scratch} begins with {@code prefix}. */
  private boolean startsWith(BytesRef prefix) {
    return StringHelper.startsWith(scratch.get(), prefix);
  }

  /**
   * Parses the text after {@code prefix} on the current line as an {@code int}. The prefix itself
   * is only verified when assertions are enabled.
   */
  private int parseInt(BytesRef prefix) {
    assert startsWith(prefix);
    return Integer.parseInt(stripPrefix(prefix));
  }

  /**
   * Parses the text after {@code prefix} on the current line as a {@code long}. The prefix itself
   * is only verified when assertions are enabled.
   */
  private long parseLong(BytesRef prefix) {
    assert startsWith(prefix);
    return Long.parseLong(stripPrefix(prefix));
  }

  /**
   * Returns the current line with its first {@code prefix.length} bytes dropped, decoded as UTF-8.
   * The content of those leading bytes is not checked here.
   */
  private String stripPrefix(BytesRef prefix) {
    return new String(
        scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
  }

  /**
   * Returns the point values reader for {@code fieldName}.
   *
   * @param fieldName name of the field to look up
   * @return the reader registered for the field, or {@code null} if none was registered
   * @throws IllegalArgumentException if the field is unknown to this segment or has a point
   *     dimension count of zero
   */
  @Override
  public PointValues getValues(String fieldName) throws IOException {
    FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
    if (fieldInfo == null) {
      throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
    }
    if (fieldInfo.getPointDimensionCount() == 0) {
      throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index points");
    }
    return readers.get(fieldName);
  }

  /**
   * Reads the data file line by line through a checksumming input until the expected footer
   * position is reached, then verifies the footer.
   *
   * @throws CorruptIndexException if a line boundary does not fall exactly on the expected footer
   *     start
   * @throws IOException if reading fails or the footer check fails
   */
  @Override
  public void checkIntegrity() throws IOException {
    // Local scratch and a clone of the input, so the shared field and file position are untouched.
    BytesRefBuilder scratch = new BytesRefBuilder();
    IndexInput clone = dataIn.clone();
    clone.seek(0);

    // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included
    // in SimpleTextUtil.CHECKSUM):
    long footerStartPos = clone.length() - (SimpleTextUtil.CHECKSUM.length + 21);
    ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
    while (true) {
      SimpleTextUtil.readLine(input, scratch);
      if (input.getFilePointer() >= footerStartPos) {
        // Make sure we landed at precisely the right location:
        if (input.getFilePointer() != footerStartPos) {
          throw new CorruptIndexException(
              "SimpleText failure: footer does not start at expected position current="
                  + input.getFilePointer()
                  + " vs expected="
                  + footerStartPos,
              input);
        }
        SimpleTextUtil.checkFooter(input);
        break;
      }
    }
  }

  /** Closes the underlying data file. */
  @Override
  public void close() throws IOException {
    dataIn.close();
  }

  @Override
  public String toString() {
    return "SimpleTextPointsReader(segment="
        + readState.segmentInfo.name
        + " maxDoc="
        + readState.segmentInfo.maxDoc()
        + ")";
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy