All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.codecs.blocktree.Stats Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.blocktree;


import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;

import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/**
 * BlockTree statistics for a single field 
 * returned by {@link FieldReader#getStats()}.
 * @lucene.internal
 */
public class Stats {
  /** Byte size of the index. */
  public long indexNumBytes;

  /** Total number of terms in the field. */
  public long totalTermCount;

  /** Total number of bytes (sum of term lengths) across all terms in the field. */
  public long totalTermBytes;

  // TODO: add total auto-prefix term count

  /** The number of normal (non-floor) blocks in the terms file. */
  public int nonFloorBlockCount;

  /** The number of floor blocks (meta-blocks larger than the
   *  allowed {@code maxItemsPerBlock}) in the terms file. */
  public int floorBlockCount;
    
  /** The number of sub-blocks within the floor blocks. */
  public int floorSubBlockCount;

  /** The number of "internal" blocks (that have both
   *  terms and sub-blocks). */
  public int mixedBlockCount;

  /** The number of "leaf" blocks (blocks that have only
   *  terms). */
  public int termsOnlyBlockCount;

  /** The number of "internal" blocks that do not contain
   *  terms (have only sub-blocks). */
  public int subBlocksOnlyBlockCount;

  /** Total number of blocks. */
  public int totalBlockCount;

  /** Number of blocks at each prefix depth; indexed by the frame's
   *  {@code prefix} and grown in {@code startBlock} when a larger
   *  prefix is seen. */
  public int[] blockCountByPrefixLen = new int[10];
  // Incremented once per startBlock() call; finish() asserts it equals endBlockCount.
  private int startBlockCount;
  // Incremented once per endBlock() call that completes; finish() asserts it equals startBlockCount.
  private int endBlockCount;

  /** Total number of bytes used to store term suffixes. */
  public long totalBlockSuffixBytes;

  /** Total number of bytes used to store term stats (not
   *  including what the {@link PostingsReaderBase}
   *  stores). */
  public long totalBlockStatsBytes;

  /** Total bytes stored by the {@link PostingsReaderBase},
   *  plus the other few vInts stored in the frame. */
  public long totalBlockOtherBytes;

  /** Segment name. */
  public final String segment;

  /** Field name. */
  public final String field;

  /**
   * Records the segment and field names these statistics belong to.
   *
   * @param segment the segment name, stored in {@link #segment}
   * @param field the field name, stored in {@link #field}
   */
  Stats(String segment, String field) {
    this.field = field;
    this.segment = segment;
  }

  /**
   * Records the start of a block: bumps the total, floor/non-floor and
   * per-prefix-length block counters, and adds the lengths of the frame's
   * suffix and stats readers to the running byte totals.
   *
   * @param frame the frame describing the block being started
   * @param isFloor whether the block is counted as a floor sub-block
   */
  void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) {
    totalBlockCount++;
    if (!isFloor) {
      nonFloorBlockCount++;
    } else {
      // The enclosing floor block is counted only when fp still equals fpOrig
      // (presumably the first sub-block -- confirm against SegmentTermsEnumFrame);
      // every floor sub-block is counted individually.
      if (frame.fpOrig == frame.fp) {
        floorBlockCount++;
      }
      floorSubBlockCount++;
    }

    // Make sure the histogram has a slot for this prefix length before counting it.
    final int prefixLen = frame.prefix;
    if (prefixLen >= blockCountByPrefixLen.length) {
      blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, prefixLen + 1);
    }
    blockCountByPrefixLen[prefixLen]++;

    startBlockCount++;
    totalBlockSuffixBytes += frame.suffixesReader.length();
    totalBlockStatsBytes += frame.statsReader.length();
  }

  /**
   * Records the end of a block: adds its term count to the field total,
   * classifies the block as mixed, terms-only or sub-blocks-only, and
   * accumulates the bytes of the block not covered by the suffix and
   * stats readers.
   *
   * @param frame the frame describing the block that just ended
   * @throws IllegalStateException if the block contains neither terms nor sub-blocks
   */
  void endBlock(SegmentTermsEnumFrame frame) {
    // In a leaf block every entry is a term; otherwise the term count is read
    // from the frame state (presumably the number of terms seen in the block -- confirm).
    final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
    final int subBlockCount = frame.entCount - termCount;
    totalTermCount += termCount;
    if (termCount != 0 && subBlockCount != 0) {
      mixedBlockCount++;
    } else if (termCount != 0) {
      termsOnlyBlockCount++;
    } else if (subBlockCount != 0) {
      subBlocksOnlyBlockCount++;
    } else {
      // Include enough context to locate the offending block when this fires.
      throw new IllegalStateException("block has neither terms nor sub-blocks: entCount=" + frame.entCount
          + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd
          + " segment=" + segment + " field=" + field);
    }
    endBlockCount++;
    // Whatever lies between fp and fpEnd that is not suffix or stats bytes.
    final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
    assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
    totalBlockOtherBytes += otherBytes;
  }

  /**
   * Accounts for one term by adding its length to {@link #totalTermBytes}.
   *
   * @param term the term whose {@code length} is added to the total
   */
  void term(BytesRef term) {
    totalTermBytes = totalTermBytes + term.length;
  }

  /**
   * Consistency check over the collected counters. It only has an effect when
   * assertions are enabled: every started block must have been ended, and the
   * total block count must equal both the floor-sub-block/non-floor split and
   * the mixed/terms-only/sub-blocks-only split.
   */
  void finish() {
    assert endBlockCount == startBlockCount
        : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
    assert floorSubBlockCount + nonFloorBlockCount == totalBlockCount
        : "floorSubBlockCount=" + floorSubBlockCount
            + " nonFloorBlockCount=" + nonFloorBlockCount
            + " totalBlockCount=" + totalBlockCount;
    assert mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount == totalBlockCount
        : "totalBlockCount=" + totalBlockCount
            + " mixedBlockCount=" + mixedBlockCount
            + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount
            + " termsOnlyBlockCount=" + termsOnlyBlockCount;
  }

  @Override
  public String toString() {
    final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    PrintStream out;
    try {
      out = new PrintStream(bos, false, IOUtils.UTF_8);
    } catch (UnsupportedEncodingException bogus) {
      throw new RuntimeException(bogus);
    }
      
    out.println("  index FST:");
    out.println("    " + indexNumBytes + " bytes");
    out.println("  terms:");
    out.println("    " + totalTermCount + " terms");
    out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
    out.println("  blocks:");
    out.println("    " + totalBlockCount + " blocks");
    out.println("    " + termsOnlyBlockCount + " terms-only blocks");
    out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
    out.println("    " + mixedBlockCount + " mixed blocks");
    out.println("    " + floorBlockCount + " floor blocks");
    out.println("    " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
    out.println("    " + floorSubBlockCount + " floor sub-blocks");
    out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
    out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
    out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
    if (totalBlockCount != 0) {
      out.println("    by prefix length:");
      int total = 0;
      for(int prefix=0;prefix




© 2015 - 2025 Weber Informatics LLC | Privacy Policy