All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pinot.segment.local.startree.StarTreeBuilderUtils Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.segment.local.startree;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import javax.annotation.Nullable;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.startree.v2.builder.StarTreeV2BuilderConfig;
import org.apache.pinot.segment.spi.SegmentMetadata;
import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Constants;
import org.apache.pinot.segment.spi.index.startree.StarTreeV2Metadata;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;

import static java.nio.charset.StandardCharsets.UTF_8;


/**
 * The {@code StarTreeBuilderUtils} class contains utility methods for star-tree builders.
 */
public class StarTreeBuilderUtils {
  private StarTreeBuilderUtils() {
  }

  public static final int INVALID_ID = -1;

  public static class TreeNode {
    public int _dimensionId = INVALID_ID;
    public int _dimensionValue = INVALID_ID;
    public int _startDocId = INVALID_ID;
    public int _endDocId = INVALID_ID;
    public int _aggregatedDocId = INVALID_ID;
    public int _childDimensionId = INVALID_ID;
    public Map _children;
  }

  /**
   * Generates the deduplicated star-tree builder configs.
   */
  public static List generateBuilderConfigs(@Nullable List indexConfigs,
      boolean enableDefaultStarTree, SegmentMetadata segmentMetadata) {
    List builderConfigs = new ArrayList<>();
    if (indexConfigs != null) {
      for (StarTreeIndexConfig indexConfig : indexConfigs) {
        StarTreeV2BuilderConfig builderConfig = StarTreeV2BuilderConfig.fromIndexConfig(indexConfig);
        if (!builderConfigs.contains(builderConfig)) {
          builderConfigs.add(builderConfig);
        }
      }
    }
    if (enableDefaultStarTree) {
      StarTreeV2BuilderConfig defaultConfig = StarTreeV2BuilderConfig.generateDefaultConfig(segmentMetadata);
      if (!builderConfigs.contains(defaultConfig)) {
        builderConfigs.add(defaultConfig);
      }
    }
    return builderConfigs;
  }

  /**
   * Serialize the star-tree structure into a file.
   */
  public static void serializeTree(File starTreeFile, TreeNode rootNode, String[] dimensions, int numNodes)
      throws IOException {
    int headerSizeInBytes = computeHeaderByteSize(dimensions);
    long totalSizeInBytes = headerSizeInBytes + (long) numNodes * OffHeapStarTreeNode.SERIALIZABLE_SIZE_IN_BYTES;

    // Backward-compatible: star-tree file is always little-endian
    try (PinotDataBuffer buffer = PinotDataBuffer
        .mapFile(starTreeFile, false, 0, totalSizeInBytes, ByteOrder.LITTLE_ENDIAN,
            "StarTreeBuilderUtils#serializeTree: star-tree buffer")) {
      long offset = writeHeader(buffer, headerSizeInBytes, dimensions, numNodes);
      writeNodes(buffer, offset, rootNode);
    }
  }

  /**
   * Helper method to compute size of the star-tree header in bytes.
   * 

The header contains the following fields: *

    *
  • Magic marker (long)
  • *
  • Size of the header (int)
  • *
  • Version (int)
  • *
  • Number of dimensions (int)
  • *
  • For each dimension, index of the dimension (int), number of bytes in the dimension string (int), and the byte * array for the string
  • *
  • Number of nodes in the tree (int)
  • *
*/ private static int computeHeaderByteSize(String[] dimensions) { // Magic marker (8), version (4), size of header (4) and number of dimensions (4) int headerSizeInBytes = 20; for (String dimension : dimensions) { headerSizeInBytes += Integer.BYTES; // For dimension index headerSizeInBytes += Integer.BYTES; // For length of dimension name headerSizeInBytes += dimension.getBytes(UTF_8).length; // For dimension name } headerSizeInBytes += Integer.BYTES; // For number of nodes. return headerSizeInBytes; } /** * Helper method to write the header into the data buffer. */ private static int writeHeader(PinotDataBuffer dataBuffer, int headerSizeInBytes, String[] dimensions, int numNodes) { int offset = 0; dataBuffer.putLong(offset, OffHeapStarTree.MAGIC_MARKER); offset += Long.BYTES; dataBuffer.putInt(offset, OffHeapStarTree.VERSION); offset += Integer.BYTES; dataBuffer.putInt(offset, headerSizeInBytes); offset += Integer.BYTES; int numDimensions = dimensions.length; dataBuffer.putInt(offset, numDimensions); offset += Integer.BYTES; for (int i = 0; i < numDimensions; i++) { dataBuffer.putInt(offset, i); offset += Integer.BYTES; String dimension = dimensions[i]; byte[] dimensionBytes = dimension.getBytes(UTF_8); int dimensionLength = dimensionBytes.length; dataBuffer.putInt(offset, dimensionLength); offset += Integer.BYTES; dataBuffer.readFrom(offset, dimensionBytes, 0, dimensionLength); offset += dimensionLength; } dataBuffer.putInt(offset, numNodes); offset += Integer.BYTES; return offset; } /** * Helper method to traverse star-tree using BFS and write nodes into the data buffer. 
*/ private static void writeNodes(PinotDataBuffer dataBuffer, long offset, TreeNode rootNode) { Queue queue = new LinkedList<>(); queue.add(rootNode); int currentNodeId = 0; while (!queue.isEmpty()) { TreeNode node = queue.remove(); if (node._children == null) { offset = writeNode(dataBuffer, offset, node, INVALID_ID, INVALID_ID); } else { // Sort all children nodes based on dimension value List sortedChildren = new ArrayList<>(node._children.values()); sortedChildren.sort((o1, o2) -> Integer.compare(o1._dimensionValue, o2._dimensionValue)); int firstChildId = currentNodeId + queue.size() + 1; int lastChildId = firstChildId + sortedChildren.size() - 1; offset = writeNode(dataBuffer, offset, node, firstChildId, lastChildId); queue.addAll(sortedChildren); } currentNodeId++; } } /** * Helper method to write one node into the data buffer. */ private static long writeNode(PinotDataBuffer dataBuffer, long offset, TreeNode node, int firstChildId, int lastChildId) { dataBuffer.putInt(offset, node._dimensionId); offset += Integer.BYTES; dataBuffer.putInt(offset, node._dimensionValue); offset += Integer.BYTES; dataBuffer.putInt(offset, node._startDocId); offset += Integer.BYTES; dataBuffer.putInt(offset, node._endDocId); offset += Integer.BYTES; dataBuffer.putInt(offset, node._aggregatedDocId); offset += Integer.BYTES; dataBuffer.putInt(offset, firstChildId); offset += Integer.BYTES; dataBuffer.putInt(offset, lastChildId); offset += Integer.BYTES; return offset; } /** * Returns {@code true} if the given star-tree builder configs do not match the star-tree metadata, in which case the * existing star-trees need to be removed, {@code false} otherwise. 
*/ public static boolean shouldRemoveExistingStarTrees(List builderConfigs, List metadataList) { int numStarTrees = builderConfigs.size(); if (metadataList.size() != numStarTrees) { return true; } for (int i = 0; i < numStarTrees; i++) { StarTreeV2BuilderConfig builderConfig = builderConfigs.get(i); StarTreeV2Metadata metadata = metadataList.get(i); if (!builderConfig.getDimensionsSplitOrder().equals(metadata.getDimensionsSplitOrder())) { return true; } if (!builderConfig.getSkipStarNodeCreationForDimensions() .equals(metadata.getSkipStarNodeCreationForDimensions())) { return true; } if (!builderConfig.getFunctionColumnPairs().equals(metadata.getFunctionColumnPairs())) { return true; } if (builderConfig.getMaxLeafRecords() != metadata.getMaxLeafRecords()) { return true; } } return false; } /** * Removes all the star-trees from the given segment. */ public static void removeStarTrees(File indexDir) throws Exception { File segmentDirectory = SegmentDirectoryPaths.findSegmentDirectory(indexDir); // Remove the star-tree metadata PropertiesConfiguration metadataProperties = CommonsConfigurationUtils.fromFile(new File(segmentDirectory, V1Constants.MetadataKeys.METADATA_FILE_NAME)); metadataProperties.subset(StarTreeV2Constants.MetadataKey.STAR_TREE_SUBSET).clear(); // Commons Configuration 1.10 does not support file path containing '%'. // Explicitly providing the output stream for the file bypasses the problem. try (FileOutputStream fileOutputStream = new FileOutputStream(metadataProperties.getFile())) { metadataProperties.save(fileOutputStream); } // Remove the index file and index map file FileUtils.forceDelete(new File(segmentDirectory, StarTreeV2Constants.INDEX_FILE_NAME)); FileUtils.forceDelete(new File(segmentDirectory, StarTreeV2Constants.INDEX_MAP_FILE_NAME)); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy