All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.secondary.index.SecondaryIndexManager Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.secondary.index;

import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieSecondaryIndexException;

import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;

import static org.apache.hudi.secondary.index.SecondaryIndexUtils.getSecondaryIndexes;

/**
 * Manages secondary index.
 */
public class SecondaryIndexManager {
  private static final Logger LOG = LoggerFactory.getLogger(SecondaryIndexManager.class);

  private static volatile SecondaryIndexManager _instance;

  private SecondaryIndexManager() {
  }

  public static SecondaryIndexManager getInstance() {
    if (_instance == null) {
      synchronized (SecondaryIndexManager.class) {
        if (_instance == null) {
          _instance = new SecondaryIndexManager();
        }
      }
    }

    return _instance;
  }

  /**
   * Create a secondary index for hoodie table, two steps will be performed:
   * 1. Add secondary index metadata to hoodie.properties
   * 2. Trigger build secondary index
   *
   * @param metaClient     Hoodie table meta client
   * @param indexName      The unique secondary index name
   * @param indexType      Index type
   * @param ignoreIfExists Whether ignore the creation if the specific secondary index exists
   * @param columns        The columns referenced by this secondary index, each column
   *                       has its own options
   * @param options        Options for this secondary index
   */
  public void create(
      HoodieTableMetaClient metaClient,
      String indexName,
      String indexType,
      boolean ignoreIfExists,
      LinkedHashMap> columns,
      Map options) {
    Option> secondaryIndexes = getSecondaryIndexes(metaClient);
    Set colNames = columns.keySet();
    Schema avroSchema;
    try {
      avroSchema = new TableSchemaResolver(metaClient).getTableAvroSchema(false);
    } catch (Exception e) {
      throw new HoodieSecondaryIndexException(
          "Failed to get table avro schema: " + metaClient.getTableConfig().getTableName());
    }

    for (String col : colNames) {
      if (avroSchema.getField(col) == null) {
        throw new HoodieSecondaryIndexException("Field not exists: " + col);
      }
    }

    if (indexExists(secondaryIndexes, indexName, Option.of(indexType), Option.of(colNames))) {
      if (ignoreIfExists) {
        return;
      } else {
        throw new HoodieSecondaryIndexException("Secondary index already exists: " + indexName);
      }
    }

    HoodieSecondaryIndex secondaryIndexToAdd = HoodieSecondaryIndex.builder()
        .setIndexName(indexName)
        .setIndexType(indexType)
        .setColumns(columns)
        .setOptions(options)
        .build();

    List newSecondaryIndexes = secondaryIndexes.map(h -> {
      h.add(secondaryIndexToAdd);
      return h;
    }).orElse(Collections.singletonList(secondaryIndexToAdd));
    newSecondaryIndexes.sort(new HoodieSecondaryIndex.HoodieIndexCompactor());

    // Persistence secondary indexes' metadata to hoodie.properties file
    Properties updatedProps = new Properties();
    updatedProps.put(HoodieTableConfig.SECONDARY_INDEXES_METADATA.key(),
        SecondaryIndexUtils.toJsonString(newSecondaryIndexes));
    HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), updatedProps);

    LOG.info("Success to add secondary index metadata: {}", secondaryIndexToAdd);

    // TODO: build index
  }

  /**
   * Drop a secondary index by index name
   *
   * @param metaClient        Hoodie table meta client
   * @param indexName         The unique secondary index name
   * @param ignoreIfNotExists Whether ignore drop if the specific secondary index no exists
   */
  public void drop(HoodieTableMetaClient metaClient, String indexName, boolean ignoreIfNotExists) {
    Option> secondaryIndexes = getSecondaryIndexes(metaClient);
    if (!indexExists(secondaryIndexes, indexName, Option.empty(), Option.empty())) {
      if (ignoreIfNotExists) {
        return;
      } else {
        throw new HoodieSecondaryIndexException("Secondary index not exists: " + indexName);
      }
    }

    List secondaryIndexesToKeep = secondaryIndexes.get().stream()
        .filter(i -> !i.getIndexName().equals(indexName))
        .sorted(new HoodieSecondaryIndex.HoodieIndexCompactor())
        .collect(Collectors.toList());
    if (CollectionUtils.nonEmpty(secondaryIndexesToKeep)) {
      Properties updatedProps = new Properties();
      updatedProps.put(HoodieTableConfig.SECONDARY_INDEXES_METADATA.key(),
          SecondaryIndexUtils.toJsonString(secondaryIndexesToKeep));
      HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), updatedProps);
    } else {
      HoodieTableConfig.delete(metaClient.getFs(), new Path(metaClient.getMetaPath()),
          CollectionUtils.createSet(HoodieTableConfig.SECONDARY_INDEXES_METADATA.key()));
    }

    LOG.info("Success to delete secondary index metadata: {}", indexName);

    // TODO: drop index data
  }

  /**
   * Show secondary indexes from hoodie table
   *
   * @param metaClient Hoodie table meta client
   * @return Indexes in this table
   */
  public Option> show(HoodieTableMetaClient metaClient) {
    return getSecondaryIndexes(metaClient);
  }

  /**
   * Refresh the specific secondary index
   *
   * @param metaClient Hoodie table meta client
   * @param indexName  The target secondary index name
   */
  public void refresh(HoodieTableMetaClient metaClient, String indexName) {
    // TODO
  }

  /**
   * Check if the specific secondary index exists. When drop a secondary index,
   * only check index name, but for adding a secondary index, we should also
   * check the index type and columns when index name is different.
   *
   * @param secondaryIndexes Current secondary indexes in this table
   * @param indexName        The index name of target secondary index
   * @param indexType        The index type of target secondary index
   * @param colNames         The column names of target secondary index
   * @return true if secondary index exists
   */
  private boolean indexExists(
      Option> secondaryIndexes,
      String indexName,
      Option indexType,
      Option> colNames) {
    return secondaryIndexes.map(indexes ->
        indexes.stream().anyMatch(index -> {
          if (index.getIndexName().equals(indexName)) {
            return true;
          } else if (indexType.isPresent() && colNames.isPresent()) {
            // When secondary index names are different, we should check index type
            // and index columns to avoid repeatedly creating the same index.
            // For example:
            //   create index idx_name on test using lucene (name);
            //   create index idx_name_1 on test using lucene (name);
            return index.getIndexType().name().equalsIgnoreCase(indexType.get())
                && CollectionUtils.diff(index.getColumns().keySet(), colNames.get()).isEmpty();
          }

          return false;
        })).orElse(false);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy