All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.common.table.cdc.HoodieCDCUtils Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.table.cdc;

import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.exception.HoodieException;

import org.apache.avro.JsonProperties;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

import java.util.Arrays;
import java.util.List;

/**
 * Utilities for change log capture.
 */
public class HoodieCDCUtils {

  public static final String CDC_LOGFILE_SUFFIX = ".cdc";

  /* the `op` column represents how a record is changed. */
  public static final String CDC_OPERATION_TYPE = "op";

  /* the `ts_ms` column represents when a record is changed. */
  public static final String CDC_COMMIT_TIMESTAMP = "ts_ms";

  /* the pre-image before one record is changed */
  public static final String CDC_BEFORE_IMAGE = "before";

  /* the post-image after one record is changed */
  public static final String CDC_AFTER_IMAGE = "after";

  /* the key of the changed record */
  public static final String CDC_RECORD_KEY = "record_key";

  public static final String[] CDC_COLUMNS = new String[] {
      CDC_OPERATION_TYPE,
      CDC_COMMIT_TIMESTAMP,
      CDC_BEFORE_IMAGE,
      CDC_AFTER_IMAGE
  };

  /**
   * The schema of cdc log file in the case `hoodie.table.cdc.supplemental.logging.mode` is {@link HoodieCDCSupplementalLoggingMode#OP_KEY_ONLY}.
   */
  public static final String CDC_SCHEMA_OP_AND_RECORDKEY_STRING = "{\"type\":\"record\",\"name\":\"Record\","
      + "\"fields\":["
      + "{\"name\":\"op\",\"type\":[\"string\",\"null\"]},"
      + "{\"name\":\"record_key\",\"type\":[\"string\",\"null\"]}"
      + "]}";

  public static final Schema CDC_SCHEMA_OP_AND_RECORDKEY =
      new Schema.Parser().parse(CDC_SCHEMA_OP_AND_RECORDKEY_STRING);

  public static Schema schemaBySupplementalLoggingMode(
      HoodieCDCSupplementalLoggingMode supplementalLoggingMode,
      Schema tableSchema) {
    if (supplementalLoggingMode == HoodieCDCSupplementalLoggingMode.OP_KEY_ONLY) {
      return CDC_SCHEMA_OP_AND_RECORDKEY;
    } else if (supplementalLoggingMode == HoodieCDCSupplementalLoggingMode.DATA_BEFORE) {
      return createCDCSchema(tableSchema, false);
    } else if (supplementalLoggingMode == HoodieCDCSupplementalLoggingMode.DATA_BEFORE_AFTER) {
      return createCDCSchema(tableSchema, true);
    } else {
      throw new HoodieException("not support this supplemental logging mode: " + supplementalLoggingMode);
    }
  }

  private static Schema createCDCSchema(Schema tableSchema, boolean withAfterImage) {
    Schema imageSchema = AvroSchemaUtils.createNullableSchema(tableSchema);
    Schema.Field opField = new Schema.Field(CDC_OPERATION_TYPE,
        AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE);
    Schema.Field beforeField = new Schema.Field(
        CDC_BEFORE_IMAGE, imageSchema, "", JsonProperties.NULL_VALUE);
    List fields;
    if (withAfterImage) {
      Schema.Field tsField = new Schema.Field(CDC_COMMIT_TIMESTAMP,
          AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE);
      Schema.Field afterField = new Schema.Field(
          CDC_AFTER_IMAGE, imageSchema, "", JsonProperties.NULL_VALUE);
      fields = Arrays.asList(opField, tsField, beforeField, afterField);
    } else {
      Schema.Field keyField = new Schema.Field(CDC_RECORD_KEY,
          AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE);
      fields = Arrays.asList(opField, keyField, beforeField);
    }

    Schema mergedSchema = Schema.createRecord("CDC", null, tableSchema.getNamespace(), false);
    mergedSchema.setFields(fields);
    return mergedSchema;
  }

  /**
   * Build the cdc record which has all the cdc fields when `hoodie.table.cdc.supplemental.logging.mode` is {@link HoodieCDCSupplementalLoggingMode#DATA_BEFORE_AFTER}.
   */
  public static GenericData.Record cdcRecord(Schema cdcSchema, String op, String commitTime,
                                             GenericRecord before, GenericRecord after) {
    GenericData.Record record = new GenericData.Record(cdcSchema);
    record.put(CDC_OPERATION_TYPE, op);
    record.put(CDC_COMMIT_TIMESTAMP, commitTime);
    record.put(CDC_BEFORE_IMAGE, before);
    record.put(CDC_AFTER_IMAGE, after);
    return record;
  }

  /**
   * Build the cdc record when `hoodie.table.cdc.supplemental.logging.mode` is {@link HoodieCDCSupplementalLoggingMode#DATA_BEFORE}.
   */
  public static GenericData.Record cdcRecord(Schema cdcSchema, String op,
                                             String recordKey, GenericRecord before) {
    GenericData.Record record = new GenericData.Record(cdcSchema);
    record.put(CDC_OPERATION_TYPE, op);
    record.put(CDC_RECORD_KEY, recordKey);
    record.put(CDC_BEFORE_IMAGE, before);
    return record;
  }

  /**
   * Build the cdc record when `hoodie.table.cdc.supplemental.logging.mode` is {@link HoodieCDCSupplementalLoggingMode#OP_KEY_ONLY}.
   */
  public static GenericData.Record cdcRecord(Schema cdcSchema, String op, String recordKey) {
    GenericData.Record record = new GenericData.Record(cdcSchema);
    record.put(CDC_OPERATION_TYPE, op);
    record.put(CDC_RECORD_KEY, recordKey);
    return record;
  }

  public static String recordToJson(GenericRecord record) {
    return GenericData.get().toString(record);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy