All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.snowflake.kafka.connector.internal.FileNameUtils Maven / Gradle / Ivy

package com.snowflake.kafka.connector.internal;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FileNameUtils {
  private static final KCLogger LOGGER = new KCLogger(FileNameUtils.class.getName());

  // applicationName/tableName/partitionNumber/startOffset_endOffset_time.json.gz
  // Capture groups: 1 = partition, 2 = start offset, 3 = end offset, 4 = time.
  private static final Pattern FILE_NAME_PATTERN =
      Pattern.compile("^[^/]+/[^/]+/(\\d+)/(\\d+)_(\\d+)_(\\d+)\\.json\\.gz$");

  /**
   * Generate a file name. File name format: app/table/partition/start_end_timeStamp.json.gz
   * Note: all file names should use this format.
   *
   * @param appName connector name
   * @param table table name
   * @param partition partition number
   * @param start start offset
   * @param end end offset
   * @return file name
   */
  public static String fileName(String appName, String table, int partition, long start, long end) {
    return fileName(filePrefix(appName, table, partition), start, end);
  }

  // Used for testing only: same layout as the other overloads, but with a caller-supplied time
  // instead of the current clock value.
  static String fileName(
      String appName, String table, int partition, long start, long end, long time) {
    return buildFileName(filePrefix(appName, table, partition), start, end, time);
  }

  /**
   * Generate a file name under the given prefix, stamped with the current time in milliseconds.
   *
   * @param prefix prefix, expected to end with "/" as produced by {@link #filePrefix}
   * @param start start offset
   * @param end end offset
   * @return file name
   */
  static String fileName(String prefix, long start, long end) {
    String fileName = buildFileName(prefix, start, end, System.currentTimeMillis());
    LOGGER.debug("generated file name: {}", fileName);
    return fileName;
  }

  /**
   * Assemble {@code prefix + start_end_time.json.gz}. Single place that defines the layout which
   * {@link #FILE_NAME_PATTERN} parses back.
   *
   * @param prefix prefix
   * @param start start offset
   * @param end end offset
   * @param time timestamp to embed in the name
   * @return file name
   */
  private static String buildFileName(String prefix, long start, long end, long time) {
    return prefix + start + "_" + end + "_" + time + ".json.gz";
  }

  /**
   * Generate a file name for broken data.
   *
   * @param appName app name
   * @param table table name
   * @param partition partition id
   * @param offset record offset
   * @param isKey is the broken record a key or a value
   * @return file name
   */
  static String brokenRecordFileName(
      String appName, String table, int partition, long offset, boolean isKey) {
    return brokenRecordFileName(filePrefix(appName, table, partition), offset, isKey);
  }

  /**
   * Generate a file name for broken data. Format: prefix + offset_(key|value)_time.gz, where time
   * is the current time in milliseconds. Note that this format intentionally differs from the
   * regular one and is not accepted by {@link #verifyFileName}.
   *
   * @param prefix prefix
   * @param offset record offset
   * @param isKey is the broken record a key or a value
   * @return file name
   */
  static String brokenRecordFileName(String prefix, long offset, boolean isKey) {
    long time = System.currentTimeMillis();
    String isKeyString = isKey ? "key" : "value";
    String fileName = prefix + offset + "_" + isKeyString + "_" + time + ".gz";
    LOGGER.debug("generated broken data file name: {}", fileName);
    return fileName;
  }

  /**
   * Generate a file prefix of the form appName/table/partition/ (with trailing slash).
   *
   * @param appName connector name
   * @param table table name
   * @param partition partition index
   * @return file prefix
   */
  static String filePrefix(String appName, String table, int partition) {
    return appName + "/" + table + "/" + partition + "/";
  }

  /**
   * Verify a file name against the regular file name format.
   *
   * <p>The whole input must match: {@code matches()} is used rather than {@code find()} because
   * {@code $} alone also matches just before a trailing line terminator, which would let a name
   * such as {@code "app/table/0/1_2_3.json.gz\n"} through.
   *
   * @param fileName file name, may be null
   * @return true if file name format is correct, false otherwise (including null input)
   */
  static boolean verifyFileName(String fileName) {
    return fileName != null && FILE_NAME_PATTERN.matcher(fileName).matches();
  }

  /**
   * Read start offset from file name.
   *
   * @param fileName file name
   * @return start offset
   * @throws NumberFormatException if the digits do not fit in a long (the pattern does not bound
   *     their length)
   */
  static long fileNameToStartOffset(String fileName) {
    return Long.parseLong(readFromFileName(fileName, 2));
  }

  /**
   * Read end offset from file name.
   *
   * @param fileName file name
   * @return end offset
   * @throws NumberFormatException if the digits do not fit in a long
   */
  static long fileNameToEndOffset(String fileName) {
    return Long.parseLong(readFromFileName(fileName, 3));
  }

  /**
   * Read ingested time from file name.
   *
   * @param fileName file name
   * @return ingested time
   * @throws NumberFormatException if the digits do not fit in a long
   */
  static long fileNameToTimeIngested(String fileName) {
    return Long.parseLong(readFromFileName(fileName, 4));
  }

  /**
   * Read partition index from file name.
   *
   * @param fileName file name
   * @return partition index
   * @throws NumberFormatException if the digits do not fit in an int
   */
  static int fileNameToPartition(String fileName) {
    return Integer.parseInt(readFromFileName(fileName, 1));
  }

  /**
   * Check whether the given file is expired, i.e. whether the time elapsed between the timestamp
   * embedded in the file name and now is strictly greater than {@code
   * InternalUtils.MAX_RECOVERY_TIME}.
   *
   * @param fileName file name
   * @return true if expired, otherwise false
   */
  static boolean isFileExpired(String fileName) {
    long age = System.currentTimeMillis() - fileNameToTimeIngested(fileName);
    return age > InternalUtils.MAX_RECOVERY_TIME;
  }

  /**
   * Remove prefix and .gz from file name. Note: for JDBC put use only.
   *
   * <p>Rejects null, empty, or "/"-terminated input with {@code SnowflakeErrors.ERROR_0008}.
   *
   * @param name file name
   * @return the last path segment of the file name, without a trailing .gz
   */
  static String removePrefixAndGZFromFileName(String name) {
    if (name == null || name.isEmpty() || name.endsWith("/")) {
      throw SnowflakeErrors.ERROR_0008.getException("input file name: " + name);
    }

    // Strip the extension first, then cut at the last slash of what remains.
    String withoutGz = name.endsWith(".gz") ? name.substring(0, name.length() - 3) : name;

    int prefixEndIndex = withoutGz.lastIndexOf('/');
    if (prefixEndIndex > -1) {
      return withoutGz.substring(prefixEndIndex + 1);
    }

    return withoutGz;
  }

  /**
   * Get the prefix from the file name. Note: for JDBC put use only.
   *
   * <p>Rejects null, empty, or "/"-terminated input with {@code SnowflakeErrors.ERROR_0008}.
   *
   * @param name file name
   * @return everything before the last "/" of the file name (without that slash), or null if the
   *     file name contains no "/"
   */
  static String getPrefixFromFileName(String name) {
    if (name == null || name.isEmpty() || name.endsWith("/")) {
      throw SnowflakeErrors.ERROR_0008.getException("input file name: " + name);
    }

    int prefixEndIndex = name.lastIndexOf('/');
    if (prefixEndIndex > -1) {
      return name.substring(0, prefixEndIndex);
    }
    // No directory part at all; callers receive null rather than an empty string.
    return null;
  }

  /**
   * Read a value from a file name. Fails with {@code SnowflakeErrors.ERROR_0008} when the file
   * name is null or does not match the regular file name format in its entirety.
   *
   * @param fileName file name
   * @param index capture group index (1 = partition, 2 = start, 3 = end, 4 = time)
   * @return string value
   */
  private static String readFromFileName(String fileName, int index) {
    if (fileName != null) {
      Matcher matcher = FILE_NAME_PATTERN.matcher(fileName);
      // matches(), not find(): see verifyFileName for the trailing line terminator pitfall.
      if (matcher.matches()) {
        return matcher.group(index);
      }
    }

    throw SnowflakeErrors.ERROR_0008.getException("input file name: " + fileName);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy