com.snowflake.kafka.connector.internal.FileNameUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of snowflake-kafka-connector Show documentation
Show all versions of snowflake-kafka-connector Show documentation
Snowflake Kafka Connect Sink Connector
The newest version!
package com.snowflake.kafka.connector.internal;
import com.google.common.base.Strings;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.kafka.common.utils.Crc32C;
public class FileNameUtils {
private static final KCLogger LOGGER = new KCLogger(FileNameUtils.class.getName());
/**
* generate file name
*
* @param prefix prefix
* @param start start offset
* @param end end offset
* @return file name
*/
static String fileName(String prefix, long start, long end) {
long time = System.currentTimeMillis();
String fileName = prefix + start + "_" + end + "_" + time + ".json.gz";
LOGGER.debug("generated file name: {}", fileName);
return fileName;
}
/**
* generate file name for broken data
*
* @param prefix prefix
* @param offset record offset
* @param isKey is the broken record a key or a value
* @return file name
*/
static String brokenRecordFileName(String prefix, long offset, boolean isKey) {
long time = System.currentTimeMillis();
String isKeyString = isKey ? "key" : "value";
String fileName = prefix + offset + "_" + isKeyString + "_" + time + ".gz";
LOGGER.debug("generated broken data file name: {}", fileName);
return fileName;
}
/**
* generate file prefix
*
* @param appName connector name
* @param table table name
* @param topic topic name
* @param partition partition index
* @return file prefix
*/
static String filePrefix(String appName, String table, String topic, int partition) {
if (partition >= 0x8000) {
throw new IllegalArgumentException(
String.format("partition id=%d is too large (max=%d)", partition, 0x8000));
}
return appName + "/" + table + "/" + calculatePartitionPart(topic, partition) + "/";
}
private static BigInteger calculatePartitionPart(String topic, int partition) {
BigInteger partitionPart = BigInteger.valueOf(partition);
if (!Strings.isNullOrEmpty(topic)) {
// if topic is provided as part of the file prefix,
// 1. lets calculate stable hash code out of it,
// 2. bit shift it by 16 bits left,
// 3. add 0x8000 (light up 15th bit as a marker)
// 4. add partition id (which should in production use cases never reach a value above 5.000
// partitions pers topic).
// In theory - we would support 32767 partitions, which is more than any reasonable value for
// a single topic
byte[] bytes = topic.toUpperCase().getBytes(StandardCharsets.UTF_8);
BigInteger hash = BigInteger.valueOf(Crc32C.compute(bytes, 0, bytes.length));
partitionPart =
hash.abs()
.multiply(BigInteger.valueOf(0x10000))
.add(BigInteger.valueOf(0x8000))
.add(partitionPart);
}
return partitionPart;
}
// applicationName/tableName/partitionNumber
// /startOffset_endOffset_time_format.json.gz
private static Pattern FILE_NAME_PATTERN =
Pattern.compile("^[^/]+/[^/]+/(\\d+)/(\\d+)_(\\d+)_(\\d+)\\.json\\.gz$");
/**
* verify file name
*
* @param fileName file name
* @return true if file name format is correct, false otherwise
*/
static boolean verifyFileName(String fileName) {
return FILE_NAME_PATTERN.matcher(fileName).find();
}
/**
* read start offset from file name
*
* @param fileName file name
* @return start offset
*/
static long fileNameToStartOffset(String fileName) {
return Long.parseLong(readFromFileName(fileName, 2));
}
/**
* read end offset from file name
*
* @param fileName file name
* @return end offset
*/
static long fileNameToEndOffset(String fileName) {
return Long.parseLong(readFromFileName(fileName, 3));
}
/**
* read ingested time from file name
*
* @param fileName file name
* @return ingested time
*/
static long fileNameToTimeIngested(String fileName) {
return Long.parseLong(readFromFileName(fileName, 4));
}
/**
* read partition index from file name
*
* @param fileName file name
* @return partition index
*/
static int fileNameToPartition(String fileName) {
BigInteger value = new BigInteger(readFromFileName(fileName, 1));
return value.and(BigInteger.valueOf(0x7FFF)).intValue();
}
/**
* remove prefix and .gz from file name. note: for JDBC put use only
*
* @param name file name
* @return file name without .gz
*/
static String removePrefixAndGZFromFileName(String name) {
if (name == null || name.isEmpty() || name.endsWith("/")) {
throw SnowflakeErrors.ERROR_0008.getException("input file name: " + name);
}
if (name.endsWith(".gz")) {
name = name.substring(0, name.length() - 3);
}
int prefixEndIndex = name.lastIndexOf('/');
if (prefixEndIndex > -1) {
return name.substring(prefixEndIndex + 1, name.length());
}
return name;
}
/**
* Get the prefix from the file name note: for JDBC put use only
*
* @param name file name
* @return prefix from the
*/
static String getPrefixFromFileName(String name) {
if (name == null || name.isEmpty() || name.endsWith("/")) {
throw SnowflakeErrors.ERROR_0008.getException("input file name: " + name);
}
int prefixEndIndex = name.lastIndexOf('/');
if (prefixEndIndex > -1) {
return name.substring(0, prefixEndIndex);
}
return null;
}
/**
* read a value from file name
*
* @param fileName file name
* @param index value index
* @return string value
*/
private static String readFromFileName(String fileName, int index) {
Matcher matcher = FILE_NAME_PATTERN.matcher(fileName);
if (!matcher.find()) {
throw SnowflakeErrors.ERROR_0008.getException("input file name: " + fileName);
}
return matcher.group(index);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy