// com.microsoft.azure.kusto.ingest.IngestionProperties (Maven / Gradle / Ivy)
// Go to download
// Show more of this group; show more artifacts with this name
// Show all versions of kusto-ingest; show documentation
// Kusto client library for ingesting data
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
package com.microsoft.azure.kusto.ingest;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.microsoft.azure.kusto.data.Ensure;
import com.microsoft.azure.kusto.data.Utils;
import com.microsoft.azure.kusto.data.instrumentation.TraceableAttributes;
import com.microsoft.azure.kusto.ingest.exceptions.IngestionClientException;
import com.microsoft.azure.kusto.ingest.result.ValidationPolicy;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.TextStringBuilder;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
 * Mutable description of a single ingestion: the destination database and table, the data format,
 * reporting options, tags, an optional ingestion mapping and free-form additional properties.
 *
 * <p>Instances are mutable and perform no synchronization of their own.
 */
public class IngestionProperties implements TraceableAttributes {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    private final String databaseName;
    private final String tableName;
    private boolean flushImmediately;
    private boolean ignoreFirstRecord;
    private IngestionReportLevel reportLevel;
    private IngestionReportMethod reportMethod;
    private List<String> dropByTags;
    private List<String> ingestByTags;
    private List<String> additionalTags;
    private List<String> ingestIfNotExists;
    private IngestionMapping ingestionMapping;
    private ValidationPolicy validationPolicy;
    private Map<String, String> additionalProperties;
    private DataFormat dataFormat;

    /**
     * Creates an initialized {@code IngestionProperties} instance with a given {@code databaseName} and {@code tableName}.
     * The default values of the rest of the properties are:
     * <ul>
     * <li>{@code reportLevel} : {@code IngestionReportLevel.FAILURES_ONLY}</li>
     * <li>{@code reportMethod} : {@code IngestionReportMethod.QUEUE}</li>
     * <li>{@code flushImmediately} : {@code false}</li>
     * <li>{@code ignoreFirstRecord} : {@code false}</li>
     * <li>{@code additionalProperties} : an empty {@code HashMap}</li>
     * <li>{@code dropByTags}, {@code ingestByTags}, {@code ingestIfNotExists}, {@code additionalTags} : empty lists</li>
     * <li>{@code ingestionMapping} : {@code new IngestionMapping()}</li>
     * <li>{@code dataFormat} : {@code DataFormat.CSV}</li>
     * </ul>
     *
     * @param databaseName the name of the database in the destination Kusto cluster.
     * @param tableName the name of the table in the destination database.
     */
    public IngestionProperties(String databaseName, String tableName) {
        this.databaseName = databaseName;
        this.tableName = tableName;
        this.reportLevel = IngestionReportLevel.FAILURES_ONLY;
        this.reportMethod = IngestionReportMethod.QUEUE;
        this.flushImmediately = false;
        this.ignoreFirstRecord = false;
        this.additionalProperties = new HashMap<>();
        this.dropByTags = new ArrayList<>();
        this.ingestByTags = new ArrayList<>();
        this.ingestIfNotExists = new ArrayList<>();
        this.additionalTags = new ArrayList<>();
        this.ingestionMapping = new IngestionMapping();
        this.dataFormat = DataFormat.CSV;
    }

    /**
     * Copy constructor for {@code IngestionProperties}.
     *
     * <p>The tag lists and the additional-properties map are copied into new collections (a {@code null}
     * collection on {@code other} becomes an empty one). The ingestion mapping and validation policy are
     * copied through their own copy constructors when present, and left {@code null} otherwise.
     *
     * @param other the instance to copy from.
     */
    public IngestionProperties(IngestionProperties other) {
        this.databaseName = other.databaseName;
        this.tableName = other.tableName;
        this.reportLevel = other.reportLevel;
        this.reportMethod = other.reportMethod;
        this.flushImmediately = other.flushImmediately;
        this.ignoreFirstRecord = other.ignoreFirstRecord;
        this.dataFormat = other.getDataFormat();
        // A setter may have stored null in any of these collections; copy defensively instead of throwing.
        this.additionalProperties = other.additionalProperties == null
                ? new HashMap<>()
                : new HashMap<>(other.additionalProperties);
        this.dropByTags = copyOrEmpty(other.dropByTags);
        this.ingestByTags = copyOrEmpty(other.ingestByTags);
        this.ingestIfNotExists = copyOrEmpty(other.ingestIfNotExists);
        this.additionalTags = copyOrEmpty(other.additionalTags);
        if (other.ingestionMapping != null) {
            this.ingestionMapping = new IngestionMapping(other.ingestionMapping);
        }
        if (other.validationPolicy != null) {
            this.validationPolicy = new ValidationPolicy(other.validationPolicy);
        }
    }

    /** Returns a new mutable copy of {@code source}, or a new empty list when {@code source} is {@code null}. */
    private static List<String> copyOrEmpty(List<String> source) {
        return source == null ? new ArrayList<>() : new ArrayList<>(source);
    }

    /** Returns {@code true} when {@code values} is non-null and contains at least one element. */
    private static boolean hasElements(List<String> values) {
        return values != null && !values.isEmpty();
    }

    /**
     * @return the validation policy, or {@code null} if none was set.
     */
    public ValidationPolicy getValidationPolicy() {
        return validationPolicy;
    }

    /**
     * @param validationPolicy the validation policy to store on these properties.
     */
    public void setValidationPolicy(ValidationPolicy validationPolicy) {
        this.validationPolicy = validationPolicy;
    }

    /**
     * @return the destination database name given at construction.
     */
    public String getDatabaseName() {
        return databaseName;
    }

    /**
     * @return the destination table name given at construction.
     */
    public String getTableName() {
        return tableName;
    }

    /**
     * @return the current value of the {@code flushImmediately} flag.
     */
    public boolean getFlushImmediately() {
        return flushImmediately;
    }

    /**
     * @param flushImmediately the new value of the {@code flushImmediately} flag.
     */
    public void setFlushImmediately(boolean flushImmediately) {
        this.flushImmediately = flushImmediately;
    }

    /**
     * @return the current value of the {@code ignoreFirstRecord} flag.
     */
    public boolean isIgnoreFirstRecord() {
        return ignoreFirstRecord;
    }

    /**
     * @param ignoreFirstRecord the new value of the {@code ignoreFirstRecord} flag, which is sent as the
     *            {@code ignoreFirstRecord} ingestion property.
     */
    public void setIgnoreFirstRecord(boolean ignoreFirstRecord) {
        this.ignoreFirstRecord = ignoreFirstRecord;
    }

    /**
     * @return the configured report level.
     */
    public IngestionReportLevel getReportLevel() {
        return reportLevel;
    }

    /**
     * @param reportLevel the report level to use.
     */
    public void setReportLevel(IngestionReportLevel reportLevel) {
        this.reportLevel = reportLevel;
    }

    /**
     * @return the configured report method.
     */
    public IngestionReportMethod getReportMethod() {
        return reportMethod;
    }

    /**
     * @param reportMethod the report method to use; {@link #validate()} requires it to be non-null.
     */
    public void setReportMethod(IngestionReportMethod reportMethod) {
        this.reportMethod = reportMethod;
    }

    /**
     * @return the drop-by tag suffixes (the stored list itself, not a copy).
     */
    public List<String> getDropByTags() {
        return dropByTags;
    }

    /**
     * Drop-by tags are tags added to the ingested data bulk in order to be able to delete it.
     * This should be used with care - see the Kusto docs.
     *
     * @param dropByTags - list of tag suffixes for the data being ingested; each one is sent prefixed with "drop-by:"
     */
    public void setDropByTags(List<String> dropByTags) {
        this.dropByTags = dropByTags;
    }

    /**
     * @return the ingest-by tag suffixes (the stored list itself, not a copy).
     */
    public List<String> getIngestByTags() {
        return ingestByTags;
    }

    /**
     * Tags that start with an ingest-by: prefix can be used to ensure that data is only ingested once.
     * This should be used with care - see the Kusto docs.
     *
     * @param ingestByTags - list of tag suffixes for the data being ingested; each one is sent prefixed with "ingest-by:"
     */
    public void setIngestByTags(List<String> ingestByTags) {
        this.ingestByTags = ingestByTags;
    }

    /**
     * @return the custom tags (the stored list itself, not a copy).
     */
    public List<String> getAdditionalTags() {
        return additionalTags;
    }

    /**
     * Customized tags; these are sent as given, without any prefix.
     *
     * @param additionalTags list of custom user tags
     */
    public void setAdditionalTags(List<String> additionalTags) {
        this.additionalTags = additionalTags;
    }

    /**
     * @param additionalProperties - Set additional properties to the ingestion properties
     */
    public void setAdditionalProperties(Map<String, String> additionalProperties) {
        this.additionalProperties = additionalProperties;
    }

    /**
     * @return the additional properties (the stored map itself, not a copy).
     */
    public Map<String, String> getAdditionalProperties() {
        return this.additionalProperties;
    }

    /**
     * @return the ingestIfNotExists tags (the stored list itself, not a copy).
     */
    public List<String> getIngestIfNotExists() {
        return ingestIfNotExists;
    }

    /**
     * Will trigger a check if there's already an extent with this specific "ingest-by" tag prefix.
     * See the Kusto docs.
     *
     * @param ingestIfNotExists list of ingestIfNotExists tags
     */
    public void setIngestIfNotExists(List<String> ingestIfNotExists) {
        this.ingestIfNotExists = ingestIfNotExists;
    }

    /**
     * Builds the flat string-to-string property map for this ingestion.
     *
     * <p>Entries are written in this order, so later entries overwrite earlier ones with the same key:
     * {@code tags} and {@code ingestIfNotExists} (JSON arrays, only when non-empty), then everything in
     * {@code additionalProperties}, then {@code format}, {@code ignoreFirstRecord}, and finally the
     * mapping keys ({@code ingestionMappingReference} or {@code ingestionMapping}, plus
     * {@code ingestionMappingType}) when a mapping is defined. Null tag lists, a null additional-properties
     * map and a null ingestion mapping are treated as empty / absent.
     *
     * @return a new map holding the combined ingestion properties.
     * @throws IOException if JSON serialization of the tags or the column mappings fails.
     */
    Map<String, String> getIngestionProperties() throws IOException {
        Map<String, String> fullAdditionalProperties = new HashMap<>();

        if (hasElements(dropByTags) || hasElements(ingestByTags) || hasElements(additionalTags)) {
            List<String> tags = new ArrayList<>();
            if (hasElements(additionalTags)) {
                tags.addAll(additionalTags);
            }
            if (hasElements(ingestByTags)) {
                for (String tag : ingestByTags) {
                    tags.add("ingest-by:" + tag);
                }
            }
            if (hasElements(dropByTags)) {
                for (String tag : dropByTags) {
                    tags.add("drop-by:" + tag);
                }
            }
            ObjectMapper objectMapper = Utils.getObjectMapper();
            fullAdditionalProperties.put("tags", objectMapper.writeValueAsString(tags));
        }

        if (hasElements(ingestIfNotExists)) {
            ObjectMapper objectMapper = Utils.getObjectMapper();
            fullAdditionalProperties.put("ingestIfNotExists", objectMapper.writeValueAsString(ingestIfNotExists));
        }

        if (additionalProperties != null) {
            fullAdditionalProperties.putAll(additionalProperties);
        }
        fullAdditionalProperties.put("format", dataFormat.getKustoValue());
        fullAdditionalProperties.put("ignoreFirstRecord", Boolean.toString(ignoreFirstRecord));

        if (ingestionMapping != null) {
            String mappingReference = ingestionMapping.getIngestionMappingReference();
            if (StringUtils.isNotBlank(mappingReference)) {
                // A named, pre-defined mapping takes precedence over inline column mappings.
                fullAdditionalProperties.put("ingestionMappingReference", mappingReference);
                fullAdditionalProperties.put("ingestionMappingType", ingestionMapping.getIngestionMappingKind().getKustoValue());
            } else if (ingestionMapping.getColumnMappings() != null) {
                // Serialize the column mappings from their fields only: hide every accessor kind, then re-enable fields.
                // NOTE(review): this reconfigures the mapper returned by Utils.getObjectMapper(); confirm that
                // method does not hand out a shared instance.
                ObjectMapper objectMapper = Utils.getObjectMapper();
                objectMapper.setVisibility(PropertyAccessor.ALL, Visibility.NONE);
                objectMapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
                String mapping = objectMapper.writeValueAsString(ingestionMapping.getColumnMappings());
                fullAdditionalProperties.put("ingestionMapping", mapping);
                fullAdditionalProperties.put("ingestionMappingType", ingestionMapping.getIngestionMappingKind().getKustoValue());
            }
        }
        return fullAdditionalProperties;
    }

    /**
     * Sets the data format.
     *
     * @param dataFormat One of the values in: {@link DataFormat DataFormat}; checked with
     *            {@code Ensure.argIsNotNull}, which rejects {@code null}.
     */
    public void setDataFormat(@NotNull DataFormat dataFormat) {
        Ensure.argIsNotNull(dataFormat, "dataFormat");
        this.dataFormat = dataFormat;
    }

    /**
     * Sets the data format by its name. If the name does not exist, then it does not set it
     * (a warning is logged and the current format is kept).
     *
     * <p>The name is matched case-insensitively against the {@link DataFormat} constant names.
     *
     * @param dataFormatName One of the string values in: {@link DataFormat DataFormat}
     */
    public void setDataFormat(@NotNull String dataFormatName) {
        try {
            // Locale.ROOT keeps the upper-casing locale-independent; with e.g. a Turkish default locale
            // "singlejson" would otherwise become "SİNGLEJSON" and never match the enum constant.
            this.dataFormat = DataFormat.valueOf(dataFormatName.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException ex) {
            log.warn("IngestionProperties.setDataFormat(): Invalid dataFormatName of {}. Per the API's specification, DataFormat property value wasn't set.",
                    dataFormatName);
        }
    }

    /**
     * Returns the DataFormat
     *
     * @return The DataFormat
     */
    @NotNull
    public DataFormat getDataFormat() {
        return dataFormat;
    }

    /**
     * Sets the predefined ingestion mapping name:
     *
     * @param mappingReference The name of the mapping declared in the destination Kusto database, that
     *            describes the mapping between fields of an object and columns of a Kusto table.
     * @param ingestionMappingKind The data format of the object to map.
     */
    public void setIngestionMapping(String mappingReference, IngestionMapping.IngestionMappingKind ingestionMappingKind) {
        this.ingestionMapping = new IngestionMapping(mappingReference, ingestionMappingKind);
    }

    /**
     * Please use a mappingReference for production as passing the mapping every time is wasteful
     * Creates an ingestion mapping using the described column mappings:
     *
     * @param columnMappings The columnMapping used for this ingestion.
     * @param ingestionMappingKind The data format of the object to map.
     */
    public void setIngestionMapping(ColumnMapping[] columnMappings, IngestionMapping.IngestionMappingKind ingestionMappingKind) {
        this.ingestionMapping = new IngestionMapping(columnMappings, ingestionMappingKind);
    }

    /**
     * Replaces the ingestion mapping with the given instance (stored by reference).
     *
     * @param ingestionMapping the mapping to use; {@code null} is treated as "no mapping" by
     *            {@link #validate()} and when building the ingestion properties.
     */
    public void setIngestionMapping(IngestionMapping ingestionMapping) {
        this.ingestionMapping = ingestionMapping;
    }

    /**
     * @return the current ingestion mapping (the stored instance itself).
     */
    public IngestionMapping getIngestionMapping() {
        return this.ingestionMapping;
    }

    /**
     * Stores {@code token} under the {@code authorizationContext} key of the additional properties.
     * If the additional-properties map was set to {@code null}, a new map is created first.
     *
     * @param token the authorization context token.
     */
    public void setAuthorizationContextToken(String token) {
        if (additionalProperties == null) {
            additionalProperties = new HashMap<>();
        }
        additionalProperties.put("authorizationContext", token);
    }

    /**
     * Validate the minimum non-empty values needed for data ingestion and mappings.
     *
     * <p>Checks that the database name, table name and report method are present, and then that the
     * ingestion mapping is consistent: a mapping kind without a mapping, a mapping kind that differs from
     * the one the data format expects, a mapping reference combined with column mappings, and invalid
     * column mappings are all collected into one message. A {@code null} ingestion mapping is validated
     * as an empty one.
     *
     * @throws IngestionClientException if any mapping inconsistency was found; the message lists them all.
     */
    void validate() throws IngestionClientException {
        Ensure.stringIsNotBlank(databaseName, "databaseName");
        Ensure.stringIsNotBlank(tableName, "tableName");
        Ensure.argIsNotNull(reportMethod, "reportMethod");

        // A null mapping (possible through the setter or the copy constructor) means "nothing defined".
        IngestionMapping mapping = ingestionMapping != null ? ingestionMapping : new IngestionMapping();
        String mappingReference = mapping.getIngestionMappingReference();
        IngestionMapping.IngestionMappingKind ingestionMappingKind = mapping.getIngestionMappingKind();
        TextStringBuilder message = new TextStringBuilder();

        if ((mapping.getColumnMappings() == null) && StringUtils.isBlank(mappingReference)) {
            if (ingestionMappingKind != null) {
                message.appendln("IngestionMappingKind was defined ('%s'), so a mapping must be defined as well.", ingestionMappingKind);
            }
        } else { // a mapping was provided
            if (dataFormat.getIngestionMappingKind() != null && !dataFormat.getIngestionMappingKind().equals(ingestionMappingKind)) {
                message.appendln("Wrong ingestion mapping for format '%s'; mapping kind should be '%s', but was '%s'.",
                        dataFormat.getKustoValue(), dataFormat.getIngestionMappingKind().getKustoValue(),
                        ingestionMappingKind != null ? ingestionMappingKind.getKustoValue() : "null");
            }

            if (mapping.getColumnMappings() != null) {
                if (StringUtils.isNotBlank(mappingReference)) {
                    message.appendln("Both mapping reference '%s' and column mappings were defined.", mappingReference);
                }

                // Column validity depends on the kind, so it can only be checked when a kind is present.
                if (ingestionMappingKind != null) {
                    for (ColumnMapping column : mapping.getColumnMappings()) {
                        if (!column.isValid(ingestionMappingKind)) {
                            message.appendln("Column mapping '%s' is invalid.", column.getColumnName());
                        }
                    }
                }
            }
        }

        if (!message.isEmpty()) {
            String messageStr = message.build();
            log.error(messageStr);
            throw new IngestionClientException(messageStr);
        }
    }

    /**
     * Validates these properties for ingesting from a ResultSet: the data format must be
     * {@link DataFormat#CSV}, and the regular {@link #validate()} checks must pass.
     *
     * @throws IngestionClientException if {@link #validate()} finds a mapping inconsistency.
     */
    public void validateResultSetProperties() throws IngestionClientException {
        Ensure.isTrue(IngestionProperties.DataFormat.CSV.equals(dataFormat),
                String.format("ResultSet translates into csv format but '%s' was given", dataFormat));
        validate();
    }

    /**
     * @return a new map with the {@code database} and {@code table} names of this ingestion.
     */
    @Override
    public Map<String, String> getTracingAttributes() {
        Map<String, String> attributes = new HashMap<>();
        attributes.put("database", databaseName);
        attributes.put("table", tableName);
        return attributes;
    }

    /**
     * Supported data formats. Each constant carries the name sent as the {@code format} ingestion
     * property, the mapping kind that {@link #validate()} expects for it, and a {@code compressible} flag.
     */
    public enum DataFormat {
        CSV("csv", IngestionMapping.IngestionMappingKind.CSV, true),
        TSV("tsv", IngestionMapping.IngestionMappingKind.CSV, true),
        SCSV("scsv", IngestionMapping.IngestionMappingKind.CSV, true),
        SOHSV("sohsv", IngestionMapping.IngestionMappingKind.CSV, true),
        PSV("psv", IngestionMapping.IngestionMappingKind.CSV, true),
        TXT("txt", IngestionMapping.IngestionMappingKind.CSV, true),
        TSVE("tsve", IngestionMapping.IngestionMappingKind.CSV, true),
        JSON("json", IngestionMapping.IngestionMappingKind.JSON, true),
        SINGLEJSON("singlejson", IngestionMapping.IngestionMappingKind.JSON, true),
        MULTIJSON("multijson", IngestionMapping.IngestionMappingKind.JSON, true),
        AVRO("avro", IngestionMapping.IngestionMappingKind.AVRO, false),
        APACHEAVRO("apacheavro", IngestionMapping.IngestionMappingKind.APACHEAVRO, false),
        PARQUET("parquet", IngestionMapping.IngestionMappingKind.PARQUET, false),
        SSTREAM("sstream", IngestionMapping.IngestionMappingKind.SSTREAM, false),
        ORC("orc", IngestionMapping.IngestionMappingKind.ORC, false),
        RAW("raw", IngestionMapping.IngestionMappingKind.CSV, true),
        W3CLOGFILE("w3clogfile", IngestionMapping.IngestionMappingKind.W3CLOGFILE, true);

        private final String kustoValue;
        private final IngestionMapping.IngestionMappingKind ingestionMappingKind;
        private final boolean compressible;

        DataFormat(String kustoValue, IngestionMapping.IngestionMappingKind ingestionMappingKind, boolean compressible) {
            this.kustoValue = kustoValue;
            this.ingestionMappingKind = ingestionMappingKind;
            this.compressible = compressible;
        }

        /**
         * @return the format name used as the value of the {@code format} ingestion property.
         */
        public String getKustoValue() {
            return kustoValue;
        }

        /**
         * @return the mapping kind a provided ingestion mapping must have for this format.
         */
        public IngestionMapping.IngestionMappingKind getIngestionMappingKind() {
            return ingestionMappingKind;
        }

        /**
         * @return the {@code compressible} flag of this format.
         *         NOTE(review): presumably tells callers whether data in this format may be compressed
         *         before upload - confirm against the call sites.
         */
        public boolean isCompressible() {
            return compressible;
        }
    }

    /**
     * Which ingestion outcomes are reported. Each constant carries its string form in {@code kustoValue}.
     */
    public enum IngestionReportLevel {
        FAILURES_ONLY("FailuresOnly"),
        NONE("None"),
        FAILURES_AND_SUCCESSES("FailuresAndSuccesses");

        private final String kustoValue;

        IngestionReportLevel(String kustoValue) {
            this.kustoValue = kustoValue;
        }

        /**
         * @return the string form of this report level.
         */
        public String getKustoValue() {
            return kustoValue;
        }
    }

    /**
     * Where ingestion outcomes are reported. Each constant carries its string form in {@code kustoValue}.
     */
    public enum IngestionReportMethod {
        QUEUE("Queue"),
        TABLE("Table"),
        QUEUE_AND_TABLE("QueueAndTable");

        private final String kustoValue;

        IngestionReportMethod(String kustoValue) {
            this.kustoValue = kustoValue;
        }

        /**
         * @return the string form of this report method.
         */
        public String getKustoValue() {
            return kustoValue;
        }
    }
}