/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.metadata;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Functions;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.metadata.MetadataStorageTablesConfig;
import org.apache.druid.metadata.SQLMetadataConnector;
import org.apache.druid.segment.SchemaPayload;
import org.apache.druid.segment.SchemaPayloadPlus;
import org.apache.druid.timeline.SegmentId;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.PreparedBatch;
import org.skife.jdbi.v2.TransactionCallback;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Handles segment schema persistence and cleanup.
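*
* <p>A minimal usage sketch (hypothetical wiring; in practice the manager is created
* via Guice injection and the schemas are computed from segment metadata):
* <pre>{@code
* SegmentSchemaManager schemaManager = injector.getInstance(SegmentSchemaManager.class);
* schemaManager.persistSchemaAndUpdateSegmentsTable("wikipedia", segmentSchemas, version);
* }</pre>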
*/
@LazySingleton
public class SegmentSchemaManager
{
private static final EmittingLogger log = new EmittingLogger(SegmentSchemaManager.class);
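
// Number of items per partition when batching DB inserts, updates, and lookups.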
private static final int DB_ACTION_PARTITION_SIZE = 100;
private final MetadataStorageTablesConfig dbTables;
private final ObjectMapper jsonMapper;
private final SQLMetadataConnector connector;

@Inject
public SegmentSchemaManager(
MetadataStorageTablesConfig dbTables,
ObjectMapper jsonMapper,
SQLMetadataConnector connector
)
{
this.dbTables = dbTables;
this.jsonMapper = jsonMapper;
this.connector = connector;
}

/**
* Returns the fingerprints of schemas that are marked as unused but are still
* referenced by used segments.
*/
public List<String> findReferencedSchemaMarkedAsUnused()
{
return connector.retryWithHandle(
handle ->
handle.createQuery(
StringUtils.format(
"SELECT DISTINCT(schema_fingerprint) FROM %s WHERE used = true AND schema_fingerprint IN (SELECT fingerprint FROM %s WHERE used = false)",
dbTables.getSegmentsTable(),
dbTables.getSegmentSchemasTable()
))
.mapTo(String.class)
.list()
);
}
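
/**
* Marks the schemas with the given fingerprints as used and refreshes their
* {@code used_status_last_updated} timestamp. Returns the number of updated rows.
*/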
public int markSchemaAsUsed(List<String> schemaFingerprints)
{
if (schemaFingerprints.isEmpty()) {
return 0;
}
String inClause = getInClause(schemaFingerprints.stream());
return connector.retryWithHandle(
handle ->
handle.createStatement(
StringUtils.format(
"UPDATE %s SET used = true, used_status_last_updated = :now"
+ " WHERE fingerprint IN (%s)",
dbTables.getSegmentSchemasTable(), inClause
)
)
.bind("now", DateTimes.nowUtc().toString())
.execute()
);
}
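
/**
* Deletes unused schemas whose {@code used_status_last_updated} timestamp is older
* than the given time. Returns the number of deleted rows.
*/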
public int deleteSchemasOlderThan(long timestamp)
{
return connector.retryWithHandle(
handle -> handle.createStatement(
StringUtils.format(
"DELETE FROM %s WHERE used = false AND used_status_last_updated < :now",
dbTables.getSegmentSchemasTable()
))
.bind("now", DateTimes.utc(timestamp).toString())
.execute());
}
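
/**
* Marks used schemas as unused if they are not referenced by any used segment.
* Returns the number of updated rows.
*/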
public int markUnreferencedSchemasAsUnused()
{
return connector.retryWithHandle(
handle ->
handle.createStatement(
StringUtils.format(
"UPDATE %s SET used = false, used_status_last_updated = :now WHERE used != false "
+ "AND fingerprint NOT IN (SELECT DISTINCT(schema_fingerprint) FROM %s WHERE used=true AND schema_fingerprint IS NOT NULL)",
dbTables.getSegmentSchemasTable(),
dbTables.getSegmentsTable()
)
)
.bind("now", DateTimes.nowUtc().toString())
.execute());
}

/**
* Persist segment schema and update segments in a transaction.
*/
public void persistSchemaAndUpdateSegmentsTable(
final String dataSource,
final List<SegmentSchemaMetadataPlus> segmentSchemas,
final int version
)
{
// Retry the transaction on transient failures (1 quiet try, 3 max tries).
connector.retryTransaction((TransactionCallback<Void>) (handle, status) -> {
Map<String, SchemaPayload> schemaPayloadMap = new HashMap<>();
for (SegmentSchemaMetadataPlus segmentSchema : segmentSchemas) {
schemaPayloadMap.put(
segmentSchema.getFingerprint(),
segmentSchema.getSegmentSchemaMetadata().getSchemaPayload()
);
}
persistSegmentSchema(handle, dataSource, version, schemaPayloadMap);
updateSegmentWithSchemaInformation(handle, segmentSchemas);
return null;
}, 1, 3);
}

/**
* Persists unique segment schemas in the DB. Already existing schemas are skipped,
* and existing schemas marked as unused are revived.
*/
public void persistSegmentSchema(
final Handle handle,
final String dataSource,
final int version,
final Map<String, SchemaPayload> fingerprintSchemaPayloadMap
) throws JsonProcessingException
{
if (fingerprintSchemaPayloadMap.isEmpty()) {
return;
}
// Filter out already existing schemas.
Map<Boolean, Set<String>> existingFingerprintsAndUsedStatus = fingerprintExistBatch(
handle,
fingerprintSchemaPayloadMap.keySet()
);
// Used schema can also be marked as unused by the schema cleanup duty in parallel.
// Refer to the javadocs in org.apache.druid.server.coordinator.duty.KillUnreferencedSegmentSchemaDuty for more details.
Set<String> usedExistingFingerprints = existingFingerprintsAndUsedStatus.containsKey(true)
? existingFingerprintsAndUsedStatus.get(true)
: new HashSet<>();
Set<String> unusedExistingFingerprints = existingFingerprintsAndUsedStatus.containsKey(false)
? existingFingerprintsAndUsedStatus.get(false)
: new HashSet<>();
Set<String> existingFingerprints = Sets.union(usedExistingFingerprints, unusedExistingFingerprints);
if (!existingFingerprints.isEmpty()) {
log.info(
"Found already existing schema in the DB for dataSource [%1$s]. "
+ "Used fingerprints: [%2$s], Unused fingerprints: [%3$s].",
dataSource,
usedExistingFingerprints,
unusedExistingFingerprints
);
}
// Unused schema can be deleted by the schema cleanup duty in parallel.
// Refer to the javadocs in org.apache.druid.server.coordinator.duty.KillUnreferencedSegmentSchemaDuty for more details.
if (!unusedExistingFingerprints.isEmpty()) {
// Mark the unused schemas as used to prevent their deletion.
markSchemaAsUsed(new ArrayList<>(unusedExistingFingerprints));
}
Map<String, SchemaPayload> schemaPayloadToPersist = new HashMap<>();
for (Map.Entry<String, SchemaPayload> entry : fingerprintSchemaPayloadMap.entrySet()) {
if (!existingFingerprints.contains(entry.getKey())) {
schemaPayloadToPersist.put(entry.getKey(), entry.getValue());
}
}
if (schemaPayloadToPersist.isEmpty()) {
log.info("No schema to persist for dataSource [%s] and version [%s].", dataSource, version);
return;
}
final List<List<String>> partitionedFingerprints = Lists.partition(
new ArrayList<>(schemaPayloadToPersist.keySet()),
DB_ACTION_PARTITION_SIZE
);
String insertSql = StringUtils.format(
"INSERT INTO %s (created_date, datasource, fingerprint, payload, used, used_status_last_updated, version) "
+ "VALUES (:created_date, :datasource, :fingerprint, :payload, :used, :used_status_last_updated, :version)",
dbTables.getSegmentSchemasTable()
);
// insert schemas
PreparedBatch schemaInsertBatch = handle.prepareBatch(insertSql);
for (List<String> partition : partitionedFingerprints) {
for (String fingerprint : partition) {
final String now = DateTimes.nowUtc().toString();
schemaInsertBatch.add()
.bind("created_date", now)
.bind("datasource", dataSource)
.bind("fingerprint", fingerprint)
.bind("payload", jsonMapper.writeValueAsBytes(fingerprintSchemaPayloadMap.get(fingerprint)))
.bind("used", true)
.bind("used_status_last_updated", now)
.bind("version", version);
}
final int[] affectedRows = schemaInsertBatch.execute();
final List<String> failedInserts = new ArrayList<>();
for (int i = 0; i < partition.size(); ++i) {
if (affectedRows[i] != 1) {
failedInserts.add(partition.get(i));
}
}
if (failedInserts.isEmpty()) {
log.info(
"Published schemas [%s] to DB for datasource [%s] and version [%s]",
partition,
dataSource,
version
);
} else {
throw new ISE(
"Failed to publish schemas [%s] to DB for datasource [%s] and version [%s]",
failedInserts,
dataSource,
version
);
}
}
}

/**
* Updates segments with schemaFingerprint and numRows information.
*/
public void updateSegmentWithSchemaInformation(
final Handle handle,
final List<SegmentSchemaMetadataPlus> batch
)
{
log.debug("Updating segment with schemaFingerprint and numRows information: [%s].", batch);
// update schemaFingerprint and numRows in segments table
String updateSql =
StringUtils.format(
"UPDATE %s SET schema_fingerprint = :schema_fingerprint, num_rows = :num_rows WHERE id = :id",
dbTables.getSegmentsTable()
);
PreparedBatch segmentUpdateBatch = handle.prepareBatch(updateSql);
List<List<SegmentSchemaMetadataPlus>> partitionedSegmentIds =
Lists.partition(
batch,
DB_ACTION_PARTITION_SIZE
);
for (List<SegmentSchemaMetadataPlus> partition : partitionedSegmentIds) {
for (SegmentSchemaMetadataPlus segmentSchema : partition) {
String fingerprint = segmentSchema.getFingerprint();
segmentUpdateBatch.add()
.bind("id", segmentSchema.getSegmentId().toString())
.bind("schema_fingerprint", fingerprint)
.bind("num_rows", segmentSchema.getSegmentSchemaMetadata().getNumRows());
}
final int[] affectedRows = segmentUpdateBatch.execute();
final List<SegmentId> failedUpdates = new ArrayList<>();
for (int i = 0; i < partition.size(); ++i) {
if (affectedRows[i] != 1) {
failedUpdates.add(partition.get(i).getSegmentId());
}
}
if (failedUpdates.isEmpty()) {
log.infoSegmentIds(
partition.stream().map(SegmentSchemaMetadataPlus::getSegmentId),
"Updated segments with schema information in the DB"
);
} else {
throw new ISE(
"Failed to update segments with schema information: %s",
getCommaSeparatedIdentifiers(failedUpdates));
}
}
}
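
/**
* Returns the given segment IDs as a collection suitable for message formatting,
* or null if the input is null or empty.
*/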
private Object getCommaSeparatedIdentifiers(final Collection<SegmentId> ids)
{
if (ids == null || ids.isEmpty()) {
return null;
}
return Collections2.transform(ids, Functions.identity());
}

/**
* Queries the metadata DB to determine which of the given fingerprints already exist.
* Returns a map from used status to the set of existing fingerprints with that status.
*/
private Map<Boolean, Set<String>> fingerprintExistBatch(
final Handle handle,
final Set<String> fingerprintsToInsert
)
{
if (fingerprintsToInsert.isEmpty()) {
return Collections.emptyMap();
}
List<List<String>> partitionedFingerprints = Lists.partition(
new ArrayList<>(fingerprintsToInsert),
DB_ACTION_PARTITION_SIZE
);
Map<Boolean, Set<String>> existingFingerprints = new HashMap<>();
for (List<String> fingerprintList : partitionedFingerprints) {
String fingerprints = fingerprintList.stream()
.map(fingerprint -> "'" + StringEscapeUtils.escapeSql(fingerprint) + "'")
.collect(Collectors.joining(","));
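// The row mapper below populates existingFingerprints as a side effect;
// list() only drives the iteration over the result set.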
handle.createQuery(
StringUtils.format(
"SELECT used, fingerprint FROM %s WHERE fingerprint IN (%s)",
dbTables.getSegmentSchemasTable(), fingerprints
)
)
.map((index, r, ctx) -> existingFingerprints.computeIfAbsent(
r.getBoolean(1), value -> new HashSet<>()).add(r.getString(2)))
.list();
}
return existingFingerprints;
}
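
/**
* Builds the body of a SQL IN clause from the given values, quoting and escaping each one.
*/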
private String getInClause(final Stream<String> ids)
{
return ids
.map(value -> "'" + StringEscapeUtils.escapeSql(value) + "'")
.collect(Collectors.joining(","));
}

/**
* Wrapper over {@link SchemaPayloadPlus} that adds segmentId and fingerprint information.
*/
public static class SegmentSchemaMetadataPlus
{
private final SegmentId segmentId;
private final String fingerprint;
private final SchemaPayloadPlus schemaPayloadPlus;
public SegmentSchemaMetadataPlus(
SegmentId segmentId,
String fingerprint,
SchemaPayloadPlus schemaPayloadPlus
)
{
this.segmentId = segmentId;
this.schemaPayloadPlus = schemaPayloadPlus;
this.fingerprint = fingerprint;
}
public SegmentId getSegmentId()
{
return segmentId;
}
public SchemaPayloadPlus getSegmentSchemaMetadata()
{
return schemaPayloadPlus;
}
public String getFingerprint()
{
return fingerprint;
}
@Override
public String toString()
{
return "SegmentSchemaMetadataPlus{" +
"segmentId='" + segmentId + '\'' +
", fingerprint='" + fingerprint + '\'' +
", schemaPayloadPlus=" + schemaPayloadPlus +
'}';
}
}
}