// JAR search and dependency download from the Maven repository
// azkaban.storage.StorageCleaner Maven / Gradle / Ivy
/*
* Copyright 2017 LinkedIn Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
*/
package azkaban.storage;
import static azkaban.Constants.ConfigurationKeys.AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION;
import static com.google.common.base.Preconditions.checkArgument;
import azkaban.db.DatabaseOperator;
import azkaban.spi.Storage;
import azkaban.utils.Props;
import com.google.common.annotations.VisibleForTesting;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.inject.Inject;
import javax.inject.Singleton;
import org.apache.log4j.Logger;
/**
 * Trims the artifacts stored for a project down to the configured retention limit.
 *
 * The limit is read from {@code AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION}. A value of 0 (the
 * default) disables cleanup; a negative value is rejected at construction time.
 */
@Singleton
public class StorageCleaner {

  // Delete records of all older versions
  static final String SQL_DELETE_RESOURCE_ID = "DELETE FROM project_versions WHERE resource_id=?";

  /**
   * The query must sort the versions in reverse order for the cleanup operation to work correctly!
   * TODO spyne: Refactor database storage cleanup to use this
   *
   * When using DatabaseStorage, resourceId is always NULL. Hence, those rows will currently be
   * never cleaned up.
   */
  static final String SQL_FETCH_PVR = "SELECT resource_id FROM project_versions WHERE project_id=? AND resource_id IS NOT NULL ORDER BY version DESC";

  private static final Logger log = Logger.getLogger(StorageCleaner.class);

  private final DatabaseOperator databaseOperator;
  private final int maxArtifactsPerProject;
  private final Storage storage;

  /**
   * Reads the retention limit from {@code props} and logs whether cleanup is enabled.
   *
   * @param props configuration; {@code AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION} defaults to 0
   * @param storage storage backend whose {@code delete} is used to remove artifacts
   * @param databaseOperator used to read and delete {@code project_versions} rows
   * @throws IllegalArgumentException if the configured retention limit is negative
   */
  @Inject
  public StorageCleaner(final Props props, final Storage storage,
      final DatabaseOperator databaseOperator) {
    this.storage = storage;
    this.databaseOperator = databaseOperator;
    this.maxArtifactsPerProject = props.getInt(AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION, 0);
    checkArgument(this.maxArtifactsPerProject >= 0,
        String.format("Invalid value for %s : %d", AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION,
            this.maxArtifactsPerProject));

    if (isCleanupPermitted()) {
      log.info(String.format("%s Config: Max %d artifact(s) retained per project",
          AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION, this.maxArtifactsPerProject));
    } else {
      log.warn("Project cleanup disabled. All artifacts will be stored.");
    }
  }

  /**
   * @return true when the configured retention limit is greater than zero
   */
  @VisibleForTesting
  boolean isCleanupPermitted() {
    return this.maxArtifactsPerProject > 0;
  }

  /**
   * Remove all but last N artifacts as configured by AZKABAN_STORAGE_ARTIFACT_MAX_RETENTION
   *
   * Since multiple versions can share the same filename, the algo is to collect all filenames and
   * from them, remove the latest ones. The remaining ones are deleted by the respective storage.
   *
   * From the storage perspective, cleanup just needs the {@link Storage#delete(String)} API to
   * work.
   *
   * Failure cases: - If the storage cleanup fails, the cleanup will be attempted again on the next
   * upload - If the storage cleanup succeeds and the DB cleanup fails, the DB will be cleaned up in
   * the next attempt.
   *
   * @param projectId project ID
   */
  public void cleanupProjectArtifacts(final int projectId) {
    if (!isCleanupPermitted()) {
      return;
    }
    final Set<String> allResourceIds = findResourceIdsToDelete(projectId);
    if (allResourceIds.isEmpty()) {
      return;
    }
    log.warn(String.format("Deleting project artifacts [id: %d]: %s", projectId, allResourceIds));
    allResourceIds.forEach(this::delete);
  }

  /**
   * Computes the resource IDs of a project that fall outside the retention window.
   *
   * @param projectId project ID
   * @return resource IDs to delete; empty when the project has no more rows than the limit
   */
  private Set<String> findResourceIdsToDelete(final int projectId) {
    final List<String> resourceIdOrderedList = fetchResourceIdOrderedList(projectId);
    if (resourceIdOrderedList.size() <= this.maxArtifactsPerProject) {
      return Collections.emptySet();
    }

    final Set<String> allResourceIds = new HashSet<>(resourceIdOrderedList);
    // The list is ordered newest version first, so its head holds the entries to retain.
    final Set<String> doNotDeleteSet = new HashSet<>(
        resourceIdOrderedList.subList(0, this.maxArtifactsPerProject));
    // Set difference: an ID that also appears among the retained entries is never deleted.
    allResourceIds.removeAll(doNotDeleteSet);
    return allResourceIds;
  }

  /**
   * Main Delete Utility.
   *
   * Delete the storage first. Then remove metadata from DB. Warning! This order cannot be reversed
   * since if the metadata is lost, there is no reference of the storage blob.
   *
   * @param resourceId the storage key to be deleted.
   * @return true if deletion was successful. false otherwise
   */
  private boolean delete(final String resourceId) {
    // Short-circuit: the DB row is only removed once the storage delete reports success.
    final boolean isDeleted = this.storage.delete(resourceId) && removeDbEntry(resourceId);
    if (!isDeleted) {
      log.info("Failed to delete resourceId: " + resourceId);
    }
    return isDeleted;
  }

  /**
   * Deletes the {@code project_versions} rows that reference the given resource ID.
   *
   * @param resourceId the resource ID whose rows are removed
   * @return true if at least one row was deleted; false if none matched or the update failed
   */
  private boolean removeDbEntry(final String resourceId) {
    try {
      final int nAffectedRows = this.databaseOperator.update(SQL_DELETE_RESOURCE_ID, resourceId);
      return nAffectedRows > 0;
    } catch (final SQLException e) {
      log.error("Error while deleting DB metadata resource ID: " + resourceId, e);
    }
    return false;
  }

  /**
   * Fetches the non-null resource IDs of a project, ordered by version descending.
   *
   * @param projectId project ID
   * @return resource IDs, newest version first; an empty list if the query fails
   */
  private List<String> fetchResourceIdOrderedList(final int projectId) {
    try {
      return this.databaseOperator.query(SQL_FETCH_PVR,
          rs -> {
            final List<String> results = new ArrayList<>();
            while (rs.next()) {
              results.add(rs.getString(1));
            }
            return results;
          }, projectId);
    } catch (final SQLException e) {
      log.error("Error performing cleanup of Project: " + projectId, e);
    }
    return Collections.emptyList();
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy