Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
ca.nrc.cadc.caom2.artifactsync.ArtifactValidator Maven / Gradle / Ivy
/*
************************************************************************
******************* CANADIAN ASTRONOMY DATA CENTRE *******************
************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES **************
*
* (c) 2021. (c) 2021.
* Government of Canada Gouvernement du Canada
* National Research Council Conseil national de recherches
* Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6
* All rights reserved Tous droits réservés
*
* NRC disclaims any warranties, Le CNRC dénie toute garantie
* expressed, implied, or énoncée, implicite ou légale,
* statutory, of any kind with de quelque nature que ce
* respect to the software, soit, concernant le logiciel,
* including without limitation y compris sans restriction
* any warranty of merchantability toute garantie de valeur
* or fitness for a particular marchande ou de pertinence
* purpose. NRC shall not be pour un usage particulier.
* liable in any event for any Le CNRC ne pourra en aucun cas
* damages, whether direct or être tenu responsable de tout
* indirect, special or general, dommage, direct ou indirect,
* consequential or incidental, particulier ou général,
* arising from the use of the accessoire ou fortuit, résultant
* software. Neither the name de l'utilisation du logiciel. Ni
* of the National Research le nom du Conseil National de
* Council of Canada nor the Recherches du Canada ni les noms
* names of its contributors may de ses participants ne peuvent
* be used to endorse or promote être utilisés pour approuver ou
* products derived from this promouvoir les produits dérivés
* software without specific prior de ce logiciel sans autorisation
* written permission. préalable et particulière
* par écrit.
*
* This file is part of the Ce fichier fait partie du projet
* OpenCADC project. OpenCADC.
*
* OpenCADC is free software: OpenCADC est un logiciel libre ;
* you can redistribute it and/or vous pouvez le redistribuer ou le
* modify it under the terms of modifier suivant les termes de
* the GNU Affero General Public la “GNU Affero General Public
* License as published by the License” telle que publiée
* Free Software Foundation, par la Free Software Foundation
* either version 3 of the : soit la version 3 de cette
* License, or (at your option) licence, soit (à votre gré)
* any later version. toute version ultérieure.
*
* OpenCADC is distributed in the OpenCADC est distribué
* hope that it will be useful, dans l’espoir qu’il vous
* but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE
* without even the implied GARANTIE : sans même la garantie
* warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ
* or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF
* PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence
* General Public License for Générale Publique GNU Affero
* more details. pour plus de détails.
*
* You should have received Vous devriez avoir reçu une
* a copy of the GNU Affero copie de la Licence Générale
* General Public License along Publique GNU Affero avec
* with OpenCADC. If not, see OpenCADC ; si ce n’est
* . pas le cas, consultez :
* .
*
* $Revision: 5 $
*
************************************************************************
*/
package ca.nrc.cadc.caom2.artifactsync;
import ca.nrc.cadc.auth.AuthMethod;
import ca.nrc.cadc.auth.AuthenticationUtil;
import ca.nrc.cadc.caom2.Artifact;
import ca.nrc.cadc.caom2.ObservationResponse;
import ca.nrc.cadc.caom2.ObservationState;
import ca.nrc.cadc.caom2.Plane;
import ca.nrc.cadc.caom2.access.AccessUtil;
import ca.nrc.cadc.caom2.artifact.ArtifactMetadata;
import ca.nrc.cadc.caom2.artifact.ArtifactStore;
import ca.nrc.cadc.caom2.artifact.StoragePolicy;
import ca.nrc.cadc.caom2.harvester.HarvestResource;
import ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI;
import ca.nrc.cadc.caom2.harvester.state.HarvestSkipURIDAO;
import ca.nrc.cadc.caom2.persistence.ObservationDAO;
import ca.nrc.cadc.date.DateUtil;
import ca.nrc.cadc.net.HttpDownload;
import ca.nrc.cadc.net.ResourceNotFoundException;
import ca.nrc.cadc.util.StringUtil;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.security.PrivilegedExceptionAction;
import java.text.DateFormat;
import java.util.Date;
import java.util.List;
import java.util.ListIterator;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import javax.sql.DataSource;
import org.apache.log4j.Logger;
import org.opencadc.tap.TapClient;
/**
* Class that compares artifacts in the caom2 metadata with the artifacts
* in storage (via ArtifactStore).
*
* @author majorb
*
*/
public class ArtifactValidator implements PrivilegedExceptionAction, ShutdownListener {
public static final String STATE_CLASS = Artifact.class.getSimpleName();
private ObservationDAO observationDAO;
private HarvestSkipURIDAO harvestSkipURIDAO;
private String source;
private ArtifactStore artifactStore;
private String collection;
private boolean reportOnly;
private URI caomTapResourceID;
private URL caomTapURL;
private boolean supportSkipURITable = false;
private boolean tolerateNullChecksum = false;
private boolean tolerateNullContentLength = false;
private String prefix = null;
private long newSkipURICount = 0;
private long updateSkipURICount = 0;
private ExecutorService executor;
private static final Logger log = Logger.getLogger(ArtifactValidator.class);
public ArtifactValidator(DataSource dataSource, HarvestResource harvestResource, ObservationDAO observationDAO,
boolean reportOnly, ArtifactStore artifactStore, boolean tolerateNullChecksum, boolean tolerateNullContentLength) {
this(harvestResource.getCollection(), reportOnly, artifactStore, tolerateNullChecksum, tolerateNullContentLength);
this.observationDAO = observationDAO;
this.source = harvestResource.getIdentifier();
this.harvestSkipURIDAO = new HarvestSkipURIDAO(dataSource, harvestResource.getDatabase(), harvestResource.getSchema());
}
public ArtifactValidator(URI caomTapResourceID, String collection,
boolean reportOnly, ArtifactStore artifactStore, boolean tolerateNullChecksum, boolean tolerateNullContentLength) {
this(collection, reportOnly, artifactStore, tolerateNullChecksum, tolerateNullContentLength);
this.caomTapResourceID = caomTapResourceID;
}
public ArtifactValidator(URL caomTapURL, String collection,
boolean reportOnly, ArtifactStore artifactStore, boolean tolerateNullChecksum, boolean tolerateNullContentLength) {
this(collection, reportOnly, artifactStore, tolerateNullChecksum, tolerateNullContentLength);
this.caomTapURL = caomTapURL;
}
private ArtifactValidator(String collection, boolean reportOnly,
ArtifactStore artifactStore, boolean tolerateNullChecksum, boolean tolerateNullContentLength) {
this.collection = collection;
this.reportOnly = reportOnly;
this.artifactStore = artifactStore;
this.tolerateNullChecksum = tolerateNullChecksum;
this.tolerateNullContentLength = tolerateNullContentLength;
}
@Override
public Object run() throws Exception {
final long start = System.currentTimeMillis();
log.info("Starting validation for collection " + collection);
executor = Executors.newFixedThreadPool(2);
final Future> logicalQuery = executor.submit(new Callable>() {
public TreeSet call() throws Exception {
return getLogicalMetadata(null);
}
});
log.info("Submitted query to caom2");
final Future> physicalQuery = executor.submit(new Callable>() {
public TreeSet call() throws Exception {
return getPhysicalMetadata();
}
});
log.info("Submitted query to storage");
executor.shutdown();
executor.awaitTermination(1, TimeUnit.DAYS);
log.info("Queries are complete");
executor.shutdownNow();
TreeSet logicalMetadata = logicalQuery.get();
log.info("number of artifacts in caom2: " + logicalMetadata.size());
TreeSet physicalMetadata = physicalQuery.get();
log.info("number of artifacts in storage: " + physicalMetadata.size());
if (logicalMetadata.isEmpty() || physicalMetadata.isEmpty()) {
log.error("Number of artifacts in caom2 or in storage cannot be zero.");
} else {
compareMetadata(logicalMetadata, physicalMetadata, start);
}
return null;
}
void compareMetadata(TreeSet logicalMetadata, TreeSet physicalMetadata,
long start) throws Exception {
boolean supportSkipURITable = supportSkipURITable();
long logicalCount = logicalMetadata.size();
long physicalCount = physicalMetadata.size();
long correct = 0;
long diffLength = 0;
long diffType = 0;
long diffChecksum = 0;
long notInLogical = 0;
DateFormat df = DateUtil.getDateFormat(DateUtil.IVOA_DATE_FORMAT, DateUtil.UTC);
ArtifactMetadata nextLogical = null;
for (ArtifactMetadata nextPhysical : physicalMetadata) {
if (logicalMetadata.contains(nextPhysical)) {
nextLogical = logicalMetadata.ceiling(nextPhysical);
logicalMetadata.remove(nextLogical);
if (matches(nextLogical.getChecksum(), nextPhysical.getChecksum())) {
if (matches(nextLogical.contentLength, nextPhysical.contentLength)) {
if (matches(nextLogical.contentType, nextPhysical.contentType)) {
correct++;
} else {
// content type mismatch
diffType++;
logJSON(new String[]
{"logType", "detail",
"anomaly", "diffType",
"observationID", nextLogical.observationID,
"artifactURI", nextLogical.getArtifactURI().toString(),
"caomContentType", nextLogical.contentType,
"storageContentType", nextPhysical.contentType,
"caomCollection", collection},
false);
}
} else {
// content length mismatch
diffLength++;
if (supportSkipURITable) {
checkAddToSkipTable(nextLogical, "ContentLengths are different");
}
logJSON(new String[]
{"logType", "detail",
"anomaly", "diffLength",
"observationID", nextLogical.observationID,
"artifactURI", nextLogical.getArtifactURI().toASCIIString(),
"caomContentLength", safeToString(nextLogical.contentLength),
"storageContentLength", safeToString(nextPhysical.contentLength),
"caomCollection", collection},
false);
}
} else {
// checksum mismatch
diffChecksum++;
if (supportSkipURITable) {
checkAddToSkipTable(nextLogical, "Checksums are different");
}
logJSON(new String[]
{"logType", "detail",
"anomaly", "diffChecksum",
"observationID", nextLogical.observationID,
"artifactURI", nextLogical.getArtifactURI().toString(),
"caomChecksum", nextLogical.getChecksum(),
"caomSize", safeToString(nextLogical.contentLength),
"storageChecksum", nextPhysical.getChecksum(),
"storageSize", safeToString(nextPhysical.contentLength),
"caomCollection", collection},
false);
}
} else {
notInLogical++;
logJSON(new String[]
{"logType", "detail",
"anomaly", "notInCAOM",
"artifactURI", nextPhysical.getArtifactURI().toString()},
false);
}
}
// at this point, any artifact that is in logicalArtifacts, is not in physicalArtifacts
long missingFromStorage = 0;
long notPublic = 0;
StoragePolicy storagePolicy = artifactStore.getStoragePolicy(this.collection);
Date now = new Date();
for (ArtifactMetadata metadata : logicalMetadata) {
String errorMessage = null;
Artifact artifact = new Artifact(metadata.getArtifactURI(), metadata.productType, metadata.releaseType);
Date releaseDate = AccessUtil.getReleaseDate(artifact, metadata.metaRelease, metadata.dataRelease);
String releaseDateString = "null";
boolean miss = false;
if (releaseDate == null) {
// proprietary artifact, skip
log.debug("null release date, skipping");
if (StoragePolicy.PUBLIC_ONLY == storagePolicy) {
notPublic++;
} else {
// missing proprietary artifact, but won't be added to skip table
miss = true;
}
} else {
releaseDateString = df.format(releaseDate);
if (releaseDate.after(now)) {
// proprietary artifact, add to skip table for future download
errorMessage = ArtifactHarvester.PROPRIETARY;
if (StoragePolicy.PUBLIC_ONLY == storagePolicy) {
notPublic++;
} else {
// missing proprietary artifact, add to skip table
miss = true;
}
} else {
// missing public artifact, add to skip table
miss = true;
}
// add to HavestSkipURI table if there is not already a row in the table
if (supportSkipURITable) {
checkAddToSkipTable(metadata, errorMessage);
}
}
if (miss) {
missingFromStorage++;
logJSON(new String[]
{"logType", "detail",
"anomaly", "missingFromStorage",
"releaseDate", releaseDateString,
"observationID", metadata.observationID,
"artifactURI", metadata.getArtifactURI().toASCIIString(),
"caomCollection", collection},
false);
}
}
if (reportOnly) {
// diff
logJSON(new String[] {
"logType", "summary",
"collection", collection,
"totalInCAOM", Long.toString(logicalCount),
"totalInStorage", Long.toString(physicalCount),
"totalCorrect", Long.toString(correct),
"totalDiffChecksum", Long.toString(diffChecksum),
"totalDiffLength", Long.toString(diffLength),
"totalDiffType", Long.toString(diffType),
"totalNotInCAOM", Long.toString(notInLogical),
"totalMissingFromStorage", Long.toString(missingFromStorage),
"totalNotPublic", Long.toString(notPublic),
"time", Long.toString(System.currentTimeMillis() - start)
}, true);
} else {
// validate
long totalInSkipURICount = updateSkipURICount + newSkipURICount;
logJSON(new String[] {
"logType", "summary",
"collection", collection,
"totalInCAOM", Long.toString(logicalCount),
"totalInStorage", Long.toString(physicalCount),
"totalCorrect", Long.toString(correct),
"totalDiffChecksum", Long.toString(diffChecksum),
"totalDiffLength", Long.toString(diffLength),
"totalDiffType", Long.toString(diffType),
"totalNotInCAOM", Long.toString(notInLogical),
"totalMissingFromStorage", Long.toString(missingFromStorage),
"totalNotPublic", Long.toString(notPublic),
"totalInSkipURI", Long.toString(totalInSkipURICount),
"totalNewSkipURI", Long.toString(newSkipURICount),
"totalUpdateSkipURI", Long.toString(updateSkipURICount),
"time", Long.toString(System.currentTimeMillis() - start)
}, true);
}
}
public void shutdown() {
if (executor != null) {
executor.shutdownNow();
try {
executor.awaitTermination(2, TimeUnit.SECONDS);
} catch (InterruptedException e) {
log.warn("Shutdown interruped");
Thread.currentThread().interrupt();
}
}
}
private String safeToString(Long n) {
if (n == null) {
return "null";
}
return n.toString();
}
private void logJSON(String[] data, boolean summaryInfo) {
StringBuilder sb = new StringBuilder();
sb.append("{");
boolean paired = true;
for (String s : data) {
sb.append("\"");
sb.append(s);
sb.append("\"");
if (paired) {
sb.append(":");
} else {
sb.append(",");
}
paired = !paired;
}
sb.setLength(sb.length() - 1);
sb.append("}");
if (summaryInfo || reportOnly) {
System.out.println(sb.toString());
}
}
private boolean matches(String logical, String physical) {
// consider it a match if logical is null or empty or there is an actual match
if (logical == null || logical.length() == 0
|| logical.equals(physical)) {
return true;
} else {
return false;
}
}
private boolean matches(Long logical, Long physical) {
// consider it a match if logical is null or empty or there is an actual match
if (logical == null || logical.equals(physical)) {
return true;
} else {
return false;
}
}
private boolean supportSkipURITable() {
return supportSkipURITable;
}
private void checkAddToSkipTable(ArtifactMetadata metadata, String errorMessage) throws URISyntaxException {
if (supportSkipURITable) {
// add to HavestSkipURI table if there is not already a row in the table
Artifact artifact = new Artifact(metadata.getArtifactURI(), metadata.productType, metadata.releaseType);
Date releaseDate = AccessUtil.getReleaseDate(artifact, metadata.metaRelease, metadata.dataRelease);
HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, metadata.getArtifactURI());
if (releaseDate != null && !reportOnly) {
boolean isAdd = false;
if (skip == null) {
// not in skip table, add it
isAdd = true;
skip = new HarvestSkipURI(source, STATE_CLASS, metadata.getArtifactURI(), releaseDate, errorMessage);
}
if (ArtifactHarvester.PROPRIETARY.equals(skip.errorMessage)
|| ArtifactHarvester.PROPRIETARY.equals(errorMessage)) {
skip.setTryAfter(releaseDate);
skip.errorMessage = errorMessage;
}
harvestSkipURIDAO.put(skip);
if (isAdd) {
newSkipURICount++;
} else {
updateSkipURICount++;
}
String errorMessageString = (errorMessage == null) ? "null" : skip.errorMessage;
logJSON(new String[]
{"logType", "detail",
"action", "addedToSkipTable",
"artifactURI", metadata.getArtifactURI().toASCIIString(),
"caomCollection", collection,
"caomChecksum", metadata.getChecksum(),
"errorMessage", errorMessageString},
true);
}
}
}
private TreeSet getLogicalMetadata(Integer batchSize) throws Exception {
TreeSet result = new TreeSet<>(ArtifactMetadata.getComparator());
if (StringUtil.hasText(source)) {
// use database
// HarvestSkipURI table is not supported in 'diff' mode, i.e. reportOnly = true
this.supportSkipURITable = !reportOnly;
long t1 = System.currentTimeMillis();
List states = observationDAO.getObservationList(collection, null, null, batchSize);
long t2 = System.currentTimeMillis();
long dt = t2 - t1;
if (batchSize == null) {
log.info("get-state-list: size=" + states.size() + " in " + dt + " ms");
}
int depth = 3;
ListIterator iter = states.listIterator();
t1 = System.currentTimeMillis();
while (iter.hasNext()) {
ObservationState s = iter.next();
iter.remove(); // GC
ObservationResponse resp = observationDAO.getObservationResponse(s, depth);
if (resp == null) {
log.error("Null response from Observation DAO, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
} else if (resp.observation == null) {
log.error("Observation is null, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
} else {
for (Plane plane : resp.observation.getPlanes()) {
for (Artifact artifact : plane.getArtifacts()) {
String observationID = s.getURI().getObservationID();
result.add(getMetadata(observationID, artifact, plane.dataRelease, plane.metaRelease));
}
}
}
}
if (batchSize == null) {
// log only when we query all artifacts of a collection
log.info("Finished logical metadata query in " + (System.currentTimeMillis() - t1) + " ms");
}
} else {
this.supportSkipURITable = false;
if (caomTapResourceID != null) {
// source is a TAP resource ID
AuthMethod authMethod = AuthenticationUtil.getAuthMethodFromCredentials(AuthenticationUtil.getCurrentSubject());
TapClient tapClient = new TapClient(caomTapResourceID);
try {
this.caomTapURL = tapClient.getSyncURL(authMethod);
} catch (ResourceNotFoundException ex) {
if (ex.getMessage().contains("with password")) {
throw new ResourceNotFoundException("TAP service for "
+ caomTapResourceID + " does not support password authentication.", ex);
}
}
}
// source is a TAP service URL or a TAP resource ID
String uriSelect = "distinct(a.uri)";
if (batchSize == 1) {
uriSelect = "top 1 a.uri";
}
String adql = "select " + uriSelect + ", a.contentChecksum, a.contentLength, a.contentType, o.observationID, "
+ "a.productType, a.releaseType, p.dataRelease, p.metaRelease "
+ "from caom2.Artifact a "
+ "join caom2.Plane p on a.planeID = p.planeID "
+ "join caom2.Observation o on p.obsID = o.obsID "
+ "where o.collection='" + collection + "'";
log.debug("logical query: " + adql);
long start = System.currentTimeMillis();
result = query(caomTapURL, adql);
if (batchSize != 1) {
// log only when we query all artifacts of a collection
log.info("Finished caom2 query in " + (System.currentTimeMillis() - start) + " ms");
}
}
return result;
}
private ArtifactMetadata getMetadata(String observationID, Artifact artifact, Date dataRelease, Date metaRelease) throws Exception {
String cs = null;
if (artifact.contentChecksum == null) {
if (!this.tolerateNullChecksum) {
throw new RuntimeException("content checksum is null for artifact URI: " + artifact.getURI());
}
} else {
cs = getStorageChecksum(artifact.contentChecksum.toASCIIString());
}
ArtifactMetadata metadata = new ArtifactMetadata(artifact.getURI(), cs);
if (artifact.contentLength == null) {
if (!this.tolerateNullContentLength) {
throw new RuntimeException("content length is null for artifact URI: " + metadata.getArtifactURI());
}
} else {
metadata.contentLength = artifact.contentLength;
}
metadata.contentType = artifact.contentType;
metadata.observationID = observationID;
metadata.productType = artifact.getProductType();
metadata.releaseType = artifact.getReleaseType();
metadata.metaRelease = metaRelease;
metadata.dataRelease = dataRelease;
return metadata;
}
private TreeSet query(URL baseURL, String adql) throws Exception {
StringBuilder queryString = new StringBuilder();
queryString.append("LANG=ADQL&RESPONSEFORMAT=tsv&QUERY=");
queryString.append(URLEncoder.encode(adql, "UTF-8"));
URL url = new URL(baseURL.toString() + "?" + queryString.toString());
ResultReader resultReader = new ResultReader(artifactStore);
HttpDownload get = new HttpDownload(url, resultReader);
try {
get.run();
} catch (Throwable t) {
t.printStackTrace();
throw new RuntimeException(t);
}
if (get.getThrowable() != null) {
if (get.getThrowable() instanceof Exception) {
throw (Exception) get.getThrowable();
} else {
throw new RuntimeException(get.getThrowable());
}
}
return resultReader.metadata;
}
private String getStorageChecksum(String checksum) throws Exception {
int colon = checksum.indexOf(":");
return checksum.substring(colon + 1, checksum.length());
}
private TreeSet getPhysicalMetadata() throws Exception {
TreeSet metadata = new TreeSet(ArtifactMetadata.getComparator());
long t1 = System.currentTimeMillis();
metadata.addAll(artifactStore.list(collection));
log.info("Finished storage query in " + (System.currentTimeMillis() - t1) + " ms");
return metadata;
}
}