// ml-modules.root.data-hub.5.provenance.provenanceWriteQueue.mjs
// From marklogic-data-hub: Library for Creating an Operational Data Hub on MarkLogic
/**
Copyright (c) 2021 MarkLogic Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
'use strict';
import consts from "/data-hub/5/impl/consts.mjs";
import hubUtils from "/data-hub/5/impl/hub-utils.mjs";
const dhPs = require("/data-hub/5/provenance/dh-provenance.xqy");
// IDs of provenance records that have already been queued by addProvenanceRecord.
// Declared at module level, so it is shared by every ProvenanceWriteQueue instance created
// from this module; a record whose ID is already listed here is not queued a second time.
const persistedIDs = [];
// Prefix -> namespace URI mappings. The same object is set as options.namespaces on every
// record handed to dhPs.persistDataHubRecord.
const provenanceNamespaces = {
dhf: "http://marklogic.com/dhf",
job: "http://marklogic.com/data-hub/job#",
step: "http://marklogic.com/data-hub/step#",
user: "http://marklogic.com/data-hub/user#",
dh: "http://marklogic.com/data-hub/provenance#"
};
/**
* Captures objects describing provenance records to be generated by ps.provenanceRecord, grouped by target database.
* Starting in 5.7 provenance is persisted in the target database at the time of persisting content objects.
*
* If multiple provenance records are added that have the same PROV ID and are to be written to the same
* database, the newer record is merged with the existing one. This is intended both to avoid
* conflicting update errors and to facilitate incremental changes such as adding new PROV relationships.
* Trace logging is used to record this so that a user can have visibility into when this happens.
*/
export default class ProvenanceWriteQueue {

  /**
   * Creates an empty queue.
   */
  constructor() {
    // database name -> array of records waiting to be written to that database
    this.databaseToRecordQueue = {};
    // `${databaseName}:${record.id}` -> the queued record object, used to merge duplicates
    this.recordsById = new Map();
  }

  /**
   * Queues a provenance record for the given database.
   *
   * If a record with the same ID is already queued for that database, the properties of the
   * new record are shallow-merged into the queued one (Object.assign) and the merge is trace
   * logged. If the ID was already queued before (see the module-level persistedIDs array),
   * the record is ignored.
   *
   * @param databaseName name of the database the record is to be written to
   * @param provenanceRecord object with an `id` and optionally `options` and `metadata`
   */
  addProvenanceRecord(databaseName, provenanceRecord) {
    let recordQueue = this.databaseToRecordQueue[databaseName];
    if (!recordQueue) {
      recordQueue = [];
      this.databaseToRecordQueue[databaseName] = recordQueue;
    }
    const idKey = `${databaseName}:${provenanceRecord.id}`;
    const existingForId = this.recordsById.get(idKey);
    if (existingForId) {
      // Give the user visibility into the merge, as promised by the class documentation.
      hubUtils.hubTrace(consts.TRACE_FLOW,
        `Merging provenance record into the one already queued for database '${databaseName}'; id: ${provenanceRecord.id}`);
      Object.assign(existingForId, provenanceRecord);
      /* The persistedIDs check below is primarily for the mlRunIngestTransform.
       * Due to multiple transforms running in the same transaction, there could be
       * multiple attempts to write the same data source provenance causing XDMP-CONFLICTINGUPDATE
       */
    } else if (!persistedIDs.includes(provenanceRecord.id)) {
      persistedIDs.push(provenanceRecord.id);
      recordQueue.push(provenanceRecord);
      this.recordsById.set(idKey, provenanceRecord);
    }
  }

  /**
   * @param databaseName
   * @return provenanceRecordQueue array of provenance records to write to the database;
   *         an empty array when nothing is queued for that database
   */
  getDatabaseQueue(databaseName) {
    return this.databaseToRecordQueue[databaseName] || [];
  }

  /**
   * Writes the records queued for a database and empties that queue. The ps-user execute
   * privilege is asserted first. When the target is not the current database the write is
   * run through xdmp.invokeFunction against the target database.
   *
   * @param databaseName name of the database queue to persist; defaults to the current database
   * @return void
   */
  persist(databaseName = xdmp.databaseName(xdmp.database())) {
    xdmp.securityAssert("http://marklogic.com/xdmp/privileges/ps-user", "execute");
    const currentDatabaseName = xdmp.databaseName(xdmp.database());
    if (databaseName === currentDatabaseName) {
      this._persist(databaseName);
    } else {
      xdmp.invokeFunction(() => { this._persist(databaseName); },
        {database: xdmp.database(databaseName), update: "true", commit: "auto"}
      );
    }
  }

  /**
   * Writes every record queued for databaseName via dhPs.persistDataHubRecord, then clears
   * the queue and the matching merge-lookup entries. Expected to be run in the context of
   * the target database (see persist).
   *
   * @param databaseName name of the database queue to write
   */
  _persist(databaseName) {
    const recordsQueue = this.getDatabaseQueue(databaseName);
    if (recordsQueue.length > 0) {
      hubUtils.hubTrace(consts.TRACE_FLOW, `Committing provenance records, count: ${recordsQueue.length}`);
    } else {
      hubUtils.hubTrace(consts.TRACE_FLOW, `No provenance records were queued, so not committing any to the ${databaseName} database`);
    }
    const currentUser = xdmp.getCurrentUser();
    const currentDateTime = fn.currentDateTime();
    for (const recordDetails of recordsQueue) {
      const options = recordDetails.options || {};
      // Whatever follows the last '#' in the id is used as the dateTime when it is a valid
      // xs:dateTime; otherwise fall back to the request time plus the elapsed time.
      const datePortion = recordDetails.id.slice(recordDetails.id.lastIndexOf("#") + 1);
      if (xdmp.castableAs("http://www.w3.org/2001/XMLSchema", "dateTime", datePortion)) {
        options.dateTime = datePortion;
      } else {
        options.dateTime = String(currentDateTime.add(xdmp.elapsedTime()));
      }
      // namespaces for user defined provenance types
      options.namespaces = provenanceNamespaces;
      // relations
      options.relations = options.relations || {};
      options.relations.attributedTo = options.relations.attributedTo || currentUser;
      // attributes
      options.attributes = options.attributes || {};
      Object.assign(options.attributes, recordDetails.metadata || {});
      dhPs.persistDataHubRecord(recordDetails.id, options);
    }
    // Drop the merge-lookup entries together with the queue, so that a later add cannot be
    // merged into a record that has already been written and the map does not grow forever.
    for (const recordDetails of recordsQueue) {
      this.recordsById.delete(`${databaseName}:${recordDetails.id}`);
    }
    this.databaseToRecordQueue[databaseName] = [];
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy