All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ml-modules.root.data-hub.5.provenance.provenanceWriteQueue.mjs Maven / Gradle / Ivy

There is a newer version: 6.1.1
Show newest version
/**
 Copyright (c) 2021 MarkLogic Corporation

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
'use strict';
import consts from "/data-hub/5/impl/consts.mjs";
import hubUtils from "/data-hub/5/impl/hub-utils.mjs";

const dhPs = require("/data-hub/5/provenance/dh-provenance.xqy");

// Module-level list of identifiers for provenance records that have already been queued.
// ProvenanceWriteQueue.addProvenanceRecord consults and appends to it, so it is shared by
// every queue instance created from this module.
const persistedIDs = [];

// Common prefix of the Data Hub namespace URIs declared below.
const dataHubNamespaceBase = "http://marklogic.com/data-hub";

// Prefix -> namespace URI bindings handed to the provenance library (as options.namespaces)
// with every record that is persisted.
const provenanceNamespaces = {
  dhf: "http://marklogic.com/dhf",
  job: `${dataHubNamespaceBase}/job#`,
  step: `${dataHubNamespaceBase}/step#`,
  user: `${dataHubNamespaceBase}/user#`,
  dh: `${dataHubNamespaceBase}/provenance#`
};
/**
 * Captures objects describing provenance records, grouped by the name of the database they are to be
 * written to, and persists each group through persistDataHubRecord in dh-provenance.xqy.
 * Starting in 5.7 provenance is persisted in the target database at the time of persisting content objects.
 *
 * Duplicate handling is scoped to a (database name, PROV ID) pair:
 *  - If this queue instance already holds a record for the pair, the newer record is merged into it with
 *    Object.assign (top-level properties of the newer record replace those of the existing one). This is
 *    intended both to avoid conflicting update errors and to facilitate incremental changes.
 *  - Otherwise, if the pair is already present in the module-level persistedIDs list (for example because
 *    another queue instance queued it), the record is ignored.
 * The same PROV ID queued for two different databases is written to both.
 */
export default class ProvenanceWriteQueue {

  constructor() {
    // database name -> array of records waiting to be persisted to that database
    this.databaseToRecordQueue = {};
    // "<databaseName>:<record id>" -> record object accepted by this instance
    this.recordsById = new Map();
  }

  /**
   * Queues a provenance record for the given database, merging into or skipping duplicates as
   * described on the class.
   *
   * @param databaseName name of the database the record is to be written to
   * @param provenanceRecord object with an `id` and, optionally, `options` and `metadata`
   */
  addProvenanceRecord(databaseName, provenanceRecord) {
    // The per-database queue is created on first use, even if the record ends up being skipped.
    if (!this.databaseToRecordQueue[databaseName]) {
      this.databaseToRecordQueue[databaseName] = [];
    }
    const idKey = `${databaseName}:${provenanceRecord.id}`;
    const existingForId = this.recordsById.get(idKey);
    if (existingForId) {
      Object.assign(existingForId, provenanceRecord);
      return;
    }
    /* The persistedIDs check below is primarily for the mlRunIngestTransform.
     *  Due to multiple transforms running in the same transaction, there could be
     *  multiple attempts to write the same data source provenance causing XDMP-CONFLICTINGUPDATE.
     *  The check uses the database-qualified key: a conflicting update can only arise within one
     *  database, and checking the bare ID would silently drop a record destined for another database.
     */
    if (persistedIDs.includes(idKey)) {
      return;
    }
    persistedIDs.push(idKey);
    this.databaseToRecordQueue[databaseName].push(provenanceRecord);
    this.recordsById.set(idKey, provenanceRecord);
  }

  /**
   * @param databaseName
   * @return provenanceRecordQueue array of provenance records to write to the database;
   *   an empty array when nothing has been queued for that database
   */
  getDatabaseQueue(databaseName) {
    return this.databaseToRecordQueue[databaseName] || [];
  }

  /**
   * Writes the records queued for a database and empties that queue. Asserts the ps-user execute
   * privilege first. When the named database is not the current one, the write is performed via
   * xdmp.invokeFunction against that database as an auto-committed update.
   *
   * @param databaseName name of the database queue to persist; defaults to the current database
   * @return void
   */
  persist(databaseName = xdmp.databaseName(xdmp.database())) {
    xdmp.securityAssert("http://marklogic.com/xdmp/privileges/ps-user", "execute");
    if (databaseName === xdmp.databaseName(xdmp.database())) {
      this._persist(databaseName);
      return;
    }
    xdmp.invokeFunction(
      () => { this._persist(databaseName); },
      {database: xdmp.database(databaseName), update: "true", commit: "auto"}
    );
  }

  /**
   * Persists every record queued for the database in the current evaluation context, then replaces
   * that queue with an empty array. recordsById and persistedIDs are not reset, so a record added
   * again for the same database and ID after this call is merged or skipped rather than re-queued.
   *
   * Each record's `options` object (created if absent) is updated in place before being written.
   *
   * @param databaseName name of the database queue to persist
   */
  _persist(databaseName) {
    const recordsQueue = this.getDatabaseQueue(databaseName);
    if (recordsQueue.length > 0) {
      hubUtils.hubTrace(consts.TRACE_FLOW, `Committing provenance records, count: ${recordsQueue.length}`);
    } else {
      // NOTE(review): the message names the jobs database although the queue may belong to any database.
      hubUtils.hubTrace(consts.TRACE_FLOW, `No provenance records were queued, so not committing any to the jobs database`);
    }
    const currentUser = xdmp.getCurrentUser();
    const currentDateTime = fn.currentDateTime();
    for (const recordDetails of recordsQueue) {
      const options = recordDetails.options || {};

      // Use the text after the last "#" of the ID as the record's dateTime when it is a valid
      // xs:dateTime; otherwise fall back to fn.currentDateTime() plus xdmp.elapsedTime().
      const datePortion = recordDetails.id.slice(recordDetails.id.lastIndexOf("#") + 1);
      options.dateTime = xdmp.castableAs("http://www.w3.org/2001/XMLSchema", "dateTime", datePortion)
        ? datePortion
        : String(currentDateTime.add(xdmp.elapsedTime()));

      // namespaces for user defined provenance types
      options.namespaces = provenanceNamespaces;

      // relations: attribute the record to the current user unless the caller already set attributedTo
      options.relations = options.relations || {};
      options.relations.attributedTo = options.relations.attributedTo || currentUser;

      // attributes: the record's metadata, if any, is copied over the attributes already present
      options.attributes = options.attributes || {};
      Object.assign(options.attributes, recordDetails.metadata || {});

      dhPs.persistDataHubRecord(recordDetails.id, options);
    }
    this.databaseToRecordQueue[databaseName] = [];
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy