ml-modules.root.com.marklogic.smart-mastering.survivorship.merging.base.xqy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of marklogic-data-hub Show documentation
Show all versions of marklogic-data-hub Show documentation
Library for Creating an Operational Data Hub on MarkLogic
xquery version "1.0-ml";
(:~
: Implementation library for merging functions. If you're building an
: application, include com.marklogic.smart-mastering/merging.xqy, not this
: file.
:
: The merging process draws values from source documents, applies an algorithm
: to the values for each property, and creates a new merged document. The
: source of each value gets tracked in a sidecar document that can be used to
: provide auditing/history information. A merge document records the documents
: that were used to generate it.
:
: Algorithms used to merge values can be provided by this library or custom
: code written as part of an application. This library provides a "standard"
: algorithm that provides a few simple ways to choose values from source
: documents.
:
: Note that the process of getting values for the merged document will
: typically be a selection among the values available in the source documents,
: but nothing prevents an algorithm from combining or otherwise modifying
: source document values. (Value source tracking might be a little more
: complex.)
:)
module namespace merge-impl = "http://marklogic.com/smart-mastering/survivorship/merging";
import module namespace auditing = "http://marklogic.com/smart-mastering/auditing"
at "../../auditing/base.xqy";
import module namespace coll-impl = "http://marklogic.com/smart-mastering/survivorship/collections"
at "collections.xqy";
import module namespace config = "http://marklogic.com/data-hub/config"
at "/com.marklogic.hub/config.xqy";
import module namespace fun-ext = "http://marklogic.com/smart-mastering/function-extension"
at "../../function-extension/base.xqy";
import module namespace history = "http://marklogic.com/smart-mastering/auditing/history"
at "../../auditing/history.xqy";
import module namespace json="http://marklogic.com/xdmp/json"
at "/MarkLogic/json/json.xqy";
import module namespace merge-impl = "http://marklogic.com/smart-mastering/survivorship/merging"
at "standard.xqy",
"options.xqy";
import module namespace const = "http://marklogic.com/smart-mastering/constants"
at "/com.marklogic.smart-mastering/constants.xqy";
import module namespace matcher = "http://marklogic.com/smart-mastering/matcher"
at "/com.marklogic.smart-mastering/matcher.xqy";
import module namespace prop-def = "http://marklogic.com/smart-mastering/survivorship/property-definition"
at "property-definition.xqy";
import module namespace sem = "http://marklogic.com/semantics"
at "/MarkLogic/semantics.xqy";
import module namespace tel = "http://marklogic.com/smart-mastering/telemetry"
at "/com.marklogic.smart-mastering/telemetry.xqy";
import module namespace util-impl = "http://marklogic.com/smart-mastering/util-impl"
at "/com.marklogic.smart-mastering/impl/util.xqy";
import module namespace mem = "http://maxdewpoint.blogspot.com/memory-operations/functional"
at "/mlpm_modules/XQuery-XML-Memory-Operations/memory-operations-functional.xqy";
import module namespace helper = "http://marklogic.com/smart-mastering/helper-impl"
at "/com.marklogic.smart-mastering/matcher-impl/helper-impl.xqy";
import module namespace httputils = "http://marklogic.com/data-hub/http-utils"
at "/data-hub/5/impl/http-utils.xqy";
declare namespace merging = "http://marklogic.com/smart-mastering/merging";
declare namespace sm = "http://marklogic.com/smart-mastering";
declare namespace es = "http://marklogic.com/entity-services";
declare namespace prov = "http://www.w3.org/ns/prov#";
declare namespace host = "http://marklogic.com/xdmp/status/host";
declare namespace xsl = "http://www.w3.org/1999/XSL/Transform";
declare namespace xsi = "http://www.w3.org/2001/XMLSchema-instance";
declare option xdmp:mapping "false";
(:
: Directory in which merged documents are created.
:)
declare variable $MERGED-DIR := "/com.marklogic.smart-mastering/merged/";
(:
 : Report whether every one of the given URIs is already locked, as judged by
 : merge-impl:is-uri-locked.
 :
 : ASSUMPTION: a content document that is already write-locked in this
 : transaction was archived by an earlier merge, so it must not be merged into
 : something else. This guards against match-and-merge over several documents
 : in one transaction:
 :   - docA -- docB is a good match: archive docA and docB, create docAB
 :   - docB -- docA is a good match: both are already archived, so docBA must
 :     not be created.
 :
 : @param $uris list of URIs to be checked
 : @return fn:true() when no URI in $uris is found to be unlocked (this is also
 :         the result for an empty $uris)
 :)
declare function merge-impl:all-merged($uris as xs:string*) as xs:boolean
{
  let $unlocked-uris := $uris[fn:not(merge-impl:is-uri-locked(.))]
  return fn:empty($unlocked-uris)
};
(:
 : Build the URI of a merged document: the merged-document directory, followed
 : by the id, a dot, and the format (used as the file extension).
 : @param $id identifier of the merged document
 : @param $format extension to append after the dot
 : @return the concatenated URI string
 :)
declare function merge-impl:build-merge-uri($id as xs:string, $format as xs:string)
{
  fn:concat($MERGED-DIR, $id, ".", $format)
};
(:~
 : Merge the documents as specified by the merge options and update the
 : involved files in the database. A freshly generated UUID string is used as
 : the id handed to the three-argument form of this function.
 : @param $uris URIs of the source documents that will be merged
 : @param $merge-options specification of how options are to be merged
 : @return whatever the three-argument form returns for the generated id
 :)
declare function merge-impl:save-merge-models-by-uri(
  $uris as xs:string*,
  $merge-options as item()?
)
{
  let $generated-id := sem:uuid-string()
  return
    merge-impl:save-merge-models-by-uri($uris, $merge-options, $generated-id)
};
(:~
: Merge the documents as specified by the merge options and update the
: involved files in the database.
:
: Steps, in the order they appear below:
:   1. tel:increment() is called unconditionally.
:   2. If merge-impl:all-merged($uris) is true, only a debug log entry is
:      written and no merge happens.
:   3. Otherwise the merge is built with merge-impl:build-merge-models-by-uri,
:      the audit trace document is inserted, the source documents are passed to
:      merge-impl:archive-document, and the merged document is inserted.
:
: @param $uris URIs of the source documents that will be merged
: @param $merge-options specification of how options are to be merged
: @param $id an id that will uniquely identify this merged document
: @return in-memory copy of the merge result when a merge is performed.
:         NOTE(review): the result sequence also contains whatever
:         tel:increment() returns — confirm that it is empty.
:)
declare function merge-impl:save-merge-models-by-uri(
$uris as xs:string*,
$merge-options as item()?,
$id as xs:string
)
{
tel:increment(),
(: Skip the merge entirely when every URI is already locked. :)
if (merge-impl:all-merged($uris)) then
xdmp:log("Skipping merge because all uris to be merged (" || fn:string-join($uris, ", ") ||
") were already write-locked", "debug")
else
let $start-elapsed := xdmp:elapsed-time()
let $compiled-options := merge-impl:compile-merge-options($merge-options)
(: From here on $merge-options is the compiled options node, shadowing the parameter. :)
let $merge-options := $compiled-options => map:get("mergeOptionsNode")
(: The write object carries the merged value, its URI and the audit trace. :)
let $merge-write-object := merge-impl:build-merge-models-by-uri($uris, $merge-options, $id)
let $merged-document := $merge-write-object => map:get("value")
let $merge-uri := $merge-write-object => map:get("uri")
let $audit-trace := $merge-write-object => map:get("audit-trace")
return (
$merged-document,
(: Persist the audit trace using the permissions and collections from its own context. :)
xdmp:document-insert(
$audit-trace => map:get("uri"),
$audit-trace => map:get("value"),
$audit-trace => map:get("context") => map:get("permissions"),
$audit-trace => map:get("context") => map:get("collections")
),
let $on-merge-options := $compiled-options => map:get("onMerge")
(: Only URIs that exist in the database and are not the merge target get archived.
   NOTE(review): ($uris, $uris) repeats the same sequence, so fn:distinct-values
   sees nothing new from the second copy — confirm whether another list was intended. :)
let $distinct-uris := fn:distinct-values(($uris, $uris))[fn:doc-available(.)][fn:not(. = $merge-uri)]
let $_archive := $distinct-uris ! merge-impl:archive-document(., $merge-options)
return
xdmp:document-insert(
$merge-uri,
$merged-document,
(: Permissions: the default permissions plus those read from every source URI;
   the data hub defaults are used only when that combination is empty. :)
let $perms := (
xdmp:default-permissions(),
fn:map(xdmp:document-get-permissions#1, $uris)
)
return if (fn:exists($perms)) then $perms else config:get-default-data-hub-permissions(),
(: Collections: computed by coll-impl:on-merge from each source document's
   collections, with $const:ARCHIVED-COLL filtered out first. :)
coll-impl:on-merge(map:new((
for $uri in $distinct-uris
return map:entry($uri, xdmp:document-get-collections($uri)[fn:not(. = $const:ARCHIVED-COLL)])
)),$on-merge-options)
),
(: Emit the elapsed time only when the performance trace event is enabled. :)
if (xdmp:trace-enabled($const:TRACE-PERFORMANCE)) then
xdmp:trace($const:TRACE-PERFORMANCE, "merge-impl:save-merge-models-by-uri: " || (xdmp:elapsed-time() - $start-elapsed))
else ()
)
};
(:
 : Produce the type label for a property.
 :
 : When no path is supplied the label is the string form of $name. Otherwise
 : the path is split on "/", empty steps are dropped, every step that is
 : castable as an xs:QName (resolved with the prefixes in $ns-map) is replaced
 : by xdmp:key-from-QName of that QName, and the steps are joined back together
 : with "/".
 :
 : @param $name QName of the property
 : @param $path optional path of the property
 : @param $ns-map prefix-to-namespace map used while interpreting $path
 : @return the label string
 :)
declare function merge-impl:construct-type($name as xs:QName, $path as xs:string?, $ns-map as map:map?)
{
  if (fn:empty($path)) then
    fn:string($name)
  else
    let $steps :=
      xdmp:with-namespaces(
        $ns-map,
        fn:tokenize($path, "/")[. != ""] ! (
          if (. castable as xs:QName) then
            xdmp:key-from-QName(xs:QName(.))
          else
            .
        )
      )
    return fn:string-join($steps, "/")
};
(:
: Generate attachments for the audit document.
:
: $provenance-details is read as a two-level map: source -> property -> a map
: with (at least) "algorithm" and "value" entries, where "value" is a
: json:array. For every value a prov:entity, a prov:wasDerivedFrom and a
: prov:wasInfluencedBy element is produced.
:
: @param $merge-uri the URI of the new merged document
: @param $provenance-details provenance map keyed by source, then by property
: @return a prov:hadMember element wrapping all prov:entity elements, followed
:         by the remaining per-value elements and one prov:softwareAgent per
:         distinct algorithm agent id
:)
declare function merge-impl:generate-audit-attachments(
$merge-uri as xs:string,
$provenance-details as map:map
) as item()*
{
xdmp:trace($const:TRACE-MERGE-RESULTS, "Provenance details: " || xdmp:to-json-string($provenance-details)),
let $generated-entity-id := $auditing:sm-prefix ||$merge-uri
(: Reference to the merged document; reused in every prov:wasDerivedFrom below. :)
let $generated-entity-xml :=
element prov:generatedEntity {
attribute prov:ref { $generated-entity-id }
}
(: Collects the distinct agent ids seen while walking the properties. :)
let $agent-ids-map := map:map()
let $property-related-prov :=
for $source in map:keys($provenance-details)
let $source-info := map:get($provenance-details, $source)
for $property in map:keys($source-info)
let $prop-info := map:get($source-info, $property)
let $algorithm-info := map:get($prop-info, "algorithm")
(: Agent id format: "algorithm:<name>;options:<optionsReference>"; it is parsed
   back apart when the prov:softwareAgent elements are built at the end. :)
let $algorithm-agent := fn:distinct-values($algorithm-info ! ("algorithm:"||./name||";options:"||./optionsReference))
let $_add-agent-id := map:put($agent-ids-map, $algorithm-agent, fn:true())
let $influencer-xml := element prov:influencer { attribute prov:ref { $algorithm-agent }}
let $entity-nodes := (
element prov:type {$property},
element prov:label {$source || ":" || $property},
element prov:location {$source})
for $value in json:array-values(map:get($prop-info, "value"))
(: Due to how JSON is constructed, we can't rely on the node having a node name.
Pull the node name from the name entry of the property map.
:)
(: The entity id is the prefix, source, property and a SHA-512 hash of the value. :)
let $hash := xdmp:sha512($value)
let $used-entity-id := $auditing:sm-prefix || $source || $property || $hash
return (
element prov:entity {
attribute prov:id {$used-entity-id},
$entity-nodes,
element prov:value { $value }
},
element prov:wasDerivedFrom {
$generated-entity-xml,
element prov:usedEntity {
attribute prov:ref { $used-entity-id }
}
},
element prov:wasInfluencedBy {
element prov:influencee { attribute prov:ref { $used-entity-id }},
$influencer-xml
}
)
(: Split the generated nodes: entities are grouped under prov:hadMember, the rest
   are emitted as-is. "except" compares by node identity and yields document order. :)
let $prop-prov-entities := $property-related-prov[. instance of element(prov:entity)]
let $other-prop-prov := $property-related-prov except $prop-prov-entities
return (
element prov:hadMember {
element prov:collection { attribute prov:ref { $generated-entity-id } },
$prop-prov-entities
},
$other-prop-prov,
for $agent-id in map:keys($agent-ids-map)
return element prov:softwareAgent {
attribute prov:id {$agent-id},
(: label = text between "algorithm:" and ";", location = text after "options:" :)
element prov:label {fn:substring-before(fn:substring-after($agent-id,"algorithm:"), ";")},
element prov:location {fn:substring-after($agent-id,"options:")}
}
)
};
(:
 : Generate property details for the provenance document.
 :
 : The final properties are first grouped by the document URIs found under
 : their "sources" entry. The result maps each source URI to a map keyed by the
 : property's type label (see merge-impl:construct-type); each of those entries
 : holds the traced values ("value"), the label again ("destination") and the
 : algorithm information ("algorithm").
 :
 : @param $final-properties the merged properties, with their source info
 : @return a map:map keyed by source document URI
 :)
declare function merge-impl:generate-provenance-details(
  $final-properties
) as item()*
{
  let $props-by-source := map:map()
  let $_group-by-source :=
    for $final-prop in $final-properties
    for $doc-uri in map:get($final-prop, "sources")/documentUri
    return
      map:put($props-by-source, $doc-uri, (map:get($props-by-source, $doc-uri), $final-prop))
  return
    map:new(
      for $source in map:keys($props-by-source)
      let $entries-for-source :=
        for $prop in map:get($props-by-source, $source)
        (: JSON nodes may not carry a usable node name, so the label is derived
           from the name/path entries of the property map instead. :)
        let $type := merge-impl:construct-type(map:get($prop, "name"), map:get($prop, "path"), map:get($prop, "nsMap"))
        let $traced-values :=
          json:to-array(map:get($prop, "values") ! history:normalize-value-for-tracing(.))
        let $details :=
          map:new((
            map:entry("value", $traced-values),
            map:entry("destination", $type),
            map:entry("algorithm", map:get($prop, "algorithm") union ())
          ))
        return map:entry($type, $details)
      return map:entry($source, map:new($entries-for-source))
    )
};
(:
 : Unmerge a merged document and un-archive the source documents, using the
 : defaults of the four-argument form: rollback information is retained,
 : future merges between the documents are blocked, and no explicit list of
 : URIs to remove is given.
 : @param $merged-doc-uri The URI of the document to be unmerged
 : @return the URIs returned by the four-argument form
 :)
declare function merge-impl:rollback-merge(
  $merged-doc-uri as xs:string
) as xs:string*
{
  let $retain-rollback-info := fn:true()
  let $block-future-merges := fn:true()
  let $remove-uris as xs:string* := ()
  return
    merge-impl:rollback-merge(
      $merged-doc-uri,
      $retain-rollback-info,
      $block-future-merges,
      $remove-uris
    )
};
(:
: Unmerge a merged document, un-archive the source documents. Create a match
: block to make sure these documents don't get auto-merged again.
: @param $merged-doc-uri The URI of the document to be unmerged
: @param $retain-rollback-info if true, then the merged document will be
: added to the archive collection; otherwise,
: the merged document and its audit records will
: be deleted
: @param $block-future-merges if true, then the future matches between documents
: will be blocked; otherwise, the documents could match
: on next process-match-and-merge
: @param $remove-uris xs:string* of document URIs that should be removed from the merge.
: If an empty sequence, then the last merge transaction is rolled back.
: @return restored URIs; empty when there is no auditing receipt, no last-merge
: timestamp and no $remove-uris to act on
:)
declare function merge-impl:rollback-merge(
$merged-doc-uri as xs:string,
$retain-rollback-info as xs:boolean,
$block-future-merges as xs:boolean,
$remove-uris as xs:string*
) as xs:string*
{
xdmp:trace($const:TRACE-MERGE-RESULTS, "Rolling back merge of cts.doc('" || $merged-doc-uri || "')"),
(: The merge options reference is stored in the merged document's headers. :)
let $merge-doc-headers := fn:doc($merged-doc-uri)/*:envelope/*:headers
let $merge-options-ref := fn:head($merge-doc-headers/*:merge-options/*:value ! fn:string(.))
let $merge-options := merge-impl:options-ref-to-options-node($merge-options-ref)
(: Most recent auditing receipt for the merged document, ordered by prov:time. :)
let $latest-auditing-receipt-for-doc :=
fn:head(
for $auditing-doc in auditing:auditing-receipts-for-doc-uri($merged-doc-uri, $merge-options)
order by $auditing-doc//prov:time ! xs:dateTime(.) descending
return $auditing-doc
)
let $all-contributing-uris := $merge-doc-headers/*:merges/*:document-uri
(: Timestamp of the latest merge; only needed when no explicit $remove-uris were given.
   last-merge is looked up both as an attribute and as a sibling of document-uri. :)
let $last-merge-dateTime := if (fn:empty($remove-uris)) then
fn:max($all-contributing-uris/(@last-merge|../last-merge) ! xs:dateTime(.))
else ()
(: Choose the URIs to restore:
   - explicit $remove-uris, unless removing them would leave at most one other
     contributor, in which case every contributor is restored;
   - otherwise the previous URIs recorded on the latest auditing receipt when no
     last-merge timestamp is available;
   - otherwise the contributors stamped with the latest last-merge timestamp. :)
let $previous-uris := if (fn:exists($remove-uris)) then
if (fn:count($all-contributing-uris[fn:not(. = ($remove-uris, $merged-doc-uri))]) le 1) then
$all-contributing-uris
else
$remove-uris
else if (fn:empty($last-merge-dateTime) and fn:exists($latest-auditing-receipt-for-doc)) then
$latest-auditing-receipt-for-doc/auditing:previous-uri ! fn:string(.)
else
$all-contributing-uris[(@last-merge|../last-merge) = $last-merge-dateTime] ! fn:string(.)
(: Contributors that would stay merged. With at most one of them left there is
   nothing to re-merge, so every contributor is restored instead. :)
let $older-uris := $all-contributing-uris[fn:not(. = ($previous-uris,$merged-doc-uri))]
let $previous-uris := if (fn:count($older-uris) le 1) then $all-contributing-uris else $previous-uris
(: True when two or more older contributors remain and must be merged again. :)
let $merge-doc-in-previous := fn:count($older-uris) ge 2
let $_trace := if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
let $doc-prefix := "cts.doc('" || $merged-doc-uri || "')"
return (
xdmp:trace($const:TRACE-MERGE-RESULTS, $doc-prefix || " all contributing URIs: " || xdmp:to-json-string($all-contributing-uris)),
xdmp:trace($const:TRACE-MERGE-RESULTS, $doc-prefix || " remove URIs: " || xdmp:to-json-string($remove-uris)),
xdmp:trace($const:TRACE-MERGE-RESULTS, $doc-prefix || " previous URIs: " || xdmp:to-json-string($previous-uris)),
xdmp:trace($const:TRACE-MERGE-RESULTS, $doc-prefix || " last merge dateTime: " || xdmp:to-json-string($last-merge-dateTime)),
xdmp:trace($const:TRACE-MERGE-RESULTS, $doc-prefix || " latest auditing receipt: " || xdmp:to-json-string($latest-auditing-receipt-for-doc))
)
else
()
(: Recomputed against the possibly widened $previous-uris. :)
let $older-uris := $all-contributing-uris[fn:not(. = ($previous-uris,$merged-doc-uri))]
(: Do nothing at all when there is no receipt, no timestamp and no explicit URIs. :)
where fn:exists(($latest-auditing-receipt-for-doc,$last-merge-dateTime, $remove-uris))
return (
(: Either archive the merged document or delete it. :)
if ($retain-rollback-info) then (
merge-impl:archive-document($merged-doc-uri, $merge-options)
) else (
xdmp:document-delete($merged-doc-uri)
),
(: Re-merge the remaining older contributors, if any.
   NOTE(review): unlike the lookup at the top of this function, this one does
   not wrap the options reference in fn:head — confirm a single value is guaranteed. :)
let $remerged-doc :=
if ($merge-doc-in-previous) then
let $merge-options-ref := $merge-doc-headers/*:merge-options/*:value ! fn:string(.)
let $merge-options := merge-impl:options-ref-to-options-node($merge-options-ref)
where fn:exists($merge-options) and fn:exists($older-uris)
return
merge-impl:save-merge-models-by-uri(
$older-uris,
$merge-options
)
else ()
(: Block future matches among the restored URIs; the merged document's own URI
   is included only when nothing was re-merged. :)
let $prevent-auto-match :=
if ($block-future-merges) then
matcher:block-matches((
if (fn:empty($remerged-doc)) then $merged-doc-uri else (),
$previous-uris
))
else ()
let $collections := merge-impl:build-target-collections($merge-options)
let $on-no-match-options := $collections => map:get("onNoMatch")
let $on-archive-options := $collections => map:get("onArchive")
let $archive-collections := coll-impl:on-archive(map:map(), $on-archive-options)
for $previous-doc-uri in $previous-uris
(: New collections: the document's current collections without the archive
   collections, passed through the on-no-match collection handling. :)
let $new-collections := coll-impl:on-no-match(
map:entry($previous-doc-uri, xdmp:document-get-collections($previous-doc-uri)[fn:not(. = $archive-collections)])
,$on-no-match-options)
(: Skip the merged document itself and any URI that is still a source of another merged document. :)
where fn:not($previous-doc-uri = $merged-doc-uri or merge-impl:source-of-other-merged-doc($previous-doc-uri, $merged-doc-uri))
return (
$previous-doc-uri,
if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
(
xdmp:trace($const:TRACE-MERGE-RESULTS, $previous-doc-uri || " new collections: " || xdmp:to-json-string($new-collections))
)
else
(),
xdmp:document-set-collections($previous-doc-uri, $new-collections)
),
(: Either record the rollback against the latest receipt or delete that receipt. :)
if ($retain-rollback-info) then (
$latest-auditing-receipt-for-doc ! auditing:audit-trace-rollback(., $merge-options)
) else (
$latest-auditing-receipt-for-doc ! xdmp:document-delete(xdmp:node-uri(.))
)
)
};
(:
 : Resolve a merge options reference to an options node.
 :
 : A reference that is castable as xs:hexBinary is treated as a zip archive:
 : the first part named in its manifest is extracted. Any other reference is
 : treated as a document URI. The root object-node or element of the resulting
 : document is handed to merge-impl:options-to-node.
 :
 : @param $merge-options-ref hex-encoded zip content or a document URI
 : @return the options node, or the empty sequence when no document was found
 :)
declare function merge-impl:options-ref-to-options-node($merge-options-ref) {
  let $options-document :=
    if (fn:not($merge-options-ref castable as xs:hexBinary)) then
      fn:doc($merge-options-ref)
    else
      let $zip-binary := binary { $merge-options-ref }
      let $first-part := fn:head(xdmp:zip-manifest($zip-binary)/*:part) ! fn:string(.)
      return xdmp:zip-get($zip-binary, $first-part)
  return
    if (fn:empty($options-document)) then
      ()
    else
      merge-impl:options-to-node($options-document/(object-node()|element()))
};
(:
 : Check whether $uri is recorded as a document-uri (JSON property or
 : sm:document-uri element) in some document, other than $merge-uri, that is in
 : both the archived and the merged collections.
 : @param $uri URI to look for
 : @param $merge-uri URI of the document to leave out of the search
 : @return true when at least one such document exists
 :)
declare function merge-impl:source-of-other-merged-doc($uri, $merge-uri)
{
  let $mentions-uri :=
    cts:or-query((
      cts:json-property-value-query("document-uri", $uri, "exact"),
      cts:element-value-query(xs:QName("sm:document-uri"), $uri, "exact")
    ))
  let $other-archived-merges :=
    cts:and-query((
      cts:collection-query($const:ARCHIVED-COLL),
      cts:collection-query($const:MERGED-COLL),
      $mentions-uri,
      cts:not-query(cts:document-query($merge-uri))
    ))
  return
    xdmp:exists(cts:search(fn:collection(), $other-archived-merges))
};
(:
 : Expand a list of URIs into the sorted, de-duplicated list of contributing
 : URIs. A URI under the merged-document directory is replaced by the
 : document-uri values recorded in that document's merges header, leaving out
 : any that are themselves under the merged-document directory. Every other
 : URI is kept as it is.
 : @param $uris URIs to expand
 : @return distinct URIs, sorted before de-duplication
 :)
declare function merge-impl:expanded-uris($uris as xs:string*) {
  let $source-uris :=
    for $uri in $uris
    return
      if (fn:not(fn:starts-with($uri, $merge-impl:MERGED-DIR))) then
        $uri
      else
        fn:doc($uri)/*:envelope/*:headers/*:merges/*:document-uri[fn:not(fn:starts-with(., $MERGED-DIR))] ! fn:string(.)
  let $sorted-uris :=
    for $candidate in $source-uris
    order by $candidate
    return $candidate
  return fn:distinct-values($sorted-uris)
};
(:~
 : Construct a merged document from the given URIs, but do not update the
 : database.
 :
 : The id is chosen as follows: if any of $uris is under the merged-document
 : directory, the first such URI (in sorted order) supplies it, with the
 : directory prefix and the ".json"/".xml" text removed. Otherwise the id is
 : the MD5 of the expanded URIs joined with "##".
 :
 : @param $uris URIs of the source documents that will be merged
 : @param $merge-options specification of how options are to be merged
 : @return in-memory copy of the merge result
 :)
declare function merge-impl:build-merge-models-by-uri(
  $uris as xs:string*,
  $merge-options as item()?
) {
  let $expanded-uris := merge-impl:expanded-uris($uris)
  let $existing-merge-ids :=
    for $uri in $uris
    where fn:starts-with($uri, $MERGED-DIR)
    order by $uri
    return fn:replace(fn:substring-after($uri, $MERGED-DIR),"\.(json|xml)", "")
  let $id :=
    if (fn:exists($existing-merge-ids)) then
      $existing-merge-ids[1]
    else
      xdmp:md5(fn:string-join($expanded-uris, "##"))
  return
    merge-impl:build-merge-models-by-uri($uris, $merge-options, $id, $expanded-uris)
};
(:~
 : Construct a merged document from the given URIs, but do not update the
 : database. The expanded URI list is computed here with
 : merge-impl:expanded-uris and passed on to the four-argument form.
 : @param $uris URIs of the source documents that will be merged
 : @param $merge-options specification of how options are to be merged
 : @param $id id to be used for merge document
 : @return in-memory copy of the merge result
 :)
declare function merge-impl:build-merge-models-by-uri(
  $uris as xs:string*,
  $merge-options as item()?,
  $id as xs:string
)
{
  let $expanded-uris := merge-impl:expanded-uris($uris)
  return
    merge-impl:build-merge-models-by-uri($uris, $merge-options, $id, $expanded-uris)
};
(:~
 : Construct a merged document from the given URIs, but do not update the
 : database.
 :
 : The result is a map:map ("write object") with these entries:
 :   "previousUri" - the $uris passed in
 :   "uri"         - URI of the merged document
 :   "audit-trace" - result of auditing:build-audit-trace for this merge
 :   "provenance"  - result of merge-impl:generate-provenance-details
 :   "value"       - the merged document itself
 :   "context"     - map with "collections" and "permissions"
 :
 : @param $uris URIs of the source documents that will be merged
 : @param $merge-options specification of how options are to be merged
 : @param $id id to be used for merge document; used as the URI unchanged when
 :            it already starts with the merged-document directory
 : @param $expanded-uris all URIs, including merged URIs that contribute the merged document
 : @return in-memory copy of the merge result
 :)
declare function merge-impl:build-merge-models-by-uri(
  $uris as xs:string*,
  $merge-options as item()?,
  $id as xs:string,
  $expanded-uris as xs:string*
)
{
  let $start-elapsed := xdmp:elapsed-time()
  let $compiled-merge-options := merge-impl:compile-merge-options($merge-options)
  let $merge-options := map:get($compiled-merge-options, "mergeOptionsNode")
  let $target-entity := map:get($compiled-merge-options, "targetEntityType")
  let $on-merge := map:get($compiled-merge-options, "onMerge")
  let $parsed-properties :=
    merge-impl:parse-final-properties-for-merge($expanded-uris, $merge-options)
  let $final-properties := map:get($parsed-properties, "final-properties")
  let $final-headers := map:get($parsed-properties, "final-headers")
  let $final-triples := map:get($parsed-properties, "final-triples")
  let $headers-ns-map := map:get($parsed-properties, $PROPKEY-HEADERS-NS-MAP)
  let $docs := map:get($parsed-properties, "documents")
  let $wrapper-qnames := map:get($parsed-properties, "wrapper-qnames")
  (: XML only when every source document is an XML document; JSON otherwise. :)
  let $format :=
    if ($docs instance of document-node(element())+) then $const:FORMAT-XML
    else $const:FORMAT-JSON
  let $merge-uri :=
    if (fn:starts-with($id, $MERGED-DIR)) then $id
    else merge-impl:build-merge-uri($id, $format)
  let $provenance-details := merge-impl:generate-provenance-details($final-properties)
  let $audit-trace :=
    auditing:build-audit-trace(
      $const:MERGE-ACTION,
      $uris,
      $merge-uri,
      $merge-options,
      merge-impl:generate-audit-attachments($merge-uri, $provenance-details)
    )
  let $merged-value :=
    merge-impl:build-merge-models-by-final-properties(
      $id,
      $uris,
      $docs,
      $wrapper-qnames,
      $final-properties,
      $final-headers,
      $final-triples,
      $headers-ns-map,
      $compiled-merge-options
    )
  (: Collections of each source document, taken from its write object. :)
  let $collections-by-uri :=
    map:new((
      for $uri in $uris
      let $write-object := util-impl:retrieve-write-object($uri)
      return
        map:entry($uri, map:get(map:get($write-object, "context"), "collections"))
    ))
  let $collections := (
    coll-impl:on-merge($collections-by-uri, $on-merge),
    $target-entity
  )
  (: Default permissions plus those of every source; hub defaults if none at all. :)
  let $gathered-permissions := (
    xdmp:default-permissions($merge-uri, "objects"),
    for $uri in $uris
    let $write-object := util-impl:retrieve-write-object($uri)
    return map:get(map:get($write-object, "context"), "permissions")
  )
  let $permissions :=
    if (fn:exists($gathered-permissions)) then $gathered-permissions
    else config:get-default-data-hub-permissions()
  let $context :=
    map:new((
      map:entry("collections", $collections),
      map:entry("permissions", $permissions)
    ))
  return (
    map:map()
      => map:with("previousUri", $uris)
      => map:with("uri", $merge-uri)
      => map:with("audit-trace", $audit-trace)
      => map:with("provenance", $provenance-details)
      => map:with("value", $merged-value)
      => map:with("context", $context),
    if (xdmp:trace-enabled($const:TRACE-PERFORMANCE)) then
      xdmp:trace($const:TRACE-PERFORMANCE, "merge-impl:build-merge-models-by-uri: " || (xdmp:elapsed-time() - $start-elapsed))
    else ()
  )
};
(:
 : Construct XML or JSON merged properties. The XML builder is used only when
 : every item of $docs is a document node with an element root; in every other
 : case the JSON builder is used (which does not receive $headers-ns-map).
 : @param $id unique ID for the merged document being built
 : @param $uris URIs of the source documents
 : @param $docs the source documents that provide the values
 : @param $wrapper-qnames QNames passed through to the format-specific builder
 : @param $final-properties merged property values with source info
 : @param $final-headers merged header values with source info
 : @param $final-triples merged triples
 : @param $headers-ns-map namespace map for interpreting header paths
 : @param $compiled-merge-options compiled merge options map
 : @return the merged document produced by the selected builder
 :)
declare function merge-impl:build-merge-models-by-final-properties(
  $id as xs:string,
  $uris as xs:string*,
  $docs as node()*,
  $wrapper-qnames as xs:QName*,
  $final-properties as item()*,
  $final-headers as item()*,
  $final-triples as item()*,
  $headers-ns-map as map:map,
  $compiled-merge-options as map:map
)
{
  let $all-xml := $docs instance of document-node(element())+
  return
    if (fn:not($all-xml)) then
      merge-impl:build-merge-models-by-final-properties-to-json(
        $id, $uris, $docs, $wrapper-qnames,
        $final-properties, $final-headers, $final-triples,
        $compiled-merge-options
      )
    else
      merge-impl:build-merge-models-by-final-properties-to-xml(
        $id, $uris, $docs, $wrapper-qnames,
        $final-properties, $final-headers, $final-triples,
        $headers-ns-map,
        $compiled-merge-options
      )
};
(:
 : Construct the new merged XML document, based on the merged values already
 : identified.
 :
 : The envelope holds, in order: the headers produced by
 : merge-impl:build-headers, the merged triples, and the instance (the es:info
 : of the first source document followed by the merged instance body).
 :
 : NOTE(review): the element wrappers are written as computed constructors.
 : es:envelope comes from the declared return type and es:instance from the
 : es:envelope/es:instance/es:info path read below. es:triples is assumed to be
 : the wrapper for the triples, and merge-impl:build-headers is assumed to
 : return the complete es:headers element — confirm both.
 :
 : @param $id unique identifier for the merged document
 : @param $uris URIs of the source documents
 : @param $docs raw source documents
 : @param $wrapper-qnames QNames passed through to the instance body builder
 : @param $final-properties merged property values with source info
 : @param $final-headers merged header values with source info
 : @param $final-triples merged triples
 : @param $headers-ns-map map of prefixes to namespaces for header paths
 : @param $compiled-merge-options compiled merge options map
 : @return merged document
 :)
declare function merge-impl:build-merge-models-by-final-properties-to-xml(
  $id as xs:string,
  $uris as xs:string*,
  $docs as node()*,
  $wrapper-qnames as xs:QName*,
  $final-properties as item()*,
  $final-headers as item()*,
  $final-triples as item()*,
  $headers-ns-map as map:map,
  $compiled-merge-options as map:map
) as element(es:envelope)
{
  element es:envelope {
    merge-impl:build-headers($id, $docs, $uris, $final-headers, $headers-ns-map, $compiled-merge-options, $const:FORMAT-XML),
    element es:triples {
      $final-triples
    },
    element es:instance {
      fn:head($docs)/es:envelope/es:instance/es:info,
      merge-impl:build-instance-body-by-final-properties(
        $final-properties,
        $wrapper-qnames,
        $docs,
        $compiled-merge-options,
        $const:FORMAT-XML
      )
    }
  }
};
(:~
 : Construct the headers (an es:headers element for XML, an object-node for
 : JSON) using merged values.
 :
 : The headers contain the merged document id, the list of contributing
 : document URIs with their last-merge timestamps, the distinct sources of the
 : source documents, a reference to the merge options, and finally the
 : non-Smart-Mastering headers (configured ones merged, the rest passed through).
 :
 : NOTE(review): the XML element wrappers and the zip manifest are written as
 : computed constructors. Their names are inferred from paths this module reads
 : elsewhere (sm:merges/sm:document-uri/@last-merge, sm:sources/sm:source,
 : merge-options/value, the manifest's "part" elements) and from the keys of
 : the JSON branch ("id", "lang": "zxx"). Confirm sm:id and the xml:lang
 : attribute against the stored documents.
 :
 : @param $id identifier for the merged document
 : @param $docs source documents; merged doc values drawn from these
 : @param $uris URIs of the source documents
 : @param $final-headers the result of merging headers from the source documents
 : @param $headers-ns-map map of namespace prefixes to namespace URIs; used to
 :                        interpret paths in $final-headers
 : @param $compiled-merge-options compiled merge options; its "mergeOptionsNode"
 :                        entry is referenced from the headers
 : @param $format $const:FORMAT-XML or $const:FORMAT-JSON; any other value is
 :                        reported through httputils:throw-bad-request
 : @return constructed element or object-node
 :)
declare function merge-impl:build-headers(
  $id as xs:string,
  $docs as node()*,
  $uris as xs:string*,
  $final-headers as item()*,
  $headers-ns-map as map:map?,
  $compiled-merge-options as map:map,
  $format as xs:string
)
{
  (: Combine the merged non-Smart-Mastering namespace headers. Some will be
   : configured and merged; some will not be configured and will just get
   : copied over. :)
  if ($format = ($const:FORMAT-XML, $const:FORMAT-JSON)) then
    ()
  else httputils:throw-bad-request(xs:QName("SM-INVALID-FORMAT"), "merge-impl:build-headers called with invalid format " || $format),
  let $current-dateTime := fn:current-dateTime()
  (:
    For in-memory docs, no URI will be returned, but the associated URI for an in-memory doc
    should already be in the $uris variable.
  :)
  let $all-uris :=
    for $uri in fn:distinct-values(($docs ! xdmp:node-uri(.), $uris))
    order by $uri
    return $uri
  (: Previously merged documents among the inputs; they supply older last-merge timestamps. :)
  let $all-merged-docs := $all-uris[fn:starts-with(., $MERGED-DIR)] ! fn:doc(.)
  let $is-xml := $format = $const:FORMAT-XML
  (: remove "/*:envelope/*:headers" from the paths; already accounted for :)
  let $configured-paths := $final-headers ! fn:replace(map:get(., "path"), "/.*headers(/.*)", "$1")
  let $_trace :=
    if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then (
      xdmp:trace($const:TRACE-MERGE-RESULTS, "Building Headers with: " || xdmp:to-json-string($final-headers)),
      xdmp:trace($const:TRACE-MERGE-RESULTS, "Configured paths: " || xdmp:to-json-string($configured-paths))
    ) else ()
  (: Identify the full and partial paths of the configured paths. Record in
   : a map for quick access. :)
  let $anc-path-map := map:new(merge-impl:config-paths-and-ancestors($configured-paths) ! map:entry(., 1))
  (: Identify the non-Smart-Mastering headers from the source documents.
   : Anything else will either be passed through to the merged document or
   : replaced by merged values. :)
  let $non-sm-headers :=
    if ($is-xml) then
      $docs/es:envelope/es:headers/*[fn:empty(self::sm:*|self::sources)]
    else
      let $sm-keys := ("id", "sources", "merges")
      let $sm-map := fn:data($docs/object-node("envelope")/object-node("headers"))
      return
        let $m := map:map()
        let $_ :=
          for $map in $sm-map
          for $key in map:keys($map)
          where fn:not($key = $sm-keys)
          return map:put($m, $key, (map:get($m, $key), map:get($map, $key)))
        return $m
  (: Build a map that combines the unconfigured (pass-through) values from the
   : source docs with the merged values. :)
  let $combined :=
    let $m := map:map()
    let $populate :=
      if ($is-xml) then
        merge-impl:prep-unconfigured-xml("", $anc-path-map, $configured-paths, $non-sm-headers, $m)
      else
        merge-impl:prep-unconfigured-json("", $anc-path-map, $configured-paths, $non-sm-headers, $m)
    let $add-merged-values := merge-impl:add-merged-values($final-headers, $m)
    return $m
  (: Having built a map of XPaths -> elements, generate a properly-nested
   : list of XML elements or JSON properties. :)
  let $merge-options-node := $compiled-merge-options => map:get("mergeOptionsNode")
  let $merge-options-uri := xdmp:node-uri($merge-options-node)
  (: Options stored in the database are referenced by URI. In-memory options are
   : zipped into a single part and referenced by the string value of that zip
   : (merge-impl:options-ref-to-options-node reads it back as hexBinary). :)
  let $merge-options-value :=
    if (fn:exists($merge-options-uri)) then
      $merge-options-uri
    else
      fn:string(xdmp:zip-create(
        element { fn:QName("xdmp:zip", "parts") } {
          element { fn:QName("xdmp:zip", "part") } {
            "merge-options" || (if ($merge-options-node instance of object-node()) then ".json" else ".xml")
          }
        },
        $merge-options-node))
  return
    if ($is-xml) then
      element es:headers {
        element sm:id { $id },
        element sm:merges {
          for $uri in $all-uris
          return
            element sm:document-uri {
              attribute last-merge {
                (: URIs merged by this call get the current time; URIs inherited
                 : from earlier merges keep their latest recorded timestamp. :)
                if ($uri = $uris) then
                  $current-dateTime
                else
                  fn:max($all-merged-docs/es:envelope/es:headers/sm:merges/sm:document-uri[. eq $uri]/@last-merge ! xs:dateTime(.))
              },
              $uri
            }
        },
        if (fn:exists($docs/es:envelope/es:headers/sm:sources/sm:source)) then
          element sm:sources {
            merge-impl:distinct-node-values($docs/es:envelope/es:headers/sm:sources/sm:source)
          }
        else (),
        merge-impl:distinct-node-values($docs/es:envelope/es:headers/sources),
        element sm:merge-options {
          attribute xml:lang { "zxx" },
          element sm:value { $merge-options-value }
        },
        merge-impl:map-to-xml($headers-ns-map, $combined)
      }
    else
      xdmp:to-json(
        map:new((
          map:entry("id", $id),
          map:entry("merges", array-node {
            for $uri in $all-uris
            return
              object-node { "document-uri": $uri, "last-merge":
                if ($uri = $uris) then
                  $current-dateTime
                else
                  fn:max($all-merged-docs/envelope/headers/merges/document-uri[. eq $uri]/../last-merge ! xs:dateTime(.))
              }
          }),
          map:entry("sources", array-node {
            merge-impl:distinct-node-values($docs/envelope/headers/sources)
          }),
          map:entry("merge-options", object-node {
            "lang": "zxx",
            "value": $merge-options-value
          }),
          merge-impl:map-to-json($combined)
        ))
      )/object-node()
};
(:
 : Remove duplicate nodes, where two nodes are duplicates when fn:deep-equal
 : says so. Of a group of duplicates the LAST occurrence is the one that is
 : kept; the relative order of the kept nodes is unchanged.
 : @param $nodes nodes to de-duplicate
 : @return the nodes that have no deep-equal node later in the sequence
 :)
declare function merge-impl:distinct-node-values($nodes as node()*)
{
  for $index in 1 to fn:count($nodes)
  let $candidate := $nodes[$index]
  let $later-nodes := fn:subsequence($nodes, $index + 1)
  where every $other in $later-nodes satisfies fn:not(fn:deep-equal($other, $candidate))
  return $candidate
};
(:~
 : Examines a sequence of paths and returns the distinct set made up of each
 : path and all of its ancestors. Empty steps (leading, trailing or doubled
 : slashes) are ignored and every returned path starts with "/".
 : For instance, given ("/a/b/c", "/a/b/d/e"), returns
 : ("/a", "/a/b", "/a/b/c", "/a/b/d", "/a/b/d/e")
 : @param $paths slash-separated paths
 : @return distinct paths and ancestor paths
 :)
declare function merge-impl:config-paths-and-ancestors($paths as xs:string*) as xs:string*
{
  fn:distinct-values(
    for $path in $paths
    let $steps := fn:tokenize($path, "/")[. ne ""]
    for $depth in 1 to fn:count($steps)
    return fn:concat("/", fn:string-join(fn:subsequence($steps, 1, $depth), "/"))
  )
};
(:~
 : Work through the original header properties recursively. Properties that weren't
 : configured for merging pass through and will become part of the merged
 : document. Any property that conflicts with a configured path will be skipped.
 :
 : The $m parameter will have keys that are property names and values that
 : are either JSON to be included in the merged document, or another map:map.
 : This structure allows us to combine overlapping XPaths.
 : @param $path current path being processed, relative to /envelope/headers
 : @param $anc-path-map "ancestor path map"; used to quickly determine whether
 :   the current path is part of a configured path
 : @param $configured-paths sequence of full configured paths
 : @param $headers all the non-Smart Mastering header objects from the source
 :   documents
 : @param $m recursive map:map where the keys are property names and the values
 :   are either map:maps (for the next level of properties down) or
 :   values to include in the merged document
 : @return () (no "as" clause to allow for tail call optimization)
 :)
declare function merge-impl:prep-unconfigured-json(
  $path as xs:string,
  $anc-path-map as map:map,
  $configured-paths as xs:string*,
  $headers as map:map*,
  $m as map:map
)
{
  for $header in $headers
  for $key in map:keys($header)
  let $curr-path := $path || "/" || $key
  let $current := map:get($header, $key)
  return
    if (fn:empty(map:get($anc-path-map, $curr-path))) then
      (: This path is not related to any configured path, so we can just pass
       : any elements through. :)
      map:put($m, $key, (map:get($m, $key), $current))
    else if ($curr-path = $configured-paths) then
      (: Any elements here will be replaced by the calculated merged elements :)
      ()
    else
      (: This path is an ancestor of a configured path: descend and keep only
       : the parts underneath it that are not configured. :)
      if (fn:exists($current)) then
        let $child-map := map:map()
        let $populate :=
          merge-impl:prep-unconfigured-json($curr-path, $anc-path-map, $configured-paths, $current, $child-map)
        return
          (: Test for the presence of keys explicitly. Taking the effective
           : boolean value of the key sequence itself is an error when there
           : are two or more keys and is false for a single empty-string key. :)
          if (fn:exists(map:keys($child-map))) then
            map:put(
              $m, $key,
              (: "+" on maps is the union; merges with what an earlier source contributed. :)
              if (map:contains($m, $key)) then map:get($m, $key) + $child-map
              else $child-map
            )
          else ()
      else
        map:put($m, $key, (map:get($m, $key), $current))
};
(:~
 : Recursively walks the original header elements and collects the ones that
 : are not targeted by a configured merge path, so that they pass through to
 : the merged document. Elements sitting exactly on a configured path are
 : skipped; elements that are ancestors of a configured path are descended
 : into.
 :
 : The result is accumulated in $m: keys are element names (as produced by
 : xdmp:key-from-QName) and values are either elements to copy into the merged
 : document or a nested map:map for the next level down.
 : @param $path current path being processed, relative to /es:envelope/es:headers
 : @param $anc-path-map "ancestor path map"; a non-empty entry for a path
 :        means that path is part of a configured path
 : @param $configured-paths sequence of full configured paths
 : @param $headers header elements from the source documents at this level
 : @param $m map:map that receives the pass-through content
 : @return () (no "as" clause to allow for tail call optimization)
 :)
declare function merge-impl:prep-unconfigured-xml(
  $path as xs:string,
  $anc-path-map as map:map,
  $configured-paths as xs:string*,
  $headers,
  $m as map:map
)
{
  for $qname in fn:distinct-values($headers ! fn:node-name(.))
  let $same-named := $headers[fn:node-name(.) eq $qname]
  let $this-path := $path || "/" || $qname
  let $map-key := xdmp:key-from-QName($qname)
  let $on-configured-branch := fn:exists(map:get($anc-path-map, $this-path))
  let $child-elements := $same-named/element()
  return
    if ($on-configured-branch and $this-path = $configured-paths) then
      (: Replaced later by the calculated merged elements :)
      ()
    else if ($on-configured-branch and fn:exists($child-elements)) then
      (: Ancestor of a configured path: keep only the unconfigured descendants :)
      let $nested := map:map()
      let $populate :=
        merge-impl:prep-unconfigured-xml($this-path, $anc-path-map, $configured-paths, $child-elements, $nested)
      return
        if (map:keys($nested)) then
          map:put(
            $m, $map-key,
            if (fn:exists(map:get($m, $map-key))) then map:get($m, $map-key) + $nested
            else $nested
          )
        else ()
    else
      (: Unrelated to any configured path, or a leaf on the way to one: pass through :)
      map:put($m, $map-key, (map:get($m, $map-key), $same-named))
};
(:~
 : Helper for add-merged-values: places one merged value into the nested map
 : structure at the location described by $path-parts, creating intermediate
 : maps where none exist yet.
 : @param $m a map:map where the keys are element names and the values are
 :        nested maps or elements to be put in the merged document
 : @param $path-parts the path to an element, tokenized by "/"
 : @param $value the merged value for this header
 : @return () no "as" clause to allow for tail call optimization
 :)
declare function merge-impl:add-merged-part(
  $m as map:map,
  $path-parts as xs:string*,
  $value
)
{
  let $step := fn:head($path-parts)
  let $remaining := fn:tail($path-parts)
  return
    if (fn:empty($remaining)) then
      (: Last step: store the value, unwrapping a source-values map to its values :)
      map:put(
        $m, $step,
        if (merge-impl:is-source-values-map($value)) then map:get($value, "values")
        else $value
      )
    else if (fn:exists(map:get($m, $step))) then
      (: Descend into every plain nested map already stored at this step :)
      map:get($m, $step)
        [. instance of map:map]
        [fn:not(merge-impl:is-source-values-map(.))]
        ! merge-impl:add-merged-part(., $remaining, $value)
    else
      (: Nothing stored here yet: build the rest of the path in a fresh map :)
      let $nested := map:map()
      let $populate := merge-impl:add-merged-part($nested, $remaining, $value)
      return
        map:put($m, $step, $nested)
};
(:~
 : Folds the calculated merged header values into the map that already holds
 : the pass-through headers.
 : @param $final-headers a sequence of maps having "path" and "values"
 : @param $m a map:map where the keys are element names and the values are
 :        nested maps or elements to be put in the merged document
 : @return () works by modifying $m
 :)
declare function merge-impl:add-merged-values($final-headers, $m as map:map)
{
  $final-headers ! (
    let $header := .
    (: Drop the leading "/*:envelope/*:headers" portion; $m is already rooted there :)
    let $relative-path := fn:replace(map:get($header, "path"), "/.*headers(/.*)", "$1")
    let $steps := fn:tokenize($relative-path, "/")[. ne ""]
    return
      merge-impl:add-merged-part($m, $steps, map:get($header, "values"))
  )
};
(:~
 : Serializes the nested header map into XML. Each key is an element name in
 : xdmp:key-from-QName form; each value is either content to emit or a nested
 : map that becomes a wrapping element.
 : @param $ns-map a map of namespace prefixes to namespace URIs
 : @param $m the map to convert
 : @return a sequence of nodes (no "as" clause to allow tail call optimization)
 :)
declare function merge-impl:map-to-xml($ns-map as map:map, $m as map:map)
{
  for $key in map:keys($m)
  let $entry := map:get($m, $key)
  return
    typeswitch ($entry)
    case map:map return
      if (merge-impl:is-source-values-map($entry)) then
        map:get($entry, "values")
      else
        xdmp:with-namespaces(
          $ns-map,
          (
            let $qname := xdmp:QName-from-key($key)
            let $lexical := fn:string($qname)
            (: The prefix is whatever precedes the first colon, or "" when there is none :)
            let $prefix :=
              if (fn:contains($lexical, ":")) then fn:tokenize($lexical, ":")[1]
              else ""
            let $element-name := fn:QName(map:get($ns-map, $prefix), fn:local-name-from-QName($qname))
            return
              element { $element-name } {
                merge-impl:map-to-xml($ns-map, $entry)
              }
          )
        )
    case object-node()+ return
      $entry/values
    default return
      $entry
};
(:~
 : Tells whether the argument is a "source values" leaf: a single map:map that
 : has all of the keys "sources", "values" and "name".
 : @param $m any value
 : @return true only for a map carrying all three keys
 :)
declare function merge-impl:is-source-values-map($m as item()*) as xs:boolean {
  if ($m instance of map:map) then
    every $required-key in ("sources", "values", "name")
    satisfies map:contains($m, $required-key)
  else
    fn:false()
};
(:~
 : Simplifies a nested structure of map:maps whose leaves carry a property
 : name, its values and the sources of those values. Each such leaf is
 : replaced by just its values; every other entry is copied as it is.
 : @param $m a leaf map or a map of nested maps / plain values
 : @return the values of a leaf, or a new map:map with simplified entries
 :)
declare function merge-impl:map-to-json($m as map:map)
{
  let $is-leaf :=
    every $marker in ("sources", "values", "name")
    satisfies map:contains($m, $marker)
  return
    if ($is-leaf) then
      map:get($m, "values")
    else
      map:new(
        for $name in map:keys($m)
        let $child := map:get($m, $name)
        return
          map:entry(
            $name,
            (: Nested maps are simplified recursively; unconfigured values are kept :)
            if ($child instance of map:map) then merge-impl:map-to-json($child)
            else $child
          )
      )
};
(:
: Construct the new merged JSON document, based on the merged values already
: identified. The result is an envelope object with "headers", "triples" and
: "instance" properties.
: @param $id unique identifier for the merged document
: @param $uris URIs of the source documents; passed on to build-headers
: @param $docs raw source documents
: @param $wrapper-qnames names passed on to
: build-instance-body-by-final-properties to wrap the instance body
: @param $final-properties merged property values with source info
: @param $final-headers merged header values with source info
: @param $final-triples merged triples, emitted inside the "triples" array
: @param $compiled-merge-options compiled merge options; passed on to
: build-headers and build-instance-body-by-final-properties
: @return merged document
:)
declare function merge-impl:build-merge-models-by-final-properties-to-json(
$id as xs:string,
$uris as xs:string*,
$docs as node()*,
$wrapper-qnames as xs:QName*,
$final-properties as item()*,
$final-headers as item()*,
$final-triples as item()*,
$compiled-merge-options as map:map
)
{
object-node {
"envelope": object-node {
"headers": merge-impl:build-headers($id, $docs, $uris, $final-headers, (), $compiled-merge-options, $const:FORMAT-JSON),
"triples": array-node {
$final-triples
},
"instance": (
let $instance-body :=
merge-impl:build-instance-body-by-final-properties(
$final-properties,
$wrapper-qnames,
$docs,
$compiled-merge-options,
$const:FORMAT-JSON
)
(: Only the first source document is consulted for instance "info" :)
let $info := fn:head($docs)/envelope/instance/info
return
if (fn:exists($info)) then
(: NOTE(review): "+" presumably combines the info object with the
: instance body into a single object - confirm :)
object-node {
"info": $info
} + $instance-body
else
$instance-body
)
}
}
};
(:
: Construct the Entity Services instance for the new merged document.
:
: For JSON the non-path properties are assembled first (build-non-path-json)
: and the path-targeted properties are then layered on top (build-path-json).
: For XML the non-path properties become elements, path-targeted values are
: applied to those elements through mem: operations where the path resolves
: inside them, and any remaining path values are emitted separately. The XML
: result is finally wrapped in one element per entry of $wrapper-qnames.
: @param $final-properties merged property values with source info
: @param $wrapper-qnames xs:QName* these will wrap the results
: @param $docs source documents; used in the XML branch to discover the
: ancestor elements of path-targeted values
: @param $compiled-merge-options compiled merge options; "mergeRulesInfo" is
: read from it in the XML branch
: @param $format $const:FORMAT-JSON or $const:FORMAT-XML
: @return instance elements or properties
:)
declare function merge-impl:build-instance-body-by-final-properties(
$final-properties as map:map*,
$wrapper-qnames as xs:QName*,
$docs as document-node()*,
$compiled-merge-options as map:map,
$format as xs:string
)
{
(: Trace output is produced only when the trace event is enabled :)
if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, "Building instance with properties: " || xdmp:to-json-string($final-properties))
else (),
if ($format eq $const:FORMAT-JSON) then (
xdmp:to-json(
let $merged-non-path-props := merge-impl:build-non-path-json($final-properties, $wrapper-qnames)
return merge-impl:build-path-json($merged-non-path-props, $final-properties)
)/object-node()
)
else (
(: Wrap the produced content in one element per wrapper QName :)
fn:fold-left(
function($children, $parent-name) {
element {$parent-name} {
$children
}
},
(: Keyed by fn:generate-id of a property element; holds its updated copy :)
let $prop-element-updates := map:map()
let $prop-elements :=
let $non-path-properties := $final-properties[fn:not(map:contains(., "path"))]
let $_trace := if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, "Building instance with non-path properties: " || xdmp:to-json-string($non-path-properties))
else ()
for $prop in $non-path-properties
let $prop-values := $prop => map:get("values")
return
(: Values that are already elements are used directly; anything else is
: wrapped in an element named after the property :)
if ($prop-values instance of element()+) then
$prop-values
else
element {($prop => map:get("name"))} {
$prop-values
}
(: Filled in by find-updates with the paths that were applied to an element :)
let $paths-used-to-update-elements-map := map:map()
let $path-properties := $final-properties[fn:exists(map:get(., "path"))]
(: Need QNames in document order to determine if element properties need updated via XPath merge rules :)
let $document-order-wrapper-qnames := fn:reverse($wrapper-qnames)
let $element-updates-by-paths :=
for $prop in $prop-elements
(: Send wrapper-qnames in document order :)
let $copy-op := mem:copy($prop)
let $prop-updates := merge-impl:find-updates($copy-op, $path-properties, $document-order-wrapper-qnames, $prop, $paths-used-to-update-elements-map)
where fn:exists(map:keys($prop-updates))
return
(map:put($prop-element-updates, fn:generate-id($prop), mem:execute($prop-updates)), fn:true())
let $path-updates :=
let $merge-rules := $compiled-merge-options => map:get("mergeRulesInfo")
(: Only paths that were not already applied to a property element above :)
for $path in fn:distinct-values($final-properties ! map:get(., "path"))[fn:empty(map:get($paths-used-to-update-elements-map, .))]
let $path-properties := $path-properties[map:get(., "path") = $path]
(: A property may contain path-specified properties. Overlay the path property values on the top-level properties :)
let $_trace := if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, "Building instance with path properties: " || xdmp:to-json-string($path-properties))
else ()
return
(: determine path structure from original documents :)
let $merge-rule := $merge-rules[map:get(., "path") = $path]
let $docs-to-values-function := $merge-rule => map:get("documentToValuesFunction")
let $original-element := fn:head($docs ! $docs-to-values-function(.))
let $ancestor-qnames := $original-element/ancestor::element() ! fn:node-name(.)
(: NOTE(review): if neither the first wrapper QName nor es:instance occurs
: among the ancestors, $last-index-of is empty and so is the start
: position given to fn:subsequence - confirm this cannot happen :)
let $last-index-of := fn:index-of($ancestor-qnames, fn:head(($wrapper-qnames,xs:QName('es:instance'))))[fn:last()]
let $path-wrapping-qnames := fn:subsequence($ancestor-qnames, $last-index-of + 1)
return
(: Wrap the merged values in the ancestor elements found below that point :)
fn:fold-left(
function($children, $parent-name) {
element {$parent-name} {
$children
}
},
$path-properties ! map:get(., "values"),
$path-wrapping-qnames
)
return (
(: Emit each property element, preferring its updated copy when one exists :)
for $prop in $prop-elements
let $prop-id := fn:generate-id($prop)
return
if (map:contains($prop-element-updates, $prop-id)) then
map:get($prop-element-updates, $prop-id)
else
$prop,
$path-updates
),
$wrapper-qnames
)
)
};
(:
: Construct the non-path referenced portion of the Entity Services instance for the new merged document.
: Properties that carry a "path" key are ignored here.
: @param $final-properties merged property values with source info
: @param $wrapper-qnames xs:QName* these will wrap the results
: @return map:map that represents the merged instance for non-path targeted JSON
:)
declare function merge-impl:build-non-path-json($final-properties as map:map*, $wrapper-qnames as xs:QName*) {
(: Names of properties flagged with a true "retainArray"; filled while folding :)
let $props-to-retain-array := map:map()
(: TODO - consider using XSLT. I'd be able to specify a path rather than tracking through recursive descent :)
let $merged-props-body :=
(: consolidate $final-properties into a single map of names (keys) and values :)
fn:fold-left(
function($map-a, $map-b) {
$map-a + $map-b
},
map:map(),
let $non-path-properties := $final-properties[fn:not(map:contains(., "path"))]
let $_trace := if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, "Building instance with non-path properties: " || xdmp:to-json-string($non-path-properties))
else ()
for $prop at $pos in $non-path-properties
let $prop-name := fn:string($prop => map:get("name"))
let $prop-values := $prop => map:get("values")
(: True only when "retainArray" is present, castable to boolean and true :)
let $retain-array := (($prop => map:get("retainArray"))[. castable as xs:boolean] ! xs:boolean(.)) = fn:true()
return (
(: Side effect: remember the property name; map:put contributes nothing
: to the sequence being folded :)
if ($retain-array and fn:not(map:contains($props-to-retain-array, $prop-name))) then
map:put($props-to-retain-array, $prop-name, $retain-array)
else (),
map:entry(
$prop-name,
$prop-values
)))
(: For flagged properties holding at most one value, wrap that value in an
: array-node explicitly :)
let $_convert-to-arrays :=
for $prop-name in map:keys($props-to-retain-array)
let $values := map:get($merged-props-body, $prop-name)
where fn:count($values) le 1
return
map:put($merged-props-body, $prop-name, array-node{$values})
(: Nest the body under one map entry per wrapper QName :)
return fn:fold-left(
function($child-object, $parent-name) {
map:entry(fn:string($parent-name), $child-object)
},
$merged-props-body,
$wrapper-qnames
)
};
(:
: Add the path portions of the merged Entity Services instance.
: Each path-targeted property gets a "lowerPath" entry written into its map
: as a side effect, and $base-json is extended so that every step of that
: path exists before the XSLT transform runs.
: @param $base-json json:object|map:map with non-path reference properties already merged in
: @param $final-properties merged property values with source info
: @return json:object that represents the merged instance with path targeted JSON
:)
declare function merge-impl:build-path-json($base-json (: as json:object|map:map :), $final-properties as map:map*) {
let $path-properties := $final-properties[map:contains(., "path")]
let $_populate-lower-paths :=
for $prop in $path-properties
(: Path relative to the instance; called with the JSON flag set :)
let $lower-path := merge-impl:strip-top-path(map:get($prop, "path"), (), (), fn:true())
let $path-parts := fn:tokenize($lower-path, "/")[. ne ""]
let $populate-base-json := merge-impl:build-out-json-path-for-xslt($base-json (: as json:object|map:map :), $path-parts)
return map:put($prop, "lowerPath", $lower-path)
(: Convert from maps to json:object notation, needed for XSLT :)
(: NOTE(review): the element constructor that presumably wraps this enclosed
: expression is not visible in this listing - confirm against the original
: source :)
let $xml-json := {xdmp:from-json(xdmp:to-json($base-json))}
let $_trace := if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, "Building instance with path properties: " || xdmp:to-json-string($path-properties))
else ()
let $path-templates := merge-impl:generate-path-templates($path-properties)
(: NOTE(review): the stylesheet constructor that presumably surrounds the
: generated templates is not visible in this listing - confirm against the
: original source :)
let $full-template :=
{ $path-templates }
let $fully-merged := xdmp:xslt-eval($full-template, $xml-json)
return json:object($fully-merged/root/node())
};
(:
 : Makes sure every step of a property path exists as an object in the JSON
 : structure, so that the XSLT transform has something to match. Walks the
 : path one step at a time; where a step has no object value yet, a new empty
 : json:object is appended to that step's values.
 : @param $base-json json:object|map:map to walk and extend in place
 : @param $path-parts xs:string* the steps of the JSON path
 : @return the object found or created for the last step
 :)
declare function merge-impl:build-out-json-path-for-xslt($base-json (: as json:object|map:map :), $path-parts as xs:string*) {
  fn:fold-left(
    function ($parent (: as json:object|map:map :), $step) {
      let $existing-values := map:get($parent, $step)
      (: Unwrap arrays and JSON nodes so that in-memory objects can be found :)
      let $unwrapped :=
        for $existing in $existing-values
        return
          typeswitch ($existing)
          case json:array return
            json:array-values($existing)
          case array-node() return
            json:array-values(xdmp:from-json($existing))
          case object-node() return
            xdmp:from-json($existing)
          default return
            $existing
      let $first-object := fn:head($unwrapped[. instance of json:object or . instance of map:map])
      return
        if (fn:empty($first-object)) then
          let $new-object := json:object()
          return (
            map:put($parent, $step, ($existing-values, $new-object)),
            $new-object
          )
        else
          $first-object
    },
    $base-json,
    $path-parts
  )
};
(:~
 : Compares two node sequences position by position.
 : @param $nodes1 first sequence
 : @param $nodes2 second sequence
 : @return true when both sequences have the same length and every pair of
 :         nodes at the same position is deep-equal
 :)
declare function merge-impl:multi-node-equals($nodes1 as node()*, $nodes2 as node()*)
{
  let $size := fn:count($nodes1)
  return
    if ($size ne fn:count($nodes2)) then
      fn:false()
    else
      every $position in 1 to $size
      satisfies fn:deep-equal($nodes1[$position], $nodes2[$position])
};
(:~
 : Removes the envelope/instance prefix from a rooted path and then trims the
 : top of what remains:
 :  - with wrapper QNames, each step whose position matches the position of
 :    the same QName in $wrapper-qnames is dropped;
 :  - without wrapper QNames, JSON paths are returned as they are and XML
 :    paths lose their first step.
 : @param $path rooted path to shorten
 : @param $wrapper-qnames QNames compared against the path steps
 : @param $namespaces prefix-to-URI map used while resolving step names
 : @param $is-json whether the path addresses JSON
 : @return the shortened path
 :)
declare function merge-impl:strip-top-path($path, $wrapper-qnames as xs:QName*, $namespaces as map:map?, $is-json as xs:boolean) as xs:string
{
  let $literal-prefix := "/(es:envelope|envelope)/(es:instance|instance)/"
  let $below-instance :=
    if (fn:starts-with($path, $literal-prefix)) then
      fn:substring-after($path, $literal-prefix)
    else
      fn:replace($path, "^/\w*:?envelope/\w*:?instance/", "")
  return
    if (fn:empty($wrapper-qnames)) then
      if ($is-json) then
        $below-instance
      else
        fn:replace($below-instance, "^[^/]+/", "")
    else
      xdmp:with-namespaces(
        $namespaces,
        fn:string-join(
          for $step at $position in fn:tokenize($below-instance, "/")
          let $lookup := if ($step castable as xs:QName) then xs:QName($step) else $step
          let $wrapper-positions := fn:index-of($wrapper-qnames, $lookup)
          where fn:not($position = $wrapper-positions)
          return $step,
          "/"
        )
      )
};
(:
 : Rewrites a simple slash-separated path so that it addresses the XML
 : serialization of JSON. Empty steps are ignored.
 : Example: "/LowerProperty1/EvenLowerProperty/LowestProperty1" becomes
 : "json:entry[@key='LowerProperty1']/json:value/json:object/json:entry[@key='EvenLowerProperty']/json:value/json:object/json:entry[@key='LowestProperty1']/json:value"
 : @param $path slash-separated property path
 : @return the equivalent path over json:entry / json:value / json:object
 :)
declare function merge-impl:convert-path-to-json($path as xs:string)
as xs:string
{
  let $entry-steps :=
    fn:tokenize($path, "/")[. ne ""] ! ("json:entry[@key='" || . || "']/json:value")
  return
    fn:string-join($entry-steps, "/json:object/")
};
(:
: For each distinct "lowerPath" among the $path-properties, generate an XSL template
: whose match attribute is the JSON-serialization form of that path.
: @param $path-properties merged path-targeted property values with source
: info; each map is expected to carry a "lowerPath" entry
: @return element(xsl:template)* that will add the merged results
:)
declare function merge-impl:generate-path-templates($path-properties as map:map*)
{
let $distinct-paths := fn:distinct-values($path-properties ! map:get(., 'lowerPath'))
for $lower-path in $distinct-paths
let $path-props := $path-properties[map:get(., 'lowerPath') = $lower-path]
(: NOTE(review): the template element constructor that presumably surrounds
: the following enclosed expressions is not visible in this listing -
: confirm against the original source :)
return
{ attribute match { merge-impl:convert-path-to-json($lower-path) }}
{
let $values := $path-props ! map:get(., 'values')
let $values-is-array := $values instance of json:array or $values instance of array-node()
(: Wrap in an array when there are several values, or when any property for
: this path has a truthy "retainArray", unless the values already form an array :)
let $wrap-in-array := fn:not($values-is-array) and (fn:count($values) gt 1 or (some $path-prop in $path-props satisfies $path-prop => map:get("retainArray")))
let $values := if ($wrap-in-array) then array-node {$values} else $values
return
typeswitch($values)
case empty-sequence() return
()
(: Convert JSON nodes to the serialized JSON elements :)
case array-node()|object-node() return
xdmp:from-json($values)
(: JSON specific leaf nodes can't be added directly added to XSLT :)
default return
let $data-value := fn:data($values)
where fn:exists($data-value)
return
let $data-type := xdmp:type($data-value)
(: NOTE(review): fn:number is applied to $data-type, not $data-value. If the
: intent is to keep numeric zero, whose boolean value is false, this
: presumably should test $data-value - confirm :)
where $data-type = xs:QName("xs:boolean") or fn:number($data-type) = 0 or $data-value
return (
{$data-type} ,
$data-value
)
}
};
(:
 : Walks $path-properties recursively and queues a mem:replace operation for
 : each one whose path resolves inside $prop.
 : @param $updates the pending operation set to extend
 : @param $path-properties a sequence of maps that hold property values and sources
 : @param $wrapper-qnames a list of QNames in document order to wrap around the elements
 : @param $prop a merged non-path property
 : @param $path-updates receives an entry (path -> true) for every path that
 :        was applied
 : @return a map:map of mem:replace operations. Type not specified to allow for tail call optimization.
 :)
declare function merge-impl:find-updates($updates as map:map, $path-properties as map:map*, $wrapper-qnames as xs:QName*, $prop, $path-updates as map:map)
{
  if (fn:empty($path-properties)) then
    $updates
  else
    let $current := fn:head($path-properties)
    let $remaining := fn:tail($path-properties)
    let $full-path := map:get($current, "path")
    let $ns := map:get($current, "nsMap")
    (: The configured path is rooted at the document; evaluate it relative to
       $prop by stripping the wrapper steps and the property's own name. :)
    let $relative-path :=
      merge-impl:strip-top-path($full-path, ($wrapper-qnames, fn:node-name($prop)), $ns, fn:false())
    let $matched := xdmp:unpath($relative-path, $ns, $prop)
    return
      if (fn:empty($matched)) then
        (: Path does not resolve inside this property; try the others :)
        merge-impl:find-updates($updates, $remaining, $wrapper-qnames, $prop, $path-updates)
      else (
        map:put($path-updates, $full-path, fn:true()),
        (: Path resolves inside this property; replace the matched nodes :)
        mem:replace(
          merge-impl:find-updates($updates, $remaining, $wrapper-qnames, $prop, $path-updates),
          $matched,
          map:get($current, "values")
        )
      )
};
(:
 : Extracts the Entity Services instance from each of the given documents.
 : Works for both XML (es:envelope/es:instance) and JSON envelopes. The "info"
 : section is never treated as the instance.
 : @param $docs source documents
 : @return for each document: the instance root when it has zero or several
 :         candidate children; the content two levels below an "MDM" wrapper
 :         when that is the only candidate; otherwise the single candidate
 :)
declare function merge-impl:get-instances($docs)
{
  for $document in $docs
  let $root := $document/(es:envelope|object-node("envelope"))/(es:instance|object-node("instance"))
  let $candidates := $root/((element()[*]|object-node()) except (es:info|object-node("info")))
  return
    if (fn:count($candidates) ne 1) then
      $root
    else if ($candidates instance of element(MDM)) then
      $candidates/*/*
    else if (fn:node-name($candidates) eq xs:QName("MDM")) then
      (: The MDM property may hold an array; step through it when present :)
      $candidates/(array-node()|.)/object-node()/object-node()
    else
      $candidates
};
(:~
 : Builds one source descriptor per source entry found in the headers of each
 : content object's document. A document without any source entry gets a
 : single descriptor named after its URI. Descriptors are ordered by the
 : document's last-updated value, most recent first.
 : @param $content-objects maps with a "value" (the document) and a "uri"
 : @param $compiled-merge-options compiled options; "lastUpdatedFunction" is
 :        read from it and applied to each document
 : @return object-nodes with "name", "dateTime" and "documentUri"
 :)
declare function merge-impl:get-sources(
  $content-objects,
  $compiled-merge-options as item())
as object-node()*
{
  let $timestamp-of := map:get($compiled-merge-options, "lastUpdatedFunction")
  for $wrapper in $content-objects
  let $document := map:get($wrapper, "value")
  (: Prefer the URI stored on the node; fall back to the one on the wrapper :)
  let $uri := fn:head((xdmp:node-uri($document), map:get($wrapper, "uri")))
  let $declared-sources :=
    $document/(es:envelope|object-node("envelope"))
      /(es:headers|object-node("headers"))
      /(*:sources/(*:source|*:name)[self::element()]|array-node("sources")/object-node("sources")|object-node("sources"))
  let $effective-sources :=
    if (fn:exists($declared-sources)) then $declared-sources
    else object-node {"name": $uri}
  for $source in $effective-sources
  let $updated := $timestamp-of($document)
  order by $updated descending
  return
    object-node {
      "name": fn:string($source/descendant-or-self::*[fn:local-name(.) = ('name','datahubSourceName')]),
      "dateTime": fn:string($updated),
      "documentUri": $uri
    }
};
(: Key under which parse-final-properties-for-merge stores the first map
 : returned by build-final-headers, i.e. the header namespace map. :)
declare variable $PROPKEY-HEADERS-NS-MAP := "headers-ns-map";
(:~
 : Loads the source documents, extracts their instances and sources, and runs
 : the property, header and triple merges. Everything is returned in a single
 : map so that callers can build the merged document from it.
 : @param $uris URIs of the source documents
 : @param $merge-options these control how the source data get merged together
 : @return map:map with the keys "instances", "sources", "documents",
 :         "wrapper-qnames", "final-properties", the value of
 :         $PROPKEY-HEADERS-NS-MAP, "final-headers" and "final-triples"
 :)
declare function merge-impl:parse-final-properties-for-merge(
  $uris as xs:string*,
  $merge-options as item()?
) as map:map
{
  let $compiled-merge-options := merge-impl:compile-merge-options($merge-options)
  let $content-objects := $uris ! util-impl:retrieve-write-object(.)
  let $docs := merge-impl:get-content-docs($content-objects)
  let $first-doc := fn:head($docs)
  let $first-instance := merge-impl:get-instances($first-doc)
  let $instances := ($first-instance, merge-impl:get-instances(fn:tail($docs)))
  (: Nodes from the first instance up to, but excluding, the instance root and
     everything above it; their names become the wrapper QNames, innermost first. :)
  let $instance-root-and-above :=
    $first-doc/(es:envelope|object-node("envelope"))/(es:instance|object-node("instance"))/ancestor-or-self::*
  let $wrapper-nodes := $first-instance/ancestor-or-self::* except $instance-root-and-above
  let $wrapper-qnames := fn:reverse($wrapper-nodes ! fn:node-name(.))
  let $sources := merge-impl:get-sources($content-objects, $compiled-merge-options)
  let $sources-by-document-uri as map:map :=
    util-impl:combine-maps(
      map:map(),
      for $uri-node in $sources/documentUri
      return map:entry($uri-node, $uri-node/..)
    )
  let $final-properties :=
    merge-impl:build-final-properties($compiled-merge-options, $instances, $content-objects, $sources-by-document-uri)
  let $final-headers :=
    merge-impl:build-final-headers($compiled-merge-options, $content-objects, $sources-by-document-uri)
  let $final-triples :=
    merge-impl:build-final-triples($compiled-merge-options, $docs, $sources)
  return
    map:new((
      map:entry("instances", $instances),
      map:entry("sources", $sources),
      map:entry("documents", $docs),
      map:entry("wrapper-qnames", $wrapper-qnames),
      map:entry("final-properties", $final-properties),
      (: build-final-headers returns the namespace map first, then the headers :)
      map:entry($PROPKEY-HEADERS-NS-MAP, fn:head($final-headers)),
      map:entry("final-headers", fn:tail($final-headers)),
      map:entry("final-triples", $final-triples)
    ))
};
(:~
 : Build a sequence of maps that contain, for each header, the algorithm used
 : to do the merging, the merged values, and the sources of those values
 : (embedded in the values).
 :
 : Two groups of headers are produced:
 :  1. top-level headers (other than "id", "merges" and "sources") that no
 :     configured header merge rule covers; these use the default merge rule;
 :  2. headers targeted by a merge rule whose path points below
 :     envelope/headers.
 :
 : @param $compiled-merge-options compiled merge options; read for
 :        "mergeOptionsRef", "namespaces", "defaultMergeRuleInfo",
 :        "mergeRulesInfo" and "isHubCentralFormat"
 : @param $content-objects DHF content wrappers around the source documents for building headers
 : @param $sources-by-document-uri map from document URI to the source
 :        information for that document
 : @return sequence of maps. The first item is always exactly one map: the
 :         mapping from namespace prefixes to namespace URIs (an empty map when
 :         none is configured). The rest of the maps are final header values.
 :)
declare function merge-impl:build-final-headers(
  $compiled-merge-options as item(),
  $content-objects,
  $sources-by-document-uri as map:map
) as map:map*
{
  let $merge-options-ref := $compiled-merge-options => map:get("mergeOptionsRef")
  let $ns-map := $compiled-merge-options => map:get("namespaces")
  let $default-merge-rule-info := $compiled-merge-options => map:get("defaultMergeRuleInfo")
  let $header-merge-rules-info := ($compiled-merge-options => map:get("mergeRulesInfo"))[map:contains(., "path")][fn:matches(map:get(., "path"),"^/[\w]*:?envelope/[\w]*:?headers/")]
  let $docs := merge-impl:get-content-docs($content-objects)
  let $top-level-properties := fn:distinct-values(($docs/*:envelope/*:headers/node()[fn:not(fn:local-name-from-QName(fn:node-name(.)) = ("id","merges","sources"))] ! (fn:node-name(.))))
  let $is-hub-central-format := $compiled-merge-options => map:get("isHubCentralFormat")
  return (
    (: Callers split this result with fn:head and fn:tail, so the first item
       must always be a single namespace map. Without the fallback, a missing
       "namespaces" entry would shift the sequence and the first real header
       would be mistaken for the namespace map. :)
    fn:head(($ns-map, map:map())),

    (: 1. Top-level headers without a matching configured merge rule :)
    for $top-level-property in $top-level-properties
    let $local-name := fn:local-name-from-QName($top-level-property)
    let $path-regex := "^/[\w]*:?envelope/[\w]*:?headers/[\w]*:?" || $local-name
    where fn:empty($header-merge-rules-info[fn:matches(map:get(., "path"),$path-regex)])
    return
      let $merge-rule := $default-merge-rule-info => map:get("mergeRule")
      let $algorithm-name := $default-merge-rule-info => map:get("mergeAlgorithmName")
      let $algorithm := $default-merge-rule-info => map:get("mergeAlgorithm")
      let $is-javascript := util-impl:function-is-javascript($algorithm)
      let $merge-rule := util-impl:convert-node-for-function($merge-rule, $is-hub-central-format, $is-javascript, merge-impl:propertyspec-to-json#1, merge-impl:propertyspec-to-xml(?, xs:QName("merging:merge")))
      let $algorithm-info :=
        object-node {
          (: Reported as "standard" whenever no custom algorithm function exists :)
          "name": fn:head(($algorithm-name[fn:exists($algorithm)], "standard")),
          "optionsReference": $merge-options-ref
        }
      let $properties := $docs/*:envelope/*:headers/node()[fn:node-name(.) eq $top-level-property]
      let $raw-values :=
        for $property in $properties
        let $doc-uri := merge-impl:get-uri-for-property($property, $content-objects)
        where fn:exists($doc-uri)
        return
          let $prop-sources := map:get($sources-by-document-uri, $doc-uri)
          return merge-impl:wrap-revision-info($top-level-property, $property, $prop-sources, (), ())
      where fn:exists($raw-values)
      return
        prop-def:new()
          => prop-def:with-algorithm-info($algorithm-info)
          => prop-def:with-path('/es:envelope/es:headers/' || $local-name)
          => prop-def:with-namespaces($ns-map)
          => prop-def:with-extensions(
            (: Collapse the maps returned by the algorithm into one map :)
            fn:fold-left(
              function($cumulative, $map) {
                $cumulative + $map
              },
              map:map(),
              if (fn:exists($algorithm)) then
                merge-impl:execute-algorithm(
                  $algorithm,
                  $top-level-property,
                  $raw-values,
                  $merge-rule
                )
              else
                merge-impl:standard(
                  $top-level-property,
                  $raw-values,
                  $merge-rule
                )
            )
          ),

    (: 2. Headers addressed by a configured merge rule :)
    for $header-merge-rule-info in $header-merge-rules-info
    let $algorithm-name := fn:string($header-merge-rule-info => map:get("mergeAlgorithmName"))
    let $algorithm := $header-merge-rule-info => map:get("mergeAlgorithm")
    let $is-javascript := util-impl:function-is-javascript($algorithm)
    let $algorithm-info :=
      object-node {
        "name": fn:head(($algorithm-name[fn:exists($algorithm)], "standard")),
        "optionsReference": $merge-options-ref
      }
    let $merge-rule := $header-merge-rule-info => map:get("mergeRule")
    let $merge-rule := util-impl:convert-node-for-function($merge-rule, $is-hub-central-format, $is-javascript, merge-impl:propertyspec-to-json#1, merge-impl:propertyspec-to-xml(?, xs:QName("merging:merge")))
    let $raw-values := merge-impl:get-raw-values($content-objects, $header-merge-rule-info, $sources-by-document-uri)
    return
      if (fn:exists($raw-values)) then
        prop-def:new()
          => prop-def:with-algorithm-info($algorithm-info)
          => prop-def:with-namespaces($ns-map)
          => prop-def:with-values(
            (: get the merged values :)
            if (fn:exists($algorithm)) then
              merge-impl:execute-algorithm(
                $algorithm,
                map:get(fn:head($raw-values), "name"),
                $raw-values,
                $merge-rule
              )
            else
              merge-impl:standard(
                map:get(fn:head($raw-values), "name"),
                $raw-values,
                $merge-rule
              )
          )
          => prop-def:with-path($header-merge-rule-info => map:get("path"))
      else ()
  )
};
(:
 : Finds the content object whose document contains the given property and
 : returns that content object's URI. Matching is done on node identity of the
 : document root, which supports both in-memory documents and documents loaded
 : from the database.
 : @param $property a node taken from one of the content documents
 : @param $content-objects maps with a "value" (the document) and a "uri"
 : @return the URI of the owning content object, or () when none matches
 :)
declare private function merge-impl:get-uri-for-property($property, $content-objects) as xs:string?
{
  let $owning-doc := fn:root($property)
  let $owner := $content-objects[map:get(., "value") is $owning-doc]
  return
    if (fn:exists($owner)) then
      map:get($owner, "uri")
    else
      ()
};
(:~
 : Build the sequence of merged triples.
 :
 : NOTE that unlike how other algorithms are configured, the triple merge
 : configuration (merging:triple-merge or tripleMerge) carries the function,
 : namespace and at settings directly. This is because there will only be
 : one triple merge function. When no function is named, "standard-triples"
 : is looked up.
 :
 : @param $compiled-merge-options compiled merge options; read for
 :        "isHubCentralFormat" and "mergeOptionsNode"
 : @param $docs the source documents
 : @param $sources information about the sources of the data
 : @return sequence of sem:triples
 :)
declare function merge-impl:build-final-triples(
  $compiled-merge-options as map:map,
  $docs,
  $sources
) as sem:triple*
{
  let $hub-central := map:get($compiled-merge-options, "isHubCentralFormat")
  let $options-node := map:get($compiled-merge-options, "mergeOptionsNode")
  let $triple-spec := fn:head($options-node/(merging:triple-merge|tripleMerge))
  let $function-name := fn:string(fn:head(($triple-spec/(@function|function), "standard-triples")))
  let $merge-function :=
    fun-ext:function-lookup(
      $function-name,
      fn:string($triple-spec/(@namespace|namespace)),
      fn:string($triple-spec/(@at|at)),
      merge-impl:default-function-lookup(?, 4)
    )
  let $javascript := util-impl:function-is-javascript($merge-function)
  (: Convert both arguments to the representation the target function expects :)
  let $options-arg :=
    util-impl:convert-node-for-function($options-node, $hub-central, $javascript, merge-impl:options-to-json#1, merge-impl:options-from-json#1)
  let $spec-arg :=
    util-impl:convert-node-for-function($triple-spec, $hub-central, $javascript, merge-impl:propertyspec-to-json#1, merge-impl:propertyspec-to-xml(?, xs:QName('merging:triple-merge')))
  return
    xdmp:apply($merge-function, $options-arg, $docs, $sources, $spec-arg)
};
(:~
 : Collect the raw (unmerged) values that a merge rule targets, one entry per
 : source document that actually has values for it.
 : @param $content-objects DHF content wrappers around the source documents;
 :        each is a map with a "value" (the document) and a "uri"
 : @param $merge-rule-info compiled merge rule; its "documentToValuesFunction"
 :        is applied to each document to extract the values
 : @param $sources-by-document-uri map from document URI to the source
 :        information for that document
 : @return a sequence of maps, one for each source document in which values
 :         were found, as produced by wrap-revision-info
 :)
declare function merge-impl:get-raw-values(
  $content-objects,
  $merge-rule-info as map:map,
  $sources-by-document-uri as map:map
) as map:map*
{
  (: Looked up once; it does not change between documents :)
  let $document-to-values := $merge-rule-info => map:get("documentToValuesFunction")
  for $content-object in $content-objects
  let $doc := map:get($content-object, "value")
  let $values := $document-to-values($doc)
  where fn:exists($values)
  return
    (: Prefer the URI stored on the node; fall back to the one on the wrapper :)
    let $curr-uri := (xdmp:node-uri($doc), map:get($content-object, "uri"))[1]
    let $prop-sources := map:get($sources-by-document-uri, $curr-uri)
    return
      merge-impl:wrap-revision-info(
        fn:node-name(fn:head($values)),
        $values,
        $prop-sources,
        (), ()
      )
};
(:
 : Look up instance property values by walking the path configured on a
 : merging:property element.
 : @param $instances all instance values from the source documents
 : @param $path-prop property element whose @path attribute holds the full path
 : @param $ns-map map of namespace prefixes to URIs used to interpret the path
 : @return sequence: the target QName, followed by the matching values
 :   (elements or JSON properties)
 :)
declare function merge-impl:get-instance-props-by-path(
  $instances,
  $path-prop as element(merging:property),
  $ns-map as map:map
)
{
  (: Drop /es:envelope/es:instance/{top property name}; the remainder is evaluated against the instance property :)
  let $relative-path := merge-impl:strip-top-path($path-prop/@path, (), (), fn:false())
  let $steps := fn:tokenize($relative-path, "/")
  let $step-count := fn:count($steps)
  (: Everything but the last step: the parent of the target, so arrays can be detected :)
  let $parent-path := fn:string-join(fn:subsequence($steps, 1, $step-count - 1), "/")
  let $leaf-step := $steps[$step-count]
  let $leaf-prefix := fn:replace($leaf-step, ":.*", "")
  let $leaf-qname := fn:QName(map:get($ns-map, $leaf-prefix), $leaf-step)
  let $matches :=
    for $instance in $instances
    (: Each child is an XML element, a string (JSON property target), or a JSON array :)
    for $child in xdmp:unpath($parent-path, $ns-map, $instance)/node()
    where fn:node-name($child) = $leaf-qname
    return $child
  return ($leaf-qname, $matches)
};
(:
 : Convenience arity: works out whether the source documents are JSON and
 : delegates to the five-argument merge-impl:build-final-properties.
 :
 : Returns a sequence of map:maps, one for each top-level property. Each map has the following keys:
 : - "algorithm" -- object-node with the name and optionsReference of the algorithm used for this property
 : - "sources" -- one or more object-nodes indicating which of the original docs the surviving value(s) came from
 : - "values" -- the surviving property values
 : - "path" -- if the property was specified by a path, the XPath expression
 : - "nsMap" -- if the property was specified by a path, a map of (prefix -> namespace)
 :)
declare function merge-impl:build-final-properties(
  $compiled-merge-options as map:map,
  $instances,
  $content-objects,
  $sources-by-document-uri as map:map
) as map:map*
{
  let $source-docs := merge-impl:get-content-docs($content-objects)
  (: JSON when any document has an object or array node as a child. :)
  let $has-json-content := fn:exists($source-docs/(object-node()|array-node()))
  return
    merge-impl:build-final-properties(
      $compiled-merge-options,
      $instances,
      $content-objects,
      $sources-by-document-uri,
      $has-json-content
    )
};
(:
 : Returns a sequence of map:maps, one for each top-level property. Each map has the following keys:
 : - "algorithm" -- object-node with the name and optionsReference of the algorithm used for this property
 : - "sources" -- one or more object-nodes indicating which of the original docs the surviving value(s) came from
 : - "values" -- the surviving property values
 : - "path" -- if the property was specified by a path, the XPath expression
 : - "nsMap" -- if the property was specified by a path, a map of (prefix -> namespace)
 :
 : Explicit merges come from the compiled merge rules (rules whose path points
 : into the envelope headers are skipped here). Implicit merges apply the
 : default merge rule to every top-level property without a rule of its own,
 : and are only produced when no target entity type definition is available.
 :
 : @param $compiled-merge-options map of compiled merge options
 : @param $instances instance nodes whose children are the top-level properties
 : @param $content-objects DHF content wrappers around the source documents
 : @param $sources-by-document-uri maps document URIs to source information
 : @param $is-json kept for interface compatibility; not referenced in this function body
 :)
declare function merge-impl:build-final-properties(
  $compiled-merge-options as map:map,
  $instances,
  $content-objects,
  $sources-by-document-uri as map:map,
  $is-json
) as map:map*
{
  let $entity-definition := $compiled-merge-options => map:get("targetEntityTypeDefinition")
  let $namespaces-map := $compiled-merge-options => map:get("namespaces")
  let $merge-options-ref := $compiled-merge-options => map:get("mergeOptionsRef")
  let $merge-rules-info := $compiled-merge-options => map:get("mergeRulesInfo")
  let $is-hub-central-format := $compiled-merge-options => map:get("isHubCentralFormat")
  let $top-level-properties := $instances/*
  let $top-level-qnames := fn:distinct-values($top-level-properties ! fn:node-name(.))
  let $explicit-merges :=
    (: Header rules (path under envelope/headers) are not handled by this function. :)
    for $merge-rule-info in $merge-rules-info[fn:not(map:contains(., "path") and fn:matches(map:get(., "path"), "^/[\w]*:?envelope/[\w]*:?headers/"))]
    where fn:exists(map:get($merge-rule-info,"path")) or map:get($merge-rule-info, "propertyQName") = $top-level-qnames
    return
      merge-impl:get-merge-values(
        $merge-rule-info,
        $content-objects,
        $namespaces-map,
        $sources-by-document-uri,
        $merge-options-ref,
        $is-hub-central-format
      )
  let $implicit-merges :=
    if (fn:empty($entity-definition)) then
      let $default-merge-rule-info := $compiled-merge-options => map:get("defaultMergeRuleInfo")
      (: Reuse the distinct QName list computed above instead of rebuilding it. :)
      for $property-name in $top-level-qnames
      where fn:empty($merge-rules-info[map:get(., "propertyQName") = $property-name])
      return
        let $property-instances := $top-level-properties[fn:node-name(.) eq $property-name]
        (: Build a per-property copy of the default rule. map:with would update
         : the shared map held by the compiled options in place, so every
         : property would overwrite the previous property's function. :)
        let $property-merge-rule-info :=
          map:new((
            $default-merge-rule-info,
            map:entry("documentToValuesFunction", function($doc) {
              $property-instances[fn:root(.) is $doc]
            })
          ))
        return
          merge-impl:get-merge-values(
            $property-merge-rule-info,
            $content-objects,
            $namespaces-map,
            $sources-by-document-uri,
            $merge-options-ref,
            $is-hub-central-format
          )
    else ()
  return (
    $explicit-merges,
    $implicit-merges
  )
};
(:~
: Executes the appropriate function for a merge rule.
: @param $merge-rule-info as map:map from the compiled merge options with relevant information about a merge rule.
: The keys read here are "propertyName", "path", "mergeAlgorithm", "mergeRule", "mergeAlgorithmName",
: "documentToValuesFunction" and (optionally) "namespaces".
: @param $content-objects DHF content wrappers around documents; the "value" entry of each wrapper is the document
: and the "uri" entry is used when no URI can be determined from the document itself
: @param $namespaces-map as map:map namespace prefixes mapped to their full URI; used when the merge rule has no "namespaces" entry
: @param $sources-by-document-uri as map:map maps document URIs to object-node()* with relevant source information (i.e., name, dateTime, documentUri)
: @param $merge-options-ref as xs:string A string identifier for merge options primarily used for provenance
: @param $is-hub-central-format as xs:boolean passed to util-impl:convert-node-for-function when converting the merge rule
: @return as map:map* the values returned by the merge algorithm (or by merge-impl:standard when the rule has no
: algorithm). Empty when no source document yields a value for the rule.
:)
declare function merge-impl:get-merge-values(
$merge-rule-info as map:map,
$content-objects,
$namespaces-map as map:map,
$sources-by-document-uri as map:map,
$merge-options-ref as xs:string,
$is-hub-central-format as xs:boolean
) as map:map* {
let $property-name := $merge-rule-info => map:get("propertyName")
let $path := $merge-rule-info => map:get("path")
let $algorithm := $merge-rule-info => map:get("mergeAlgorithm")
let $is-javascript := util-impl:function-is-javascript($algorithm)
let $merge-rule := $merge-rule-info => map:get("mergeRule")
(: Convert the merge rule node for the function that will receive it. :)
let $merge-rule := util-impl:convert-node-for-function($merge-rule, $is-hub-central-format, $is-javascript, merge-impl:propertyspec-to-json#1, merge-impl:propertyspec-to-xml(?, xs:QName("merging:merge")))
let $algorithm-name := $merge-rule-info => map:get("mergeAlgorithmName")
(: Provenance record attached to every wrapped property below. :)
let $algorithm-info :=
object-node {
"name": $algorithm-name,
"optionsReference": $merge-options-ref
}
(: Filled as a side effect of the loop below: generated document id -> values extracted from that document. :)
let $instance-props-by-root-id := map:map()
let $document-to-values-function := $merge-rule-info => map:get("documentToValuesFunction")
let $instance-props :=
for $doc in merge-impl:get-content-docs($content-objects)
let $instance-properties := $document-to-values-function($doc)
return (
map:put($instance-props-by-root-id, fn:generate-id($doc), $instance-properties),
$instance-properties
)
(: Nothing to merge when no document has a value for this rule. :)
where fn:exists($instance-props)
return
let $wrapped-properties :=
for $content-object at $pos in $content-objects
let $doc := map:get($content-object, "value")
let $generate-id := fn:generate-id($doc)
(: Look up the values recorded for this document in the loop above. :)
for $prop-value in map:get($instance-props-by-root-id, $generate-id)
let $prop-qname := fn:node-name($prop-value)
let $prop-sources :=
let $uri := (merge-impl:node-uri($doc), map:get($content-object, "uri"))[1]
return map:get($sources-by-document-uri, $uri)
(: Namespaces on the rule take precedence over the general namespace map. :)
let $ns-map := fn:head(($merge-rule-info => map:get("namespaces"),$namespaces-map))
return
merge-impl:wrap-revision-info($prop-qname, $prop-value, $prop-sources, $path, $ns-map)
=> prop-def:with-algorithm-info($algorithm-info)
=> prop-def:with-retain-array(fn:exists($instance-props/parent::array-node()))
(: The name passed to the algorithm is taken from the first wrapped property. :)
let $prop-qname := fn:head($wrapped-properties) ! map:get(., "name")
let $merged-values :=
if (fn:exists($algorithm)) then
merge-impl:execute-algorithm(
$algorithm,
$prop-qname,
$wrapped-properties,
$merge-rule
)
else
merge-impl:standard(
$prop-qname,
$wrapped-properties,
$merge-rule
)
return (
(: Build the trace message only when the trace event is enabled. :)
if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
xdmp:trace($const:TRACE-MERGE-RESULTS, 'Processing merge rule: ' || $property-name || '
merge rule details: '|| xdmp:to-json-string($merge-rule) || '
merge values details: ' || xdmp:to-json-string($merged-values))
else (),
$merged-values
)
};
(:
 : Determine the URI of a node. The URI reported by xdmp:node-uri wins; for
 : in-memory constructed nodes used in some merges the base URI is tried next,
 : and finally a "$baseUri" text node under the node's root.
 : @param $node the node to inspect (may be empty)
 : @return the first URI found, or empty when $node is empty or nothing is found
 :)
declare function merge-impl:node-uri($node as node()?)
  as xs:string?
{
  for $current in $node
  let $candidates := (
    xdmp:node-uri($current),
    fn:base-uri($current),
    fn:root($current)/text("$baseUri")
  )
  return fn:head($candidates)
};
(:
 : Build one map per property value, tying together the property's name, a
 : value and its sources. When a path is supplied, the path and its namespace
 : map are attached as extensions.
 : @param $property-name XML element or JSON property name
 : @param $properties XML elements or JSON properties corresponding to ES instance properties
 : @param $sources information pulled from the source document headers
 : @param $path an XPath to a property
 : @param $ns-map namespace map for the $path
 : @return sequence of maps
 :)
declare function merge-impl:wrap-revision-info(
  $property-name as xs:QName,
  $properties as item()*,
  $sources as item()*,
  $path as xs:string?,
  $ns-map as map:map?
) as map:map*
{
  let $path-extensions :=
    if (fn:empty($path)) then
      ()
    else
      map:new((
        map:entry("path", $path),
        map:entry("nsMap", $ns-map)
      ))
  return
    merge-impl:wrap-revision-info-with-extensions(
      $property-name,
      $properties,
      $sources,
      $path-extensions
    )
};
(:
 : Build one property definition per value, each carrying the property name,
 : the sources, the single value, and any caller-supplied extensions.
 : @param $property-name XML element or JSON property name
 : @param $properties XML elements or JSON properties corresponding to ES instance properties
 : @param $sources information pulled from the source document headers
 : @param $extensions map for any additional details that need to be associated with a property
 : @return sequence of maps
 :)
declare function merge-impl:wrap-revision-info-with-extensions(
  $property-name as xs:QName,
  $properties as item()*,
  $sources as item()*,
  $extensions as map:map?
) as map:map*
{
  for $single-value in $properties
  let $named := prop-def:with-name(prop-def:new(), $property-name)
  let $sourced := prop-def:with-sources($named, $sources)
  let $valued := prop-def:with-values($sourced, $single-value)
  return prop-def:with-extensions($valued, $extensions)
};
(:
 : Compare all keys and values between two maps.
 : @param $object1 first map
 : @param $object2 second map
 : @return the result of merge-impl:objects-equal-recursive for the two maps
 :)
declare function merge-impl:objects-equal($object1 as map:map, $object2 as map:map) as xs:boolean
{
  let $same-content := merge-impl:objects-equal-recursive($object1, $object2)
  return $same-content
};
(:
 : Compare JSON data for equality.
 :
 : Maps are equal when they have the same number of keys and every key of the
 : first map has an equal value in the second. Arrays are equal when they have
 : the same length and their members are pairwise equal. Anything else is
 : compared with the general comparison operator "=".
 :
 : A map is never equal to a non-map and an array is never equal to a
 : non-array; such mismatches return false rather than raising a type error.
 :
 : @param $object1 first value (map:map, json:array, or any other item)
 : @param $object2 second value
 : @return true when the two values are considered equal
 :)
declare function merge-impl:objects-equal-recursive($object1, $object2)
{
  typeswitch($object1)
    case map:map return
      if (fn:not($object2 instance of map:map)) then
        fn:false()
      else
        let $keys1 := map:keys($object1)
        let $keys2 := map:keys($object2)
        return
          fn:count($keys1) eq fn:count($keys2)
          and (
            every $key in $keys1
            satisfies
              merge-impl:objects-equal-recursive(
                map:get($object1, $key),
                map:get($object2, $key)
              )
          )
    case json:array return
      if (fn:not($object2 instance of json:array)) then
        fn:false()
      else
        let $items1 := json:array-values($object1)
        let $items2 := json:array-values($object2)
        return
          (: fn:count on the array item itself is always 1, so compare the real lengths. :)
          json:array-size($object1) eq json:array-size($object2)
          and fn:count($items1) eq fn:count($items2)
          and (
            every $i in 1 to fn:count($items1)
            satisfies merge-impl:objects-equal-recursive($items1[$i], $items2[$i])
          )
    default return
      $object1 = $object2
};
(:
 : Run a merge algorithm over a set of properties to determine the property
 : values to be used in a merged document. For JavaScript algorithms the
 : properties are handed over as a json:array; the results are passed through
 : merge-impl:normalize-javascript-results.
 : @param $algorithm function that will determine the merged values
 : @param $property-name QName of the property
 : @param $properties value and source data from the source documents
 : @param $property-spec configuration for how this property should be merged
 :)
declare function merge-impl:execute-algorithm(
  $algorithm as xdmp:function,
  $property-name as xs:QName,
  $properties as map:map*,
  $property-spec as item()?
)
{
  xdmp:trace($const:TRACE-MERGE-RESULTS, "Calling function at '" || xdmp:function-module($algorithm) || "' " || xdmp:describe($algorithm, (),())),
  let $algorithm-input :=
    if (util-impl:function-is-javascript($algorithm)) then
      json:to-array($properties)
    else
      $properties
  return
    merge-impl:normalize-javascript-results(
      xdmp:apply($algorithm, $property-name, $algorithm-input, $property-spec)
    )
};
(:
 : Normalize the results of a merge function. A json:array result is unpacked
 : into its members; any result whose "values" entry contains non-node items
 : gets those values converted by merge-impl:normalize-json-to-nodes.
 : @param $results output of a merge function
 :)
declare function merge-impl:normalize-javascript-results(
  $results as item()*
) {
  xdmp:trace($const:TRACE-MERGE-RESULTS, "Normalizing JavaScript results: " || xdmp:describe($results, (), ())),
  for $merged in (
    if ($results instance of json:array) then json:array-values($results) else $results
  )
  let $merged-values := map:get($merged, "values")
  let $has-non-node-values :=
    some $candidate in $merged-values satisfies fn:not($candidate instance of node())
  return
    if ($has-non-node-values) then
      (: Copy the result and replace its "values" entry with node values. :)
      map:new((
        $merged,
        map:entry("values", merge-impl:normalize-json-to-nodes(map:get($merged, "name"), $merged-values))
      ))
    else
      $merged
};
(:
 : Turn the values returned by a merge function into nodes. Nodes pass through
 : unchanged, arrays/objects/maps are serialized with xdmp:to-json, and any
 : other value is wrapped in an object node keyed by the property name.
 : @param $prop-name name of the property the values belong to
 : @param $values values to normalize
 :)
declare function merge-impl:normalize-json-to-nodes(
  $prop-name as xs:QName,
  $values as item()*
) {
  for $value in $values
  return
    typeswitch ($value)
      case node() return
        $value
      case json:array return
        xdmp:to-json($value)/node()
      case json:object return
        xdmp:to-json($value)/node()
      case map:map return
        xdmp:to-json($value)/node()
      default return
        object-node { $prop-name: $value }/node()
};
(: URIs already handled by merge-impl:archive-document; a URI is processed only once while this map holds it. :)
declare variable $documents-archived-in-transaction := map:map();
(:
: Archive a document by replacing its collections according to the merge options.
: The URI is locked first. If the URI has already been archived (per
: $documents-archived-in-transaction) nothing else happens.
: @param $uri URI of the document to archive
: @param $merge-options merge options, compiled with merge-impl:compile-merge-options
:)
declare function merge-impl:archive-document($uri as xs:string, $merge-options as node()?)
{
merge-impl:lock-for-update($uri),
if (map:contains($documents-archived-in-transaction, $uri)) then ()
else
(: If we're archiving a merged document, we want to only retain the collections specifically for merged and archived
and drop collections carried over by the documents merged into it.
:)
let $compiled-merge-options := merge-impl:compile-merge-options($merge-options)
(: A document counts as merged when its URI starts with $MERGED-DIR. :)
let $is-merged-doc := fn:starts-with($uri,$MERGED-DIR)
(: For merged documents an empty map is passed on, so existing collections are not carried over. :)
let $doc-collections := if ($is-merged-doc) then map:map() else map:entry($uri, xdmp:document-get-collections($uri))
return
(: The collection update is evaluated as part of the value stored for the URI, together with fn:true(). :)
map:put(
$documents-archived-in-transaction,
$uri,
(
xdmp:document-set-collections(
$uri,
(
if ($is-merged-doc) then (
let $on-no-match-collections := coll-impl:on-no-match(
$doc-collections,
$compiled-merge-options => map:get("onNoMatch")
)
let $on-merge-collections := coll-impl:on-merge(
$doc-collections,
$compiled-merge-options => map:get("onMerge")
)
(: Exclude any overlap of on-merge with on-no-match :)
return $on-merge-collections[fn:not(. = $on-no-match-collections)]
) else (),
(: The on-archive collections are always included. :)
coll-impl:on-archive(
$doc-collections,
$compiled-merge-options => map:get("onArchive")
)
)
),
fn:true()
)
)
};
(:
: Delegates to helper:NCName-compatible and returns its result.
: @param $value the string to convert
:)
declare function merge-impl:NCName-compatible($value as xs:string)
{
helper:NCName-compatible($value)
};
(: Cache of already decoded values: encoded string -> decoded string. :)
declare variable $_to-decoded-NCName as map:map := map:map();
(:
 : Decode a value with xdmp:decode-from-NCName, caching the result. When
 : decoding raises an error or yields nothing, the value itself is used.
 : @param $value the string to decode
 : @return the decoded (or original) string
 :)
declare function merge-impl:NCName-compatible-reverse($value as xs:string)
{
  if (fn:not(map:contains($_to-decoded-NCName, $value))) then
    let $attempt := try { xdmp:decode-from-NCName($value) } catch * { () }
    let $decoded := fn:head(($attempt, $value))
    return (
      map:put($_to-decoded-NCName, $value, $decoded),
      $decoded
    )
  else
    map:get($_to-decoded-NCName, $value)
};
(: Prefix for locking to identify the task being done on the URI :)
declare variable $lock-task-prefix as xs:string := "sm-merging:";
(: Cache of locked URIs (URI -> true), plus the "runAlready" marker set once the lookup has been performed. :)
declare variable $locked-uris-map as map:map := map:map();
(:
: This attempts to view the URIs that have been locked by other mastering processes.
: Only one attempt is made and the results are cached to avoid too much network noise in a cluster.
: @return the cache map; its keys are the locked URIs plus the "runAlready" marker
:)
declare function merge-impl:locked-uris() {
if (map:contains($locked-uris-map, "runAlready")) then
$locked-uris-map
else
let $transaction-id := xdmp:transaction()
let $locked-uris := fn:distinct-values(
for $host-id in xdmp:hosts()
(: Update-mode transactions on this host, excluding the current transaction. :)
let $check-transactions := xdmp:host-status($host-id)/host:transactions/host:transaction[host:transaction-mode = "update"]/host:transaction-id[. ne $transaction-id]
for $check-transaction in $check-transactions
(: Invoking to another transaction in query mode to avoid deadlocking :)
return xdmp:invoke-function(function() {merge-impl:locked-uris($host-id, $check-transaction)}, map:map() => map:with("update","false"))
)
return (
(: Mark the lookup as done, record every locked URI, then return the cache. :)
map:put($locked-uris-map, "runAlready", fn:true()),
$locked-uris ! map:put($locked-uris-map, ., fn:true()),
$locked-uris-map
)
};
(:
 : Return the URIs behind the write/waiting locks that mastering created for a
 : given host/transaction pair, i.e. the locks whose name starts with
 : $lock-task-prefix, with that prefix removed.
 : This is a separate overloaded function to retain the amp privileges after an invoke.
 : @param $host-id ID of the host whose transaction locks are inspected
 : @param $transaction-id ID of the transaction whose locks are inspected
 :)
declare function merge-impl:locked-uris($host-id, $transaction-id) {
  fn:distinct-values(
    (: The transaction may have closed between reading the host status and reading its locks :)
    try {
      for $lock in xdmp:transaction-locks($host-id, $transaction-id)/(host:waiting|host:write)
      where fn:starts-with($lock, $lock-task-prefix)
      return fn:substring-after($lock, $lock-task-prefix)
    } catch * {()}
  )
};
(:
 : Keep only the URIs that are not present in the map returned by
 : merge-impl:locked-uris().
 : @param $uris candidate URIs
 : @return the URIs that are not recorded as locked
 :)
declare function merge-impl:filter-out-locked-uris($uris) {
  let $locked := merge-impl:locked-uris()
  for $uri in $uris
  where fn:not(map:contains($locked, $uri))
  return $uri
};
(:
 : Report whether a URI is present in the map returned by merge-impl:locked-uris().
 : @param $uri the URI to check
 :)
declare function merge-impl:is-uri-locked($uri as xs:string) as xs:boolean {
  let $locked := merge-impl:locked-uris()
  return map:contains($locked, $uri)
};
(: Don't want to trigger static analysis for our separate read-only operations :)
declare variable $lock-for-update-fun := fn:function-lookup(xs:QName('xdmp:lock-for-update'), 1);
(: URIs this module has already locked; prevents repeating the work below for the same URI. :)
declare variable $locked-in-this-transaction as map:map := map:map();
(:
: Lock a URI for update, once per URI. Two locks are requested: one on the URI
: itself and one on the URI prefixed with $lock-task-prefix. The URI is also
: recorded in the map returned by merge-impl:locked-uris() if it is not there yet.
: @param $uri the URI to lock
:)
declare function merge-impl:lock-for-update($uri as xs:string) {
if (fn:not(map:contains($locked-in-this-transaction, $uri))) then (
map:put($locked-in-this-transaction, $uri, fn:true()),
(: Record the URI in the locked-URI cache unless it is already present. :)
if (fn:not(merge-impl:is-uri-locked($uri))) then
map:put(merge-impl:locked-uris(), $uri, fn:true())
else (),
(: This is to tell transactions outside mastering we are working with this document :)
$lock-for-update-fun($uri),
(: Below is to identify locks specific to mastering :)
$lock-for-update-fun($lock-task-prefix || $uri)
) else ()
};
(:
 : Return the "value" entry of every content object. An explicit loop is used,
 : rather than the "!" operator, to ensure consistent ordering of the returned docs.
 : @param $content-objects content wrappers holding a "value" entry
 :)
declare private function merge-impl:get-content-docs($content-objects) {
  for $content-object in $content-objects
  return $content-object => map:get("value")
};
© 2015 - 2024 Weber Informatics LLC | Privacy Policy