ml-modules.root.com.marklogic.smart-mastering.impl.util.xqy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of marklogic-data-hub Show documentation
Show all versions of marklogic-data-hub Show documentation
Library for Creating an Operational Data Hub on MarkLogic
(:
Copyright (c) 2021 MarkLogic Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
:)
xquery version "1.0-ml";
(:
: This is an implementation library, not an interface to the Smart Mastering functionality.
:
: This library contains functions that are shared across matching/merging
:)
module namespace util-impl = "http://marklogic.com/smart-mastering/util-impl";
import module namespace const = "http://marklogic.com/smart-mastering/constants"
at "/com.marklogic.smart-mastering/constants.xqy";
import module namespace es-helper = "http://marklogic.com/smart-mastering/entity-services"
at "/com.marklogic.smart-mastering/sm-entity-services.xqy";
import module namespace httputils = "http://marklogic.com/data-hub/http-utils"
at "/data-hub/5/impl/http-utils.xqy";
declare namespace es = "http://marklogic.com/entity-services";
(: Module-level map of write objects keyed by document URI; populated by the functions below. :)
declare variable $write-objects-by-uri as map:map := map:map();
(:
 : Registers each supplied write object in the module-level map, using the
 : value stored under the object's "uri" key as the map key.
 :
 : @param $write-objects  write objects to register
 : @return the module-level map after the supplied objects have been added
 :)
declare function util-impl:add-all-write-objects(
  $write-objects as map:map*)
as map:map?
{
  (
    (: map:put yields the empty sequence, so only the map itself is returned :)
    $write-objects ! map:put($write-objects-by-uri, map:get(., "uri"), .),
    $write-objects-by-uri
  )
};
(:
 : Returns the write object for a URI. A previously registered object is
 : returned from the module-level map; otherwise one is built from the
 : document at that URI, stored in the map, and returned.
 :
 : @param $uri  URI of the document
 : @return the write object, or the empty sequence when the URI is neither
 :         registered nor backed by a document
 :)
declare function util-impl:retrieve-write-object(
  $uri as xs:string)
as map:map?
{
  if (map:contains($write-objects-by-uri, $uri)) then
    map:get($write-objects-by-uri, $uri)
  else
    (: fn:doc yields the empty sequence for a missing document; passing that to
       build-write-object-for-doc (which requires a document-node()) would raise
       a type error, so return the empty sequence allowed by the signature. :)
    let $doc := fn:doc($uri)
    return
      if (fn:empty($doc)) then
        ()
      else
        let $write-obj := util-impl:build-write-object-for-doc($doc)
        return (
          map:put($write-objects-by-uri, $uri, $write-obj),
          $write-obj
        )
};
(:
 : Builds a write object for a document: a map with "uri", "value" and
 : "context" keys, where "context" is itself a map holding the document's
 : "collections", "metadata" and "permissions".
 :
 : @param $doc  the document to describe
 : @return the write object map
 :)
declare function util-impl:build-write-object-for-doc($doc as document-node())
as map:map
{
  let $context-entries := (
    map:entry("collections", xdmp:node-collections($doc)),
    map:entry("metadata", xdmp:node-metadata($doc)),
    map:entry("permissions", xdmp:node-permissions($doc, "objects"))
  )
  let $context := map:new($context-entries)
  return
    map:new((
      map:entry("uri", xdmp:node-uri($doc)),
      map:entry("value", $doc),
      map:entry("context", $context)
    ))
};
(:
 : Replaces the "collections" entry in a document's write-object context with
 : the result of calling $collection-function on a single-entry map of
 : URI -> current collections.
 :
 : @param $uri                  URI of the document whose write object is adjusted
 : @param $collection-function  function that receives the URI -> collections map
 :                              and returns the new collections
 : @return the adjusted write object
 :)
declare function util-impl:adjust-collections-on-document(
  $uri as xs:string,
  $collection-function as function(map:map) as xs:string*)
{
  let $target := util-impl:retrieve-write-object($uri)
  let $context := map:get($target, "context")
  let $updated-collections := $collection-function(map:entry($uri, map:get($context, "collections")))
  return (
    (: map:put and xdmp:trace both yield the empty sequence, leaving only the write object :)
    map:put($context, "collections", $updated-collections),
    if (xdmp:trace-enabled($const:TRACE-MERGE-RESULTS)) then
      xdmp:trace($const:TRACE-MERGE-RESULTS, "Setting collections to URI '"|| $uri ||"': " || fn:string-join($updated-collections, ","))
    else (),
    $target
  )
};
(:
 : Combines a base map with zero or more further maps by applying the map
 : "+" operator pairwise, left to right.
 :
 : @param $base-map  the starting map
 : @param $maps      maps to combine into the base map, in order
 : @return the combined map
 :)
declare function util-impl:combine-maps($base-map as map:map, $maps as map:map*)
as map:map
{
  let $merge-pair := function($accumulated, $next) { $accumulated + $next }
  return fn:fold-left($merge-pair, $base-map, $maps)
};
(:
: Builds a map whose keys are property names (or, for path-based definitions, the path)
: and whose values are functions that take a document and return the nodes for that property.
:
: @param $rules rules whose property-name/propertyName, entityPropertyPath and documentXPath values name properties
: @param $property-definitions optional node holding property/properties definitions and, optionally, namespaces
: @param $entity-type-iri optional entity type IRI used to look up entity property information
: @param $return-all-properties when true, every key of the entity property information is included
:        instead of the property names / entity property paths taken from $rules
: @param $message-output optional map handed to util-impl:handle-option-messages when a property cannot be resolved
: @return map:map of property name (or path) to extraction function
:)
declare function util-impl:properties-to-values-functions(
$rules as node()*,
$property-definitions as node()?,
$entity-type-iri as xs:string?,
$return-all-properties as xs:boolean,
$message-output as map:map?)
as map:map
{
(: Namespace bindings used for XPath evaluation: taken from a "namespaces" child when present,
   otherwise built from the namespace nodes of $property-definitions that have a non-empty local name. :)
let $xpath-namespaces :=
if (fn:exists($property-definitions/namespaces)) then
xdmp:from-json($property-definitions/namespaces)
else
map:new(
for $ns in $property-definitions/namespace::node()
let $localname := fn:local-name($ns)
where $localname ne ""
return map:entry($localname, fn:string($ns))
)
(: Entity property information is only looked up when an entity type IRI was supplied. :)
let $entity-property-info :=
if (fn:exists($entity-type-iri)) then
es-helper:get-entity-property-info($entity-type-iri)
else
map:map()
(: Every distinct property name mentioned by the definitions, the rules, or (optionally) the entity type. :)
let $distinct-properties :=
fn:distinct-values((
$property-definitions/(*:property|*:properties)/(@name|name) ! fn:string(.),
if ($return-all-properties) then
map:keys($entity-property-info)
else (
$rules/(@property-name|propertyName),
$rules/entityPropertyPath
),
$rules/documentXPath
))
return map:new(
for $property-name in $distinct-properties
(: Shadows the outer variable with the entry for this one property (empty when there is none). :)
let $entity-property-info := $entity-property-info => map:get($property-name)
let $property-definition := $property-definitions/(*:property|properties)[(@name|name) = $property-name]
(: First of: a property definition that carries a path, or a rule whose documentXPath equals the property name. :)
let $document-xpath-rule := fn:head(($property-definition[@path|path], $rules[documentXPath eq $property-name]))
let $function :=
if (fn:exists($entity-property-info)) then
(: optimization for top-level properties: names without a "." are resolved by QName lookup
   under the instance instead of evaluating a path with xdmp:unpath :)
if (fn:contains($property-name, ".")) then
(: Evaluate the path expression relative to the instance node, so the envelope/instance prefix is stripped. :)
let $xpath := fn:substring-after($entity-property-info => map:get("pathExpression"), "/(es:envelope|envelope)/(es:instance|instance)/")
let $namespaces := $entity-property-info => map:get("namespaces")
return
function($document) {
xdmp:unpath($xpath, $namespaces, $document/(es:envelope|envelope)/(es:instance|instance))
}
else
let $entity-title := $entity-property-info => map:get("entityTitle")
let $property-title := $entity-property-info => map:get("propertyTitle")
let $namespace := $entity-property-info => map:get("namespace")
let $entity-qname := fn:QName(fn:string($namespace), fn:string($entity-title))
let $qname := fn:QName(fn:string($namespace), fn:string($property-title))
return
function($document) {
(: For documents detected as JSON the QNames are rebuilt with an empty namespace. :)
let $is-json := (xdmp:node-kind($document) = "object" or fn:exists($document/(object-node()|array-node())))
let $entity-qname:=
if($is-json)
then
fn:QName("", fn:string($entity-title))
else
$entity-qname
let $qname:=
if($is-json)
then
fn:QName("", fn:string($property-title))
else
$qname
return $document/(es:envelope|envelope)/(es:instance|instance)/*[fn:node-name(.) eq $entity-qname]/*[fn:node-name(.) eq $qname]
}
else if (fn:exists($document-xpath-rule)) then
(: Path-based property: use the rule's path (falling back to the property name) and its own
   namespaces when present, otherwise the namespaces computed above. :)
let $xpath := fn:head(($document-xpath-rule/(@path|path),$property-name))
let $namespaces := fn:head(($document-xpath-rule/namespaces ! xdmp:from-json(.), $xpath-namespaces))
return
function($document) {
xdmp:unpath($xpath, $namespaces, $document)
}
else if (fn:exists($property-definition)) then
(: Definition with namespace/localname: match child elements by QName below the instance, excluding the info section. :)
let $qname := fn:QName(fn:string($property-definition/(@namespace|namespace)), fn:string($property-definition/(@localname|localname)))
return
function($document) {
$document/(es:envelope|envelope)/(es:instance|instance)/(descendant::* except (es:info|info)/descendant-or-self::*)/*[fn:node-name(.) eq $qname]
}
else
(: Nothing describes this property: report it as an error. handle-option-messages is declared
   to return the empty sequence, so $function is empty in this case. :)
util-impl:handle-option-messages("error", "Property information for '" || $property-name || "'" || (if (fn:exists($entity-type-iri)) then " entity <"||$entity-type-iri ||">" else "") || " not found!", $message-output)
(: When the matching rule/definition carries a path, the path replaces the property name as the map key. :)
let $property-name := fn:head(($document-xpath-rule/(@path|path) ! fn:string(.),$property-name))
return
map:entry($property-name, $function)
)
};
(:
 : Reports a message about match/merge options.
 : When a message map is supplied the message is appended to the values stored
 : under $type. Without a map, "error" messages are raised through
 : httputils:throw-bad-request and all other types are written with xdmp:log
 : using $type as the log level.
 :
 : @param $type             message type, e.g. "error"
 : @param $message          message text
 : @param $messages-output  optional map that collects messages by type
 : @return empty sequence
 :)
declare function util-impl:handle-option-messages($type as xs:string, $message as xs:string, $messages-output as map:map?)
as empty-sequence()
{
  if (fn:empty($messages-output)) then
    if ($type eq "error") then
      httputils:throw-bad-request((), $message)
    else
      xdmp:log($message, $type)
  else
    map:put($messages-output, $type, (map:get($messages-output, $type), $message))
};
(:
 : Collects target entity type information from match or merge options.
 :
 : @param $options as node()? match or merge options as object-node() or element()
 : @return map:map {
 :   "targetEntityType" The reference to the entity type in the options, either IRI or title
 :   "targetEntityTypeDefinition" Object with entity type definition information
 :   "targetEntityTypeIRI" The full IRI for the entity type
 : }
 :)
declare function util-impl:get-entity-type-information($options as node()?) {
  (: targetEntityType takes precedence over target-entity / targetEntity :)
  let $type-reference :=
    for $reference-node in fn:head(($options/targetEntityType, $options/(*:target-entity|targetEntity)))
    return fn:string($reference-node)
  let $definition := es-helper:get-entity-def($type-reference)
  let $iri :=
    for $iri-node in $definition/entityIRI
    return fn:string($iri-node)
  return
    map:entry("targetEntityType", $type-reference)
      => map:with("targetEntityTypeDefinition", $definition)
      => map:with("targetEntityTypeIRI", $iri)
};
(:
 : Converts an options node into the form expected by a custom function.
 :
 : Hub Central format: JavaScript functions receive xdmp:from-json($node),
 : XQuery functions receive the node unchanged.
 : Other formats: the node kind decides. Elements are passed through
 : $conversion-to-json-function for JavaScript and returned as-is for XQuery;
 : object nodes are passed through xdmp:from-json for JavaScript and
 : additionally through $conversion-to-xml-function for XQuery. Any other
 : node kind yields the empty sequence.
 :
 : @param $node as node()? the node to convert
 : @param $is-hub-central-format as xs:boolean
 : @param $is-function-javascript as xs:boolean
 : @param $conversion-to-json-function as function(item()) as item()*
 : @param $conversion-to-xml-function as function(item()) as item()*
 : @return item()?
 :)
declare function util-impl:convert-node-for-function(
  $node as node()?,
  $is-hub-central-format as xs:boolean,
  $is-function-javascript as xs:boolean,
  $conversion-to-json-function as function(item()) as item()*,
  $conversion-to-xml-function as function(item()) as item()*
) as item()? {
  if (fn:empty($node)) then
    ()
  else if ($is-hub-central-format) then
    (if ($is-function-javascript) then xdmp:from-json($node) else $node)
  else
    typeswitch($node)
      case element() return
        if ($is-function-javascript) then
          $conversion-to-json-function($node)
        else
          $node
      case object-node() return
        if ($is-function-javascript) then
          xdmp:from-json($node)
        else
          $conversion-to-xml-function(xdmp:from-json($node))
      default return
        ()
};
(:
 : Tells whether a function reference points at a JavaScript module, i.e.
 : whether the module path reported by xdmp:function-module ends with "js".
 : An absent function is never JavaScript.
 :
 : @param $fun as xdmp:function?
 : @return xs:boolean
 :)
declare function util-impl:function-is-javascript(
  $fun as xdmp:function?
) as xs:boolean {
  if (fn:empty($fun)) then
    fn:false()
  else
    fn:ends-with(xdmp:function-module($fun), "js")
};
(: NOTE(review): not referenced by any function visible in this module; presumably a time limit in seconds — confirm against callers. :)
declare variable $SESSION_TIMEOUT := 600;
(:
 : Applies $fun to every item. If that fails with XDMP-EXTIME or SVC-EXTIME,
 : the items are split by cost: items whose cost is a high outlier are handed
 : to $outlier-handler (when one is given) and logged as warnings, and the
 : remaining items are processed again by a recursive call. When no high
 : outliers are found, all items are treated as outliers.
 : Any other error is rethrown.
 :
 : @param $fun              function applied to each item
 : @param $items            as item()* the items to process
 : @param $item-cost-fun    function returning the cost of an item; results are
 :                          passed to util-impl:determine-high-outliers
 : @param $outlier-handler  optional function called with the outlier items
 : @return item()* results of $fun, or of the retry and outlier handling
 :)
declare function util-impl:process-items-in-set-time(
  $fun,
  $items as item()*,
  $item-cost-fun,
  $outlier-handler
) {
  try {
    for $current in $items
    return $fun($current)
  } catch ($error) {
    if (fn:not($error/error:code = ("XDMP-EXTIME", "SVC-EXTIME"))) then
      xdmp:rethrow()
    else
      let $costs := for $entry in $items return $item-cost-fun($entry)
      let $outlier-costs := util-impl:determine-high-outliers($costs)
      let $outlier-positions := for $cost in $outlier-costs return fn:index-of($costs, $cost)
      let $remaining :=
        if (fn:exists($outlier-positions)) then
          $items[fn:not(fn:position() = $outlier-positions)]
        else
          ()
      (: With nothing left over, every item is considered too costly and goes to the outlier handling :)
      let $outliers :=
        if (fn:exists($remaining)) then
          $items[fn:position() = $outlier-positions]
        else
          $items
      return (
        util-impl:process-items-in-set-time($fun, $remaining, $item-cost-fun, $outlier-handler),
        if (fn:empty($outlier-handler)) then
          ()
        else
          $outlier-handler($outliers),
        $outliers ! xdmp:log("Unable to process "|| xdmp:describe(., (), ()) || " outlier item.", "warning")
      )
  }
};
(:
 : Returns the values that are high outliers.
 : The sorted values are split into a lower half (the first count idiv 2
 : values) and an upper half (the rest); each half is averaged, and a value is
 : an outlier when it is greater than
 :   upper average + 1.5 * (upper average - lower average).
 :
 : @param $items as xs:unsignedLong* the values to inspect
 : @return the values from $items above the threshold, in their original order
 :)
declare function util-impl:determine-high-outliers(
  $items as xs:unsignedLong*) {
  let $sorted :=
    for $value in $items
    order by $value ascending
    return $value
  let $split := fn:count($items) idiv 2
  let $lower-mean := fn:avg(fn:subsequence($sorted, 1, $split))
  let $upper-mean := fn:avg(fn:subsequence($sorted, $split + 1))
  let $threshold := $upper-mean + (1.5 * ($upper-mean - $lower-mean))
  return $items[. gt $threshold]
};
© 2015 - 2024 Weber Informatics LLC | Privacy Policy