All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ml-modules.root.data-hub.5.impl.hub-entities.xqy Maven / Gradle / Ivy

There is a newer version: 6.1.1
Show newest version
(:
  Copyright (c) 2021 MarkLogic Corporation

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
:)
xquery version "1.0-ml";

module namespace hent = "http://marklogic.com/data-hub/hub-entities";

import module namespace es = "http://marklogic.com/entity-services"
  at "/MarkLogic/entity-services/entity-services.xqy";

import module namespace ext = "http://marklogic.com/data-hub/extensions/entity"
  at "/data-hub/extensions/entity/post-process-search-options.xqy";


import module namespace tg = "http://marklogic.com/data-hub/hub-entities" at "/data-hub/5/impl/template-generated.xqy";

import module namespace sem = "http://marklogic.com/semantics" at "/MarkLogic/semantics.xqy";
import module namespace functx = "http://www.functx.com" at "/MarkLogic/functx/functx-1.0-nodoc-2007-01.xqy";
declare namespace search = "http://marklogic.com/appservices/search";
declare namespace tde = "http://marklogic.com/xdmp/tde";


(: Collection that entity model documents are read from by hent:get-model and hent:uber-model. :)
declare variable $ENTITY-MODEL-COLLECTION := "http://marklogic.com/entity-services/models";

(: NOTE(review): presumably turns on MarkLogic function mapping for this module. Several functions in
   this module call map functions with possibly-empty arguments and appear to rely on that returning
   an empty sequence rather than raising - confirm before changing this option. :)
declare option xdmp:mapping "true";



(:
Convenience entry point: looks up the entity model titled $entity-name with an empty list of
already-visited models. Delegates to the two-argument hent:get-model.
:)
declare function hent:get-model($entity-name as xs:string)
{
  let $no-visited-models as xs:string* := ()
  return hent:get-model($entity-name, $no-visited-models)
};

(:
Returns, as a map, the entity model whose info/title matches $entity-name, or nothing when no model matches.

The title is first matched case-insensitively; if that yields more than one model, the first model with an
exactly matching title is used instead.

For every local reference in the model (a $ref starting with #/definitions) whose name is not in $used-models
and which has no definition in this model, the model with that name is looked up recursively and all of its
definitions are copied into the returned map. $used-models carries the names already visited so that models
referencing each other do not recurse forever.
:)
declare function hent:get-model($entity-name as xs:string, $used-models as xs:string*)
{
  let $case-insensitive-matches :=
    fn:collection($ENTITY-MODEL-COLLECTION)[lower-case(info/title) = lower-case($entity-name)]
  let $model :=
    if (fn:count($case-insensitive-matches) le 1) then
      $case-insensitive-matches
    else
      fn:head(fn:collection($ENTITY-MODEL-COLLECTION)[info/title = $entity-name])
  return
    if (fn:empty($model)) then ()
    else
      let $model-map as map:map? := $model
      let $definitions := map:get($model-map, "definitions")
      let $local-ref-names :=
        for $ref-node in $model//*[fn:local-name(.) = '$ref'][fn:starts-with(., "#/definitions")]
        return fn:replace($ref-node, "#/definitions/", "")
      (: Evaluated only for its side effect of adding the missing definitions to $definitions :)
      let $_merge-missing-definitions :=
        for $ref-name in $local-ref-names[fn:not(. = $used-models)]
        where fn:empty(map:get($definitions, $ref-name))
        return
          let $other-model as map:map? := hent:get-model($ref-name, ($used-models, $entity-name))
          let $other-definitions := map:get($other-model, "definitions")
          for $definition-name in map:keys($other-definitions)
          return map:put($definitions, $definition-name, map:get($other-definitions, $definition-name))
      return $model-map
};

(:
See the comments on the hent:uber-model($models) function.
:)
declare function hent:uber-model() as map:map
{
  (: Build the uber model from every object node in the entity model collection :)
  let $all-model-objects := fn:collection($ENTITY-MODEL-COLLECTION)/object-node()
  return hent:uber-model($all-model-objects)
};

(:
This concept of an "uber model" dates back to DHF 4. It combines the entity definitions from every model into a single
entity model. However, this means that if there are two models that have an entity definition - such as "Address" - with
the same name but different config, only one will be in the returned model. This may lead to bugs, such as DHFPROD-7713.

This approach seems to be based on how QuickStart in DHF 4 and 5 would create a duplicate entity definition in some
scenarios. For example, if you had a Customer in QS and then added a property of type Address, QS would both add
Address to the Customer entity model, and it would also create an Address entity model with the same config. So QS was
already assuming that if you have two entity definitions with the same name in different models, they must have the
same config, as it was likely QS that did that.

If it turns out that users need to have multiple entity definitions with the same name but different config, this
function will of course need to be reworked to accommodate that.
:)
declare function hent:uber-model($models as object-node()*) as map:map
{
  (: Later models win when two models define an entity type with the same name :)
  let $merged-definitions :=
    map:new(
      for $model as map:map in $models
      let $definitions := map:get($model, "definitions")
      return
        if (fn:empty($definitions)) then ()
        else map:keys($definitions) ! map:entry(., map:get($definitions, .))
    )
  return map:new(map:entry("definitions", $merged-definitions))
};

(:
Reports whether TDE generation is enabled for the given entity model.

The "primary" entity type is the definition named after info/title, falling back to the first definition in
the model. TDE generation is disabled only when that definition's tdeGenerationDisabled property is true or
"true"; when the property is missing, empty, or has any other value, generation is enabled.
:)
declare function hent:is-tde-generation-enabled($entity-def as object-node()) as xs:boolean
{
  let $entity-def-map as map:map := $entity-def
  let $definitions := $entity-def-map => map:get("definitions")
  let $entity-title := $entity-def/info/title
  let $primary-type-def := ($definitions => map:get($entity-title), $definitions => map:get(map:keys($definitions)[1]))[1]
  let $property-value := xs:string(map:get($primary-type-def, "tdeGenerationDisabled"))
  (: A general comparison against an empty sequence is always false, so "!=" would wrongly report
     "disabled" for a property that is present but has no value. Negating "=" treats a missing or
     empty value the same as any other value that is not "true". :)
  return fn:not($property-value = "true")
};




(:
Returns $item the first time $property-name is seen and records the name in $duplicate-map. On later calls
with the same name, returns a comment node containing the serialized item instead of the item itself.
:)
declare function hent:wrap-duplicates(
  $duplicate-map as map:map,
  $property-name as xs:string,
  $item as element()
) as item()
{
  let $already-seen := map:contains($duplicate-map, $property-name)
  return
    if (fn:not($already-seen)) then
      let $_remember := map:put($duplicate-map, $property-name, true())
      return $item
    else
      comment {
        "This item is a duplicate and is commented out so as to create a valid artifact.
",
        xdmp:quote($item),
        "
"
      }
};

(:
  Doctors the search options generated by Entity Services because of
  https://github.com/marklogic/entity-services/issues/359

  Recursively rebuilds each node in $nodes:
  - search:additional-query elements are dropped
  - search:path-index elements are rebuilt via fix-path-index
  - every occurrence of "es:" in a text node is replaced with "*:"
  - every other element is copied with its namespaces, attributes and fixed-up children

  NOTE(review): in this copy of the file the direct element constructors appear to have been stripped
  (blank comma-separated items under search:options, a bare `true`, a bare `limit=25`), so the function
  does not compile as shown. Restore the literal XML from the upstream source before relying on it.
:)
declare function hent:fix-options($nodes as node()*)
{
  for $n in $nodes
  return
    typeswitch($n)
      case element(search:options) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          
            
          ,
          
            
              
            
          ,
          
            
              
            
          ,
          
            
              
            
          ,
          hent:fix-options($n/node())
        }
      case element(search:additional-query) return ()
      case element(search:return-facets) return true
      case element(search:path-index) return fix-path-index($n)
      case element() return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          hent:fix-options($n/node()),

          (: Only a search:range directly under a search:constraint that has no "limit=" facet-option
             receives the extra content returned below :)
          let $is-range-constraint := $n[self::search:range] and $n/..[self::search:constraint]
          where $is-range-constraint and fn:not($n/search:facet-option[starts-with(., "limit=")])
          return limit=25
        }
      case text() return
        fn:replace($n, "es:", "*:")
      default return $n
};

(:
  Explorer variant of hent:fix-options. Recursively rebuilds the ES-generated search options:
  - under search:options, the static explorer constraints and the sort operator built from
    $sortable-properties are emitted before the fixed-up children
  - a search:constraint that contains a search:container is dropped; any other constraint is rebuilt with
    "/" replaced by "." in its attribute values, and a range whose fixed path is flagged "is-sortable-only"
    in $sortable-properties is re-emitted with facet="false"
  - search:additional-query and search:extract-document-data are dropped
  - search:path-index is rebuilt via fix-path-index, and "es:" becomes "*:" in text nodes

  $sortable-properties is keyed by path expression; $entity-namespace-map is keyed by namespace prefix.

  NOTE(review): in this copy of the file the direct element constructors appear to have been stripped
  (bare 30/4/200, a bare `true`, an empty `return` for search:transform-results, bare facet options), so the
  function does not compile as shown. Restore the literal XML from the upstream source before relying on it.
:)
declare %private function hent:fix-options-for-explorer(
  $nodes as node()*,
  $sortable-properties as map:map,
  $entity-namespace-map as map:map
)
{
  for $n in $nodes
  return
    typeswitch($n)
      case element(search:options) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          build-static-explorer-constraints(),
          hent:build-sort-operator($sortable-properties, $entity-namespace-map),
          hent:fix-options-for-explorer($n/node(), $sortable-properties, $entity-namespace-map),
          
            30
            4
            200
          
        }
      case element(search:constraint) return
        let $container-for-entity-property-generated-by-es := $n/search:container
        where fn:not($container-for-entity-property-generated-by-es)
        return
          element {fn:node-name($n)} {
            $n/@* ! attribute {fn:node-name()} { fn:replace(., "/", ".")},
            let $path-expression := fix-path-expression(fn:string($n/search:range/search:path-index))
            let $search-range-node := $n/search:range
            let $is-sortable-only :=
              let $sort-info := map:get($sortable-properties, $path-expression)
              return
                if (fn:exists($sort-info)) then map:get($sort-info, "is-sortable-only") = fn:true()
                else fn:false()
            return
              if (fn:empty($search-range-node) or fn:not($is-sortable-only)) then
                hent:fix-options-for-explorer($n/node(), $sortable-properties, $entity-namespace-map)
              else
                (: Sortable-only property: keep the range but switch faceting off :)
                element {fn:node-name($search-range-node)} {
                  $search-range-node/attribute()[not(name() = 'facet')],
                  attribute facet {"false"},
                  hent:fix-options-for-explorer($search-range-node, $sortable-properties, $entity-namespace-map)/node()
                }
          }
      case element(search:additional-query) return ()
      case element(search:return-facets) return true
      (: HubCentral doesn't have any need for extracted data :)
      case element(search:extract-document-data) return ()
      case element(search:transform-results) return 
      case element(search:path-index) return fix-path-index($n)
      case element() return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          hent:fix-options-for-explorer($n/node(), $sortable-properties, $entity-namespace-map),

          let $is-range-constraint := $n[self::search:range] and $n/..[self::search:constraint]
          where $is-range-constraint and fn:not($n/search:facet-option[starts-with(., "limit=")])
          return (
            limit=25,
            frequency-order,
            descending)
        }
      case text() return
        fn:replace($n, "es:", "*:")
      default return $n
};

(:
Returns true when $name equals the name of a constraint that Entity Services generates for an empty model or
the name of one of the static explorer constraints. An empty $name yields false.
:)
declare function is-explorer-constraint-name($name as xs:string?) as xs:boolean
{
  let $reserved-names := (
    es:search-options-generate(map:map())/search:constraint/@name/fn:string(),
    build-static-explorer-constraints()/@name/fn:string()
  )
  return $name = $reserved-names
};

(:
Returns the fixed set of search:constraint elements that the explorer search options always include.
Defined in a separate function so that these can be referenced when validating entity names.

NOTE(review): in this copy of the file the body's direct element constructors appear to have been stripped,
leaving only whitespace and facet-option text (limit=25 / frequency-order / descending). The function does not
compile as shown; restore the literal XML from the upstream source before relying on it.
:)
declare private function build-static-explorer-constraints() as element(search:constraint)+
{
  (: This wrapper element is used to avoid repeating the "search:" prefix over and over :)
  
    
      
        limit=25
        frequency-order
        descending
      
    
    
      
        
      
    
    
      
        
      
    
    
      
        
      
    
    
      
        
        limit=25
        frequency-order
        descending
      
    
    
      
        
      
    
    
      
        
      
    
    
      
        
      
    
    
      
        
        limit=25
        frequency-order
        descending
      
    
    
      
        
        limit=25
        frequency-order
        descending
      
    
    
      
        
        limit=25
        frequency-order
        descending
      
    
    
      
        
      
    
  /element()
};

(:
Builds the sort operator for the explorer search options, or returns nothing when there are no sortable
properties.

For each path expression in $sortable-properties, and for both "ascending" and "descending", one state is
built around a search:path-index that declares the "es" namespace plus every prefix in
$entity-namespace-map and contains the path expression.

NOTE(review): in this copy of the file the direct element constructors appear to have been stripped, which is
presumably why $indexable-datatype, $state-name-prefix and $direction look unused and why the final return is
a bare enclosed expression. Restore the literal XML from the upstream source before relying on it.
:)
declare %private function hent:build-sort-operator(
  $sortable-properties as map:map,
  $entity-namespace-map as map:map
) as element(search:operator)?
{
  let $states :=
    for $path-expression in map:keys($sortable-properties)
    let $sort-info := map:get($sortable-properties, $path-expression)
    let $indexable-datatype := hent:get-indexable-datatype(map:get($sort-info, "property-datatype"))
    let $state-name-prefix := fn:concat(map:get($sort-info, "entity-title"), "_", map:get($sort-info, "property-name"))
    for $direction in ("ascending", "descending")
    return
      
        
          {
            element search:path-index {
              attribute {"xmlns:es"} {"http://marklogic.com/entity-services"},
              for $prefix in map:keys($entity-namespace-map)
              return attribute {"xmlns:" || $prefix} {map:get($entity-namespace-map, $prefix)},
              $path-expression
            }
          }
        
      
  where $states
  return {$states}
};

(:
Maps an ES logical datatype to the scalar datatype used when indexing it. Datatypes without a special
mapping are returned unchanged.
:)
declare function hent:get-indexable-datatype($datatype as xs:string) as xs:string
{
  if ($datatype = ("boolean", "iri")) then
    "string"
  else if ($datatype = ("byte", "short")) then
    "int"
  else if ($datatype = ("unsignedShort", "unsignedByte")) then
    "unsignedInt"
  else if ($datatype = ("integer", "negativeInteger", "nonNegativeInteger", "positiveInteger", "nonPositiveInteger")) then
    "decimal"
  else
    $datatype
};

(:
Builds a map of namespace prefix to namespace URI from the entity definitions in $uber-model. Only definitions
that set both "namespace" and "namespacePrefix" contribute an entry. The function is public so that unit tests
can call it.
:)
declare function build-entity-namespace-map($uber-model)
{
  let $definitions := map:get($uber-model, "definitions")
  let $prefix-entries :=
    for $entity-type in map:keys($definitions) ! map:get($definitions, .)
    let $namespace-uri := map:get($entity-type, "namespace")
    let $namespace-prefix := map:get($entity-type, "namespacePrefix")
    return
      if ($namespace-uri and $namespace-prefix) then
        map:entry($namespace-prefix, $namespace-uri)
      else ()
  return map:new($prefix-entries)
};

(:
Generates search options for the given entity models.

The models are first passed through hent:add-indexes-for-entity-properties, then merged into an uber model
from which Entity Services generates the options. When $for-explorer is true the options are fixed up for the
explorer and post-processed by the extension module; otherwise hent:fix-options is applied.

If anything fails, the uber model is validated first so that an invalid model surfaces as a validation error;
otherwise the original error is rethrown.
:)
declare function hent:dump-search-options($entities as json:array, $for-explorer as xs:boolean?)
{
  let $index-info := hent:add-indexes-for-entity-properties($entities)
  let $updated-models := map:get($index-info, "updated-models")
  let $uber-model := hent:uber-model(
    for $updated-model in json:array-values($updated-models)
    return xdmp:to-json($updated-model)/object-node()
  )
  let $sortable-properties := map:get($index-info, "sortable-properties")
  return
    try {
      if ($for-explorer = fn:true()) then
        ext:post-process-search-options(
          hent:fix-options-for-explorer(
            es:search-options-generate($uber-model),
            $sortable-properties,
            build-entity-namespace-map($uber-model)
          )
        )
      else
        hent:fix-options(es:search-options-generate($uber-model))
    } catch * {
      let $_validation := es:model-validate($uber-model)
      return xdmp:rethrow()
    }
};

(:
Copies a search:path-index element, keeping its namespaces and attributes but replacing its content with the
path returned by fix-path-expression.
:)
declare private function fix-path-index($path-index as element(search:path-index)) as element(search:path-index)
{
  let $fixed-path := fix-path-expression($path-index/fn:string())
  return
    element {fn:node-name($path-index)} {
      $path-index/namespace::node(),
      $path-index/@*,
      text {$fixed-path}
    }
};

(:
Maps each URI in $all-uris to an identifier for the entity instance stored in that document.

When at least one primaryKey triple exists for $entity-type, a SPARQL query restricted to the documents in
$all-uris is run, and the identifier is the part of the instance IRI that follows the entity type IRI and a
slash. If that part is blank, or a URI has no query result, the URI itself is used. When no primaryKey triple
exists, every URI maps to itself.
:)
declare function hent:find-entity-identifiers(
    $all-uris as xs:string*,
    $entity-type as xs:string
) as map:map {
  let $entity-type-iri := sem:iri($entity-type)
  let $primary-key-defined := xdmp:exists(cts:search(fn:doc(), cts:triple-range-query($entity-type-iri, sem:iri('http://marklogic.com/entity-services#primaryKey'), ())))
  let $primary-keys := if ($primary-key-defined) then
    (: Optimize is set to zero to avoid an issue where the query occasionally doesn't return primary keys. See https://project.marklogic.com/jira/browse/DHFPROD-7388 :)
    (: NOTE(review): the SPARQL text below has no predicates between its subject and object variables;
       presumably the predicate IRIs were stripped from this copy of the file - confirm against upstream. :)
    let $results := sem:sparql('
      SELECT * WHERE {
        ?instanceIRI  ?entityTypeIRI;
            ?URI.
      }', map:entry("entityTypeIRI", $entity-type-iri), ("optimize=0"), cts:document-query($all-uris))
    let $primary-keys := map:new(
        let $entity-type-prefix := $entity-type || "/"
        for $result in $results
        let $uri := fn:string(map:get($result, "URI"))
        let $primary-key := fn:substring-after(fn:string(map:get($result, "instanceIRI")), $entity-type-prefix)
        return map:entry(
            $uri,
            if (fn:normalize-space($primary-key) eq "") then
              $uri
            else
              $primary-key
        )
      )
    (: URIs that the query returned nothing for fall back to the URI as their identifier :)
    let $_fill-in-missing :=
      for $uri in $all-uris
      where fn:not(map:contains($primary-keys, $uri))
      return
        map:put($primary-keys, $uri, $uri)
    return $primary-keys
  else
    map:new(
        $all-uris ! map:entry(., .)
    )
  return $primary-keys
};

(:
Rewrites the path expressions produced by es:database-properties-generate and es:search-options-generate.
Both are known to return a path starting with "//es:instance/" without namespace prefixes or wildcards for the
entity and property names. Such a path is replaced with our best attempt at one that is functional and
reasonably efficient:
- when the first step after "//es:instance/" has a prefix, the path is anchored at /es:envelope/es:instance/
- otherwise it is anchored so that both namespaced and un-namespaced envelope/instance elements match
Any other path is returned untouched.
:)
declare private function fix-path-expression($path as xs:string) as xs:string
{
  let $es-prefix := "//es:instance/"
  return
    if (fn:not(fn:starts-with($path, $es-prefix))) then
      (: Not expected to happen, but a path of an unknown shape is deliberately left alone :)
      $path
    else
      let $subpath := fn:substring-after($path, $es-prefix)
      let $first-step := fn:tokenize($subpath, "/")[1]
      return
        if (fn:contains($first-step, ":")) then
          "/es:envelope/es:instance/" || $subpath
        else
          "/(es:envelope|envelope)/(es:instance|instance)/" || $subpath
};

(:
Use hubEs.generateDatabaseProperties instead of this. This code is being kept here for now as there's a lot of
custom logic that would need to be rewritten in SJS. That is likely worth doing eventually since the output
of es:database-properties-generate is a JSON object, and it's of course easier to manipulate JSON in SJS vs XQuery.

Generates database properties for the given entity models and returns them as a JSON document. The generated
config is adjusted in this order: entity namespaces are added to the path namespaces, the database-name,
schema-database, triple-index and collection-lexicon properties are removed, every range path index gets a
fixed path expression, and duplicate range element indexes are removed.
:)
declare function hent:dump-indexes($entities as json:array) as document-node()
{
  let $index-info := hent:add-indexes-for-entity-properties($entities)
  let $model-objects :=
    for $model in json:array-values(map:get($index-info, "updated-models"))
    return xdmp:to-json($model)/object-node()
  let $uber-model := hent:uber-model($model-objects)
  let $database-config := xdmp:from-json(es:database-properties-generate($uber-model))

  let $_add-namespaces := add-entity-namespaces-to-path-namepaces($uber-model, $database-config)
  let $_drop-properties :=
    ("database-name", "schema-database", "triple-index", "collection-lexicon") ! map:delete($database-config, .)
  let $_fix-paths :=
    for $path-index in map:get($database-config, "range-path-index") ! json:array-values(.)
    return
      map:put($path-index, "path-expression", fix-path-expression(map:get($path-index, "path-expression")))
  let $_dedupe := remove-duplicate-range-indexes($database-config)
  return xdmp:to-json($database-config)
};

(:
Adds every entity namespace prefix from $uber-model that is not already declared to the "path-namespace"
array of $database-config, creating the array if it is missing. The config is modified in place and nothing
is returned.

This is done regardless of whether any path expression uses an entity definition's namespace, to work around
a bug in the Manage API where indexes that do use the prefixes cannot be removed unless the prefixes are
defined in path-namespaces.
:)
declare private function hent:add-entity-namespaces-to-path-namepaces($uber-model, $database-config)
{
  let $existing := map:get($database-config, "path-namespace")
  let $path-namespaces :=
    (: 'es' is expected to always be defined here; the array is created just in case it is not :)
    if (fn:not($existing)) then
      let $new-array := json:array()
      let $_ := map:put($database-config, "path-namespace", $new-array)
      return $new-array
    else $existing

  let $known-prefixes := json:array-values($path-namespaces) ! map:get(., "prefix")
  let $entity-namespace-map := build-entity-namespace-map($uber-model)
  let $_push-missing :=
    for $prefix in map:keys($entity-namespace-map)[fn:not(. = $known-prefixes)]
    return
      json:array-push($path-namespaces, map:new((
        map:entry("prefix", $prefix),
        map:entry("namespace-uri", map:get($entity-namespace-map, $prefix))
      )))

  return ()
};

(:
es:database-properties-generate will generate duplicate range indexes when e.g. two entities have properties
with the same name and namespace and are both configured to have range indexes. This function replaces the
"range-element-index" array in $database-config with one that keeps only the first index seen for each
combination of local name, namespace URI and collation. Nothing happens when the config has no such array.
:)
declare private function hent:remove-duplicate-range-indexes($database-config as item())
{
  let $indexes := map:get($database-config, "range-element-index")
  return
    if (fn:empty($indexes)) then ()
    else
      let $first-index-by-key := map:map()
      let $_collect :=
        for $index in json:array-values($indexes)
        let $key := fn:string-join((
          "localname", map:get($index, "localname"),
          "namespace", map:get($index, "namespace-uri"),
          "collation", map:get($index, "collation")
        ), "-")
        return
          if (map:contains($first-index-by-key, $key)) then ()
          else map:put($first-index-by-key, $key, $index)

      let $unique-indexes := json:array()
      let $_fill :=
        for $key in map:keys($first-index-by-key)
        return json:array-push($unique-indexes, map:get($first-index-by-key, $key))
      let $_replace := map:put($database-config, "range-element-index", $unique-indexes)
      return ()
};

(: Name of the placeholder primary key property that hent:dump-tde adds to definitions without a primaryKey. :)
declare variable $generated-primary-key-column as xs:string := "DataHubGeneratedPrimaryKey";
(: Expression text that is substituted wherever the placeholder primary key is referenced. :)
declare variable $generated-primary-key-expression as xs:string := "(xdmp:node-uri(.) || fn:string(fn:node-name(.)) || (fn:count(preceding-sibling::*) + 1))";

(:
Replaces every reference to the generated primary key column in $value with the generated primary key
expression. A reference may carry a leading "prefix:" and may be followed by "|DataHubGeneratedPrimaryKey)";
both are swallowed by the replacement. Values that do not mention the column are returned unchanged.
:)
declare function hent:replace-generated-key($value as xs:string) {
  let $key-reference-pattern :=
    fn:concat("([^:/]+:)?", $generated-primary-key-column, "(\|", $generated-primary-key-column, "\))?")
  return
    if (fn:not(fn:contains($value, $generated-primary-key-column))) then
      $value
    else
      fn:replace($value, $key-reference-pattern, $generated-primary-key-expression)
};

(:
Generates the TDE template for the given entity models.

The models are merged into an uber model whose "info" is taken from the first model. Every definition without
a primaryKey is given the generated primary key column, both as its primaryKey and as a string property. The
root entity name is the info title when a definition with that name exists, otherwise the best guess from
hent:get-primary-entity-type-title. The template generated from the uber model is then passed through
hent:fix-tde.
:)
declare function hent:dump-tde($entities as json:array)
{
  let $entity-values as map:map* := json:array-values($entities)
  let $uber-model := hent:uber-model($entity-values ! xdmp:to-json(.)/object-node())
  let $_set-info := map:put($uber-model, "info", map:get(fn:head($entity-values), "info"))
  let $uber-definitions := map:get($uber-model, "definitions")
  (: Primary keys are required for each definition in an entity. If one is missing, the doc URI and
     position are used as the primary key instead. :)
  let $_default-primary-keys :=
    for $type-name in map:keys($uber-definitions)
    let $definition := map:get($uber-definitions, $type-name)
    return
      if (fn:exists(map:get($definition, "primaryKey"))) then ()
      else (
        map:put($definition, "primaryKey", $generated-primary-key-column),
        map:put(map:get($definition, "properties"), $generated-primary-key-column, map:entry("datatype", "string"))
      )
  let $entity-model-contexts := map:keys($uber-definitions)
  let $title := map:get(map:get($uber-model, "info"), "title")
  let $entity-name :=
    if (map:contains($uber-definitions, $title)) then
      $title
    else
      (: the title doesn't match a definition, so make a best guess at the root entity definition :)
      hent:get-primary-entity-type-title($uber-definitions)
  let $es-template := tg:extraction-template-generate($uber-model)
  return hent:fix-tde($es-template, $entity-model-contexts, $uber-model, $entity-name)
};




(:
Guesses the root entity type among a set of definitions: returns the name of the first definition that no
"$ref" of the form #/definitions/NAME points to, or nothing when every definition is referenced.
$entity-definition may be a node or a value that xdmp:to-json can convert to an object node.
:)
declare function hent:get-primary-entity-type-title($entity-definition as item()) {
  let $definitions-node :=
    if ($entity-definition instance of node()) then $entity-definition
    else xdmp:to-json($entity-definition)/object-node()
  let $referenced :=
    fn:distinct-values($definitions-node//text("$ref"))[fn:starts-with(., "#/definitions/")]
  let $unreferenced :=
    for $definition in $definitions-node/object-node()
    let $local-ref := "#/definitions/" || fn:string(fn:node-name($definition))
    where fn:not($local-ref = $referenced)
    return $local-ref
  return fn:head($unreferenced) ! fn:substring-after(., "#/definitions/")
};


(: The tde:invalid-values element ("ignore") that hent:fix-tde puts in place of, or adds to, generated templates. :)
declare variable $default-invalid-values as element(tde:invalid-values) := element tde:invalid-values {"ignore"};

(:
Three-argument form of hent:fix-tde: fixes the given template nodes without a known root entity name.
:)
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-model as map:map)
{
  let $no-entity-name as xs:string? := ()
  return hent:fix-tde($nodes, $entity-model-contexts, $uber-model, $no-entity-name)
};

(:
Recursively rewrites the template nodes generated by tg:extraction-template-generate.

$entity-model-contexts holds the entity type names of the uber model, $uber-model is the merged model, and
$entity-name is the root entity name (it is only passed on for document nodes, contexts and non-join templates).

Per node kind:
- document-node: rebuilt around its fixed-up children
- tde:invalid-values: replaced with $default-invalid-values
- tde:val: references to the generated primary key are replaced with the generated key expression; a value
  that starts with its column name and whose second path step names an entity definition gets that
  definition's primary key appended
- tde:context: delegated to fix-tde-context
- tde:column, tde:subject, tde:predicate, tde:object: children are fixed up (dropping any tde:invalid-values)
  and $default-invalid-values is appended
- tde:template: a template whose row view-name equals "<parent context>_<context>" is treated as a join
  template and its rows are rebuilt column by column; any other template is fixed up recursively
- other elements: copied with fixed-up attributes and children
- text: a leading "../" step is widened so it also matches through a parent array-node

NOTE(review): in the join-template branch, the block guarded by $relatedEntityType appears to have lost its
direct element constructors in this copy of the file (bare subject-iri / $subject-iri / ignore text), so the
function does not compile as shown. Restore the literal XML from the upstream source before relying on it.
:)
declare function hent:fix-tde($nodes as node()*, $entity-model-contexts as xs:string*, $uber-model as map:map, $entity-name as xs:string?)
{
  for $n in $nodes
  return
    typeswitch($n)
      case document-node() return
        document {
          hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, $entity-name)
        }
      case element(tde:invalid-values) return
        $default-invalid-values
      case element(tde:val) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          let $col-name := fn:string($n/../tde:name)
          return
            if (fn:ends-with($col-name, $generated-primary-key-column)) then
              $generated-primary-key-expression
            else if (fn:contains(fn:string($n), $generated-primary-key-column)) then
              hent:replace-generated-key(fn:string($n))
            else if (fn:starts-with($n, $col-name)) then
              let $parts := fn:tokenize($n, "/")
              let $uber-definitions := $uber-model => map:get("definitions")
              let $entity-definition := $uber-definitions => map:get(fn:string($parts[2]))
              return
                if (fn:exists($entity-definition)) then
                  let $primary-key := $entity-definition => map:get("primaryKey")
                  return
                    if (fn:contains($primary-key, $generated-primary-key-column)) then
                      fn:string($n) || "/" || hent:replace-generated-key($primary-key)
                    else
                      fn:string($n) || "/" || $primary-key
                else
                  hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, ())
            else
              hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, ())
        }

      case element(tde:context) return
        fix-tde-context($n, $entity-model-contexts, $uber-model, $entity-name)

      case element(tde:column) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          hent:fix-tde($n/* except $n/tde:invalid-values, $entity-model-contexts, $uber-model, ()),
          $default-invalid-values
        }
      case element(tde:subject)|element(tde:predicate)|element(tde:object) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          hent:fix-tde($n/* except $n/tde:invalid-values, $entity-model-contexts, $uber-model, ()),
          $default-invalid-values
        }
      case element(tde:template) return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          $n/@*,
          (: Reduce this template's context path to a bare name: strip the leading "./" or ".//", keep the
             second alternative of a union, and drop a trailing non-empty-string predicate :)
          let $context-item :=  fn:replace(fn:string($n/tde:context), "^\.//?", "")
          let $context-item :=
            fn:replace(
              if (fn:contains($context-item, "|")) then
                fn:replace(fn:tokenize($context-item, "\|")[2], "\)$", "")
              else
                $context-item,
              "\[(fn|xs):string\(\.\) ne ""\]",
              ""
            )
          let $join-prefix := $context-item || "_"
          (: Same reduction for the context of the template two levels up :)
          let $parent-context-item := fn:replace(fn:string($n/../../tde:context), "^\./{1,2}([^\[]+)(\[node\(\)\])?$", "$1")
          let $parent-context-item :=
            fn:replace(
              if (fn:contains($parent-context-item, "|")) then
                fn:replace(fn:tokenize($parent-context-item, "\|")[2], "\)$", "")
              else
                $parent-context-item,
              "\[(fn|xs):string\(\.\) ne ""\]",
              ""
            )
          let $join-view-name := fn:string-join(($parent-context-item, $context-item), "_")
          let $is-join-template := $n/tde:rows/tde:row/tde:view-name = $join-view-name
          let $rows := $n/tde:rows/tde:row
          return
            if ($is-join-template) then (
              hent:fix-tde($n/tde:context, $entity-model-contexts, $uber-model, ()),

              let $uber-definitions := $uber-model => map:get("definitions")
              let $model_iri  := tg:model-graph-iri($uber-model)
              (:searching values to load in triple section:)
              let $entityName := $parent-context-item
              let $entityMap := map:get($uber-definitions, $entityName)
              let $propertiesMap := map:get($entityMap, "properties")
              let $entityContextItemMap := map:get($propertiesMap, $context-item)
              let $entityItems := map:get($entityContextItemMap, "items")
              let $relatedEntityType := if (fn:exists($entityItems)) then fn:string(map:get($entityItems, "relatedEntityType")) else ()
              let $primaryKey := fn:string(map:get($entityMap, "primaryKey"))
              let $ancestorKey := hent:replace-generated-key(fn:string($rows/tde:columns/tde:column[tde:name = $primaryKey]/tde:val))
              return (
              element tde:rows {
                element tde:row {
                  $rows/(tde:schema-name|tde:view-name|tde:view-layout),
                  element tde:columns {
                    for $column in $rows/tde:columns/tde:column
                    return
                      element tde:column {
                        $column/@*,
                        hent:fix-tde($column/(tde:name|tde:scalar-type), $entity-model-contexts, $uber-model, ()),
                        if (fn:starts-with($column/tde:name, $join-prefix)) then (
                          let $tde-val := fn:string($column/tde:val)
                          let $uber-definitions := $uber-model => map:get("definitions")
                          let $primary-key := $uber-definitions => map:get($tde-val) => map:get("primaryKey")
                          return
                            element tde:val {
                              if ($primary-key = $generated-primary-key-column) then
                                $generated-primary-key-expression
                              else
                                $tde-val || "/" || $primary-key
                            }
                        ) else
                        hent:fix-tde($column/(tde:val|tde:nullable), $entity-model-contexts, $uber-model, ()),
                        $default-invalid-values,
                        hent:fix-tde($column/(tde:default|tde:reindexing|tde:collation), $entity-model-contexts, $uber-model, ())
                      }
                  }
                }
               },
               if (fn:exists($relatedEntityType[. ne ""])) then (
                   
                    
                       subject-iri
                       $top-subject-iri
                     
                   ,
                   
                     
                       
                      $subject-iri
                       ignore
                     
                    
                      sem:iri("{ $model_iri }/{ $entityName }/{ $context-item}")
                      ignore
                     
                    
                      sem:iri(concat("{ $relatedEntityType }/", fn:encode-for-uri(xs:string(.))))
                       ignore
                    
                   
                   
                 ) else ()
               )
            ) else
              hent:fix-tde($n/node(), $entity-model-contexts, $uber-model, $entity-name)
        }
      case element() return
        element { fn:node-name($n) } {
          $n/namespace::node(),
          hent:fix-tde(($n/@*, $n/node()), $entity-model-contexts, $uber-model, ())
        }
      case text() return
        fn:replace($n, "^\.\./(.+)$", "(../$1|parent::array-node()/../$1)")
      default return $n
};

(:
Fixes the ES-generated TDE context path by:
- Replacing the use of wildcards, which lead to false positives
- Checking the entity namespacePrefix to determine if the context only needs to support XML

False positives in the context path won't lead to incorrect results when querying via the TDE, but they will
lead to unnecessary reindexing, per DHFPROD-6954.

Example of an ES-generated path: //*:instance[*:info/*:version = "1.0"]

The resulting context is one of:
- for a context equal to one of $entity-model-contexts: ".//" plus the context, with "[node()]" appended
- for a known $entity-name: the envelope/instance path, restricted by model version when the uber model
  has one, followed by a predicate requiring the entity element (with and without its namespace prefix
  when the entity definition declares one)
- otherwise: the original path with its wildcards replaced
:)
declare private function fix-tde-context(
  $context as element(tde:context),
  $entity-model-contexts as xs:string*,
  $uber-model as map:map,
  $entity-name as xs:string?
) as element(tde:context)
{
  element tde:context {
    $context/namespace::node(),

    (: This appears to be for the 'non-root' context elements in a TDE :)
    if ($context = $entity-model-contexts) then
      fn:replace(".//" || fn:string($context), "(.)$", "$1[node()]")

    else if (fn:not($entity-name)) then
      (: In the absence of an entity-name, which is very unexpected, at least remove the wildcards :)
      replace-context-wildcards($context/text())

    else
      let $version := get-version-from-uber-model($uber-model)
      let $ns-prefix := get-namespace-prefix($uber-model, $entity-name)
      let $entity-predicate :=
        if ($ns-prefix) then
          "[(" || $ns-prefix || ":" || $entity-name || "|" || $entity-name || ")]"
        else
          "[" || $entity-name || "]"
      let $instance-path :=
        if ($version) then
          (: An 'or' clause is used to further avoid false positives :)
          "/(es:envelope|envelope)/(es:instance|instance)[es:info/es:version = '" || $version || "' or info/version = '" || $version || "']"
        else
          replace-context-wildcards($context/text())
      return $instance-path || $entity-predicate
  }
};

(: Returns info.version from the uber model, or nothing when the model has no "info" entry. :)
declare private function get-version-from-uber-model($uber-model as map:map) as xs:string?
{
  map:get($uber-model, "info") ! map:get(., "version")
};

(:
Returns the namespacePrefix of the definition named $entity-name in the uber model, or nothing when the model
has no definitions or no definition with that name.
:)
declare private function get-namespace-prefix($uber-model as map:map, $entity-name as xs:string) as xs:string?
{
  let $entity-def := map:get($uber-model, "definitions") ! map:get(., $entity-name)
  return $entity-def ! map:get(., "namespacePrefix")
};

(:
Replaces the wildcards in an ES-generated context path, which eliminates many false positives, per
DHFPROD-6954: "//*:instance" becomes an explicit envelope/instance path and "*:info/*:version" becomes a union
of the namespaced and un-namespaced forms. The resulting path is intended to support JSON entity instances and
XML entity instances that do not have a namespace (but still use the es namespace for
envelope/instance/info/version).
:)
declare private function replace-context-wildcards($path as xs:string) as xs:string
{
  fn:replace(
    fn:replace($path, "//\*:instance", "/(es:envelope|envelope)/(es:instance|instance)"),
    "\*:info/\*:version",
    "(es:info/es:version|info/version)"
  )
};

(: Entity property datatypes that hent:json-schema-generate reports as the JSON Schema type "number". :)
declare variable $number-types as xs:string+ := (
  "byte",
  "decimal",
  "double",
  "float",
  "int",
  "integer",
  "long",
  "negativeInteger",
  "nonNegativeInteger",
  "nonPositiveInteger",
  "positiveInteger",
  "short",
  "unsignedLong",
  "unsignedInt",
  "unsignedShort",
  "unsignedByte"
);

(: Entity property datatypes that hent:json-schema-generate reports as the JSON Schema type "string". :)
declare variable $string-types as xs:string+ := (
  "dateTime",
  "date"
);

(:
  Builds a JSON Schema (draft-07) document for one entity out of an uber model.

  The incoming model is rebuilt from a document node first, so the caller's map is not changed.
  The rebuilt model is then rewritten in place:
    - each property, and each map-valued "items" of a property, receives a "type" derived from its
      "datatype" (values in $number-types become "number", values in $string-types become "string",
      anything else is kept as it is); the "datatype" key is removed afterwards;
    - each "$ref", found either directly on a property or on its "items", is wrapped in an object
      schema whose single property is named after the last "/"-separated step of the reference;
    - a top-level "properties" entry referencing "#/definitions/{entity-title}" is added.

  $entity-title : title of the entity the schema is generated for
  $uber-model   : model to convert
  Returns the result of xdmp:to-json applied to the rewritten model.
:)
declare function hent:json-schema-generate($entity-title as xs:string, $uber-model as map:map)
{
  let $schema-model := map:new((
    (: Rebuild the map from a document node so that nobody else's map is changed :)
    map:map(document{$uber-model}/*),
    map:entry("lang", "zxx"),
    map:entry("$schema", "http://json-schema.org/draft-07/schema#")
  ))
  let $all-definitions := map:get($schema-model, "definitions")
  (: JSON Schema needs an extra level of wrapping to account for the Entity Model label wrapping a reference. :)
  let $_rewrite-properties :=
    for $type-name in map:keys($all-definitions)
    let $type-properties := $all-definitions => map:get($type-name) => map:get("properties")
    for $prop-name in map:keys($type-properties)
    let $prop := map:get($type-properties, $prop-name)
    let $prop-items := map:get($prop, "items")
    let $prop-datatype := map:get($prop, "datatype")
    (: Translate "datatype" into "type" on the property itself and, for arrays, on its items. :)
    let $_datatype-to-type := (
      if (fn:empty($prop-datatype)) then ()
      else
        map:put($prop, "type",
          if ($prop-datatype = $number-types) then "number"
          else if ($prop-datatype = $string-types) then "string"
          else $prop-datatype
        ),
      map:delete($prop, "datatype"),
      if ($prop-items instance of map:map) then
        let $items-datatype := map:get($prop-items, "datatype")
        return (
          if (fn:empty($items-datatype)) then ()
          else
            map:put($prop-items, "type",
              if ($items-datatype = $number-types) then "number"
              else if ($items-datatype = $string-types) then "string"
              else $items-datatype
            ),
          map:delete($prop-items, "datatype")
        )
      else ()
    )
    return
      (: A reference sits either on the property or, for arrays, on its items. :)
      if (map:contains($prop, "$ref")) then
        let $ref-name := fn:tokenize(map:get($prop, "$ref"), "/")[fn:last()]
        let $wrapper := map:new((
          map:entry("type", "object"),
          map:entry("properties", map:entry($ref-name, $prop))
        ))
        return map:put($type-properties, $prop-name, $wrapper)
      else if ($prop-items instance of map:map and map:contains($prop-items, "$ref")) then
        let $ref-name := fn:tokenize(map:get($prop-items, "$ref"), "/")[fn:last()]
        let $wrapper := map:new((
          map:entry("type", "object"),
          map:entry("properties", map:entry($ref-name, $prop-items))
        ))
        return map:put($prop, "items", $wrapper)
      else ()
  let $_root-property :=
    map:put($schema-model, "properties",
      map:entry($entity-title, map:entry("$ref", "#/definitions/" || $entity-title)))
  return xdmp:to-json($schema-model)
};

(:
  Walks the properties of a structured (referenced) entity definition and pushes a path onto the
  "rangeIndex" array of the primary entity definition for every facetable, non-structured property.
  Properties that reference another local definition ("#/definitions/..."), directly or through
  "items", are followed recursively.

  $entity-path               : path prefix accumulated so far; pushed paths are "{entity-path}/{property}"
  $definiton-name            : key in $definitions of the definition to walk (spelling kept for compatibility)
  $definitions               : all definitions of the model, keyed by name
  $primary-entity-definition : definition whose existing "rangeIndex" array receives the paths
  Returns $primary-entity-definition.
:)
declare %private function hent:build-indexes-for-structured-entity-properties($entity-path as xs:string, $definiton-name as xs:string, $definitions as map:map, $primary-entity-definition as map:map) {
  hent:build-indexes-for-structured-entity-properties($entity-path, $definiton-name, $definitions, $primary-entity-definition, ())
};

(:
  Same as the four-argument form, with cycle protection.

  $ancestor-definitions : names of the definitions already being walked on the current path. A reference
                          to one of them, or to $definiton-name itself, is not followed; without this
                          guard a definition that references itself, directly or indirectly, would
                          recurse without end. A definition reached through two different properties
                          is still walked once per property, as before.
:)
declare %private function hent:build-indexes-for-structured-entity-properties(
  $entity-path as xs:string,
  $definiton-name as xs:string,
  $definitions as map:map,
  $primary-entity-definition as map:map,
  $ancestor-definitions as xs:string*
) {
  let $entity-type := map:get($definitions, $definiton-name)
  let $entity-type-properties := map:get($entity-type, "properties")
  let $definitions-on-path := ($ancestor-definitions, $definiton-name)

  let $_ :=
    for $entity-type-property in map:keys($entity-type-properties)
    let $property := map:get($entity-type-properties, $entity-type-property)
    let $ref := map:get($property, "$ref")
    let $items := map:get($property, "items")
    return
      (: Non-structured property: no "$ref" of its own and no local "$ref" on its items. :)
      if (fn:empty($ref) and (fn:empty($items) or fn:not(fn:starts-with(map:get($items, "$ref"), "#")))) then
        if (map:get($property, "facetable")) then
          json:array-push(map:get($primary-entity-definition, "rangeIndex"), $entity-path || "/" || $entity-type-property)
        else ()
      else
        let $referenced-definition :=
          if (fn:empty($items)) then
            fn:substring-after($ref, "#/definitions/")
          else
            fn:substring-after(map:get($items, "$ref"), "#/definitions/")
        (: Skip references that are not local definitions and references that would close a cycle. :)
        where $referenced-definition and fn:not($referenced-definition = $definitions-on-path)
        return
          let $path := $entity-path || "/" || $entity-type-property || "/" || $referenced-definition
          let $_ := hent:build-indexes-for-structured-entity-properties($path, $referenced-definition, $definitions, $primary-entity-definition, $definitions-on-path)
          return ()
  return $primary-entity-definition
};

(:
  Adds range index information to each entity model and collects its sortable properties.

  For every model, the definition named after the model's info/title is looked up; when it is missing
  a message is logged and the model is passed through without index changes. Otherwise a "rangeIndex"
  array is created on that definition if absent, and each of its properties is handled as follows:
    - a non-structured property is added to "rangeIndex" by name when it is facetable or sortable;
    - a property referencing a local definition, directly or through "items", is handed to
      hent:build-indexes-for-structured-entity-properties, which pushes paths for nested facetable
      properties; the referencing property itself is treated as not facetable and not sortable;
    - every sortable property is also recorded in the sortable-properties map, keyed by its path expression.

  NOTE(review): earlier documentation said structured properties are ignored even when modeled as
  facetable, per https://project.marklogic.com/jira/browse/DHFPROD-5018; the code delegates them as
  described above. Confirm which description is current.
  NOTE(review): the models are produced as object-nodes while the loop variable is typed map:map;
  this appears to rely on MarkLogic converting between the two. Confirm before changing either side.

  $entities : array of entity models
  Returns a map with "updated-models" (json:array of the processed models) and "sortable-properties"
  (map from path expression to a map with "is-sortable-only", "entity-title", "property-name" and
  "property-datatype").
:)
declare %private function hent:add-indexes-for-entity-properties($entities as json:array) as map:map {
  let $model-nodes := json:array-values($entities) ! xdmp:to-json(.)/object-node()
  let $updated-models := json:array()
  let $sortable-properties := map:map()

  let $_ :=
    for $model as map:map in $model-nodes
    let $entity-title := map:get($model, "info") => map:get("title")
    let $entity-definition := map:get($model, "definitions") => map:get($entity-title)
    let $entity-type-properties :=
      if (fn:exists($entity-definition)) then
        (: Make sure there is an array to push index entries onto. :)
        let $_ :=
          if (fn:exists(map:get($entity-definition, "rangeIndex"))) then ()
          else map:put($entity-definition, "rangeIndex", json:array())
        return map:get($entity-definition, "properties")
      else
        let $_ := xdmp:log("Could not find entity definition with name: " || $entity-title)
        return map:map()

    let $namespace :=
      if (fn:empty($entity-definition)) then () else map:get($entity-definition, "namespace")
    let $namespace-prefix :=
      if (fn:empty($entity-definition)) then () else map:get($entity-definition, "namespacePrefix")

    let $_ :=
      for $property-name in map:keys($entity-type-properties)
      let $property := map:get($entity-type-properties, $property-name)
      let $ref := map:get($property, "$ref")
      let $items := map:get($property, "items")

      let $is-facetable :=
        if (fn:empty($ref) and (fn:empty($items) or fn:not(fn:starts-with(map:get($items, "$ref"), "#")))) then
          map:get($property, "facetable")
        else
          let $referenced-definition :=
            if (fn:empty($items)) then
              fn:substring-after($ref, "#/definitions/")
            else
              fn:substring-after(map:get($items, "$ref"), "#/definitions/")
          return
            if ($referenced-definition) then
              (: Nested facetable properties are pushed by the helper; the reference itself is not facetable. :)
              let $_ :=
                hent:build-indexes-for-structured-entity-properties(
                  $property-name || "/" || $referenced-definition,
                  $referenced-definition,
                  map:get($model, "definitions"),
                  $entity-definition
                )
              return fn:false()
            else ()

      let $is-sortable :=
        if (fn:exists($ref)) then
          fn:false()
        else if (fn:empty($items)) then
          map:get($property, "sortable")
        else
          fn:not(fn:starts-with(map:get($items, "$ref"), "#")) and map:get($property, "sortable")

      let $_ :=
        if ($is-sortable) then
          let $path-expression :=
            if ($namespace and $namespace-prefix) then
              "/es:envelope/es:instance/" || $namespace-prefix || ":" || $entity-title || "/" || $namespace-prefix || ":" || $property-name
            else
              "/(es:envelope|envelope)/(es:instance|instance)/" || $entity-title || "/" || $property-name
          let $property-datatype :=
            if (fn:empty($items)) then map:get($property, "datatype")
            else map:get($items, "datatype")
          (: If something is only sortable, we need a path range index for it, but we need to ensure
             that a facet is not configured for it. :)
          let $sort-info := map:new((
            map:entry("is-sortable-only", $is-sortable and fn:not($is-facetable)),
            map:entry("entity-title", $entity-title),
            map:entry("property-name", $property-name),
            map:entry("property-datatype", $property-datatype)
          ))
          return map:put($sortable-properties, $path-expression, $sort-info)
        else ()

      where $is-facetable or $is-sortable
      return json:array-push(map:get($entity-definition, "rangeIndex"), $property-name)

    return json:array-push($updated-models, $model)

  return map:new((
    map:entry("updated-models", $updated-models),
    map:entry("sortable-properties", $sortable-properties)
  ))
};




© 2015 - 2024 Weber Informatics LLC | Privacy Policy