All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ml-modules.root.com.marklogic.smart-mastering.matcher-impl.matcher-impl.xqy Maven / Gradle / Ivy

There is a newer version: 6.1.1
Show newest version
xquery version "1.0-ml";

(:
 : This is an implementation library, not an interface to the Smart Mastering functionality.
 :
 : The process of matching starts with one document, which is not required to
 : be in the database. The match options specify what properties are to be used
 : to find matches. See match options documentation for details. The options
 : may specify multiple thresholds, each of which corresponds to an action.
 :
 : Implementation notes: the configured properties are used to generate a boost
 : query. The match part of the query identifies a set of subqueries that a
 : document must match in order to get a score above the lowest threshold.
 : Match queries all have their scores set to zero. The boost part of the query
 : is used to provide the score.
 :
 : @see https://marklogic-community.github.io/smart-mastering-core/docs/matching-options/
 :)

module namespace match-impl = "http://marklogic.com/smart-mastering/matcher-impl";

import module namespace blocks-impl = "http://marklogic.com/smart-mastering/blocks-impl"
at "/com.marklogic.smart-mastering/matcher-impl/blocks-impl.xqy";
import module namespace const = "http://marklogic.com/smart-mastering/constants"
at "/com.marklogic.smart-mastering/constants.xqy";
import module namespace helper-impl = "http://marklogic.com/smart-mastering/helper-impl"
at "/com.marklogic.smart-mastering/matcher-impl/helper-impl.xqy";
import module namespace json="http://marklogic.com/xdmp/json"
at "/MarkLogic/json/json.xqy";
import module namespace opt-impl = "http://marklogic.com/smart-mastering/options-impl"
at "/com.marklogic.smart-mastering/matcher-impl/options-impl.xqy";
import module namespace tel = "http://marklogic.com/smart-mastering/telemetry"
at "/com.marklogic.smart-mastering/telemetry.xqy";
import module namespace es-helper = "http://marklogic.com/smart-mastering/entity-services"
at "/com.marklogic.smart-mastering/sm-entity-services.xqy";

declare namespace matcher = "http://marklogic.com/smart-mastering/matcher";

declare option xdmp:mapping "false";

(: Results of xdmp:trace-enabled for the two trace events used in this module.
 : Functions below check these flags before building trace messages. :)
declare variable $match-trace-is-enabled as xs:boolean := xdmp:trace-enabled($const:TRACE-MATCH-RESULTS);
declare variable $performance-trace-is-enabled as xs:boolean := xdmp:trace-enabled($const:TRACE-PERFORMANCE);

(: QNames of the serialized cts:query elements whose weight attribute is read by
 : match-impl:score-from-cts-query when a ruleset has no explicit weight. :)
declare variable $QUERIES_WITH_WEIGHT := (
  xs:QName("cts:element-attribute-pair-geospatial-query"),
  xs:QName("cts:element-attribute-range-query"),
  xs:QName("cts:element-attribute-value-query"),
  xs:QName("cts:element-attribute-word-query"),
  xs:QName("cts:element-child-geospatial-query"),
  xs:QName("cts:element-geospatial-query"),
  xs:QName("cts:element-pair-geospatial-query"),
  xs:QName("cts:element-range-query"),
  xs:QName("cts:element-value-query"),
  xs:QName("cts:element-word-query"),
  xs:QName("cts:field-range-query"),
  xs:QName("cts:field-value-query"),
  xs:QName("cts:field-word-query"),
  xs:QName("cts:geospatial-region-query"),
  xs:QName("cts:json-property-child-geospatial-query"),
  xs:QName("cts:json-property-geospatial-query"),
  xs:QName("cts:json-property-pair-geospatial-query"),
  xs:QName("cts:json-property-range-query"),
  xs:QName("cts:json-property-value-query"),
  xs:QName("cts:json-property-word-query"),
  xs:QName("cts:lsqt-query"),
  xs:QName("cts:near-query"),
  xs:QName("cts:not-query"),
  xs:QName("cts:path-geospatial-query"),
  xs:QName("cts:path-range-query"),
  xs:QName("cts:range-query"),
  xs:QName("cts:registered-query"),
  xs:QName("cts:reverse-query"),
  xs:QName("cts:similar-query"),
  xs:QName("cts:triple-range-query"),
  xs:QName("cts:word-query")
);

(:
 : Find documents that are potential matches for the provided document.
 : Calls tel:increment() (usage count) and then delegates to the 8-argument
 : overload with $include-results set to true.
 :
 : @param $document  a source document to draw values from
 : @param $options  XML or JSON representation of match options
 : @param $start  paging: 1-based index
 : @param $page-length  paging: number of results to return
 : @param $minimum-threshold  the required score for the lowest-scoring
 :                            threshold (see match options)
 : @param $include-matches  if true, the response will include, for each result,
 :                          the properties that earned points for the match
 :                          (similar to snippets)
 : @param $filter-query  a cts:query that reduces the scope of documents that
 :                       will be searched for matches
 : @return results element listing the URIs of documents that match the provided document
 :)
declare function match-impl:find-document-matches-by-options(
    $document as node()?,
    $options as item(),
    $start as xs:integer,
    $page-length as xs:integer,
    $minimum-threshold as xs:double,
    $include-matches as xs:boolean,
    $filter-query as cts:query
) as element(results)
{
  let $include-results := fn:true()
  return (
    (: increment usage count :)
    tel:increment(),
    match-impl:find-document-matches-by-options(
        $document, $options, $start, $page-length,
        $minimum-threshold, $include-matches, $filter-query,
        $include-results
    )
  )
};

(:
 : 8-argument overload: same as the 9-argument version with
 : $short-circuit-redundant-queries fixed to false.
 :)
declare function match-impl:find-document-matches-by-options(
    $document as node()?,
    $options as item(),
    $start as xs:integer,
    $page-length as xs:integer,
    $minimum-threshold as xs:double,
    $include-matches as xs:boolean,
    $filter-query as cts:query,
    $include-results as xs:boolean
) as element(results)
{
  (: by default don't short-circuit redundant queries :)
  let $short-circuit-redundant-queries := fn:false()
  return
    match-impl:find-document-matches-by-options(
        $document, $options, $start, $page-length,
        $minimum-threshold, $include-matches, $filter-query,
        $include-results, $short-circuit-redundant-queries
    )
};

(: A map to track match queries previously searched on. Useful for process-impl:build-match-summary.
 : Keys are xdmp:md5 hashes of the minimum-threshold combination queries; entries are added and
 : consulted by the 9-argument match-impl:find-document-matches-by-options only when its
 : $short-circuit-redundant-queries argument is true. :)
declare variable $map-of-queries-previously-run := map:map();

(:
 : Core implementation: compile the match options, build the match query from
 : the document's property values, estimate the number of candidates and,
 : when requested, run the scored search.
 :
 : @param $document  a source document to draw values from; when empty an
 :                   empty results element is returned
 : @param $options  XML or JSON representation of match options
 : @param $start  paging: 1-based index
 : @param $page-length  paging: number of results to return
 : @param $minimum-threshold  the required score for the lowest-scoring threshold
 : @param $include-matches  if true, each result includes the matched properties
 : @param $filter-query  a cts:query that reduces the scope of documents searched
 : @param $include-results  if false, only the estimate and the serialized match
 :                          query are returned (match-impl:search is not called)
 : @param $short-circuit-redundant-queries  if true, return empty results when every
 :                          minimum-threshold query was already recorded in
 :                          $map-of-queries-previously-run
 : @return results element with total, page-length and start attributes, the
 :         serialized match query and (optionally) result elements
 :)
declare function match-impl:find-document-matches-by-options(
    $document as node()?,
    $options as item(),
    $start as xs:integer,
    $page-length as xs:integer,
    $minimum-threshold as xs:double,
    $include-matches as xs:boolean,
    $filter-query as cts:query,
    $include-results as xs:boolean,
    $short-circuit-redundant-queries as xs:boolean
) as element(results)
{
  if (fn:exists($document)) then (
    let $start-elapsed := xdmp:elapsed-time()
    (: JSON when the node itself is an object or it has object/array children :)
    let $is-json := (xdmp:node-kind($document) = "object" or fn:exists($document/(object-node()|array-node())))
    let $_trace := xdmp:trace($const:TRACE-MATCH-RESULTS, " is-json: " || $is-json)
    let $compiled-options := opt-impl:compile-match-options($options, $minimum-threshold)
    let $target-entity-type := map:get($compiled-options, "targetEntityType")
    (: side effect: record the detected data format on the compiled options map :)
    let $_set-data-format := $compiled-options => map:put("dataFormat", if ($is-json) then $const:FORMAT-JSON else $const:FORMAT-XML)
    let $values-by-property-name := match-impl:values-by-property-name($document, $compiled-options)
    (: provenance is only tracked when match details will actually be returned :)
    let $query-prov as map:map? :=
      if ($include-results and $include-matches) then
        map:map()
      else ()
    (: per-call cache shared by every match-impl:query-map-to-query invocation below :)
    let $cached-queries := map:map()
    (: one query per minimum-threshold combination for which the document supplies all values :)
    let $minimum-threshold-combinations-queries :=
      for $query-set in $compiled-options => map:get("minimumThresholdCombinations")
      let $query-maps := $query-set => map:get("queries")
      let $not-query-maps := $query-set => map:get("notQueries")
      let $algorithm-results :=
        for $query-map in $query-maps
        let $_trace :=
          if ($match-trace-is-enabled) then
            xdmp:trace($const:TRACE-MATCH-RESULTS, "values-by-property-name: " || xdmp:describe($values-by-property-name, (),()))
          else
            ()


        (: if there are no values from the document for a query, then match-impl:query-map-to-query returns an empty sequence  :)
        let $algorithm-results := match-impl:query-map-to-query($document, $query-map, $values-by-property-name, $cached-queries, $query-prov, $target-entity-type)
        let $queries := $algorithm-results[. instance of cts:query]
        let $functions := $algorithm-results[. instance of function(*) or . instance of xdmp:function]
        return (
          if (fn:count($queries) gt 1) then
          (: if a match ruleset has multiple queries, be sure to AND them :)
            helper-impl:group-queries-by-scope($queries, cts:and-query#1)
          else
            $queries,
          (: a ruleset that yielded only functions is represented by fn:true() so it
             still contributes one item to the count compared in the where clause below :)
          if (fn:empty($queries) and fn:exists($functions)) then
            fn:true()
          else ()
        )

      let $queries := $algorithm-results[. instance of cts:query]
      let $not-queries :=
        for $not-query-map in $not-query-maps
        (: if there are no values from the document for a query, then match-impl:query-map-to-query returns an empty sequence  :)
        let $algorithm-results := match-impl:query-map-to-query($document, $not-query-map, $values-by-property-name, $cached-queries, (), $target-entity-type)
        let $queries := $algorithm-results[. instance of cts:query]
        return (
          if (fn:count($queries) gt 1) then
          (: if a match ruleset has multiple queries, be sure to AND them :)
            helper-impl:group-queries-by-scope($queries, cts:and-query#1)
          else
            $queries
        )

      (: We want to be certain that we have values for each of the queries in a min threshold combo :)
      where fn:exists($queries) and fn:count($algorithm-results) eq fn:count($query-maps)
      return
        let $positive-query :=
          if (fn:count($queries) gt 1) then
            helper-impl:group-queries-by-scope($queries, cts:and-query#1)
          else
            $queries
        return
          (: positive query AND NOT (any of the not-queries) :)
          if (fn:exists($not-queries)) then
            cts:and-not-query(
                $positive-query,
                if (fn:count($not-queries) gt 1) then
                  cts:or-query($not-queries)
                else
                  $not-queries
            )
          else
            $positive-query
    (: We want to ignore redundant queries. This only applies to expanded searches to find additional merges that may be added via process-impl:build-match-summary
     : If the queries generated by a document are the same as document previously searched on, it won't result in additional URIs.
     :)
    let $has-redundant-match-combo := $short-circuit-redundant-queries and (
      every $min-query in $minimum-threshold-combinations-queries satisfies
      map:contains($map-of-queries-previously-run, xdmp:md5(document{$min-query}))
    )
    (: remember the hashes of the queries about to be run so later calls can short-circuit :)
    let $_cache-minimum-threshold := if ($short-circuit-redundant-queries and fn:not($has-redundant-match-combo)) then
      for $min-query in $minimum-threshold-combinations-queries
      return map:put($map-of-queries-previously-run, xdmp:md5(document{$min-query}), fn:true())
    else ()
    (: If there are no match queries, just return a cts:false-query  :)
    let $minimum-threshold-combinations-query :=
      if (fn:exists($minimum-threshold-combinations-queries)) then
        helper-impl:group-queries-by-scope($minimum-threshold-combinations-queries, cts:or-query#1)
      else
        cts:false-query()
    (: Exclude the current document and any blocked matches :)
    let $document-uri := xdmp:node-uri($document)
    let $excluded-uris := (
      $document-uri,
      blocks-impl:get-blocks(fn:base-uri($document))/node()
    )
    let $_trace :=
      if ($match-trace-is-enabled) then
        xdmp:trace($const:TRACE-MATCH-RESULTS, "Excluding the following URIs from matching with cts.doc('"|| $document-uri ||"'): " || xdmp:to-json-string($excluded-uris))
      else
        ()
    (: the base content query (when present) is registered and referenced as a registered query :)
    let $match-base-query := cts:and-query((
      ($compiled-options => map:get("baseContentQuery")) ! cts:registered-query(cts:register(.)),
      $minimum-threshold-combinations-query
    ))
    let $match-query :=
      if (fn:exists($excluded-uris)) then
        cts:and-not-query(
            $match-base-query,
            cts:document-query($excluded-uris)
        )
      else
        $match-base-query
    (: an optional interceptor from the compiled options may replace the caller's filter query :)
    let $filter-query-interceptor := $compiled-options => map:get("filterQueryInterceptor")
    let $filter-query :=
      if (fn:exists($filter-query-interceptor)) then
        $filter-query-interceptor($filter-query, $document)
      else
        $filter-query
    (: AND in the filter query unless it is a cts:true-query, then scope to the instance :)
    let $match-query :=
      match-impl:instance-query-wrapper(
          if ($filter-query instance of cts:query+ and fn:not($filter-query instance of cts:true-query)) then
            cts:and-query((
              $filter-query,
              $match-query
            ))
          else
            $match-query,
          $is-json
      )
    let $serialized-match-query :=
      element match-query {
        $match-query
      }
    let $_trace := if ($match-trace-is-enabled) then
      xdmp:trace($const:TRACE-MATCH-RESULTS, "match-query cts.doc('"||$document-uri||"'):" || xdmp:describe($match-query, (),()))
    else ()
    return
    (: If minimum threshold can't be met or we're short-circuiting a query already run, don't bother with estimate and search :)
      if ($minimum-threshold-combinations-query instance of cts:false-query or $has-redundant-match-combo) then
        match-impl:build-empty-results($start, $page-length, $serialized-match-query)
      else
        let $estimate := xdmp:estimate(cts:search(fn:collection(), $match-query, "unfiltered"))
        return (
          (: 250 or more candidates is logged at warning level :)
          if ($estimate ge 250) then
            xdmp:log("A large number ("|| $estimate ||") of potential matches were discovered for document '" || $document-uri || "' with the following query: " || xdmp:describe($match-query, (), ()), "warning")
          else (),
          if ($match-trace-is-enabled) then
            xdmp:trace($const:TRACE-MATCH-RESULTS, "Estimated " || $estimate || " doc(s) found for cts.doc('"|| $document-uri ||"') in " || xdmp:database-name(xdmp:database()))
          else (),
          element results {
            attribute total { $estimate },
            attribute page-length { $page-length },
            attribute start { $start },
            $serialized-match-query,
            if ($include-results and $estimate gt 0) then
              (: one scoring map (name, weight, matchRules, query) per ruleset that produced anything :)
              let $queries-for-scoring :=
                for $query-map in map:get($compiled-options, "queries")
                let $algorithm-results := match-impl:query-map-to-query($document, $query-map, $values-by-property-name, $cached-queries, $query-prov, $target-entity-type)
                let $query := $algorithm-results[. instance of cts:query]
                let $functions := $algorithm-results[. instance of function(*) or . instance of xdmp:function]
                let $_trace :=
                  if ($match-trace-is-enabled) then
                    xdmp:trace($const:TRACE-MATCH-RESULTS, "'"|| $query-map => map:get("name") ||"' query:" || xdmp:describe($algorithm-results,(),()))
                  else ()
                (: query may not exist if there weren't values passed, etc. :)
                where fn:exists($algorithm-results)
                return
                  map:map()
                  => map:with("name", $query-map => map:get("name"))
                  => map:with("weight", $query-map => map:get("weight"))
                  => map:with("matchRules", $query-map => map:get("matchRules"))
                  => map:with("query", (
                      if (fn:count($query) gt 1) then
                        helper-impl:group-queries-by-scope($query, cts:and-query#1)
                      else
                        $query,
                      $functions
                    )
                  )
              let $_trace :=
                if ($match-trace-is-enabled) then
                  xdmp:trace($const:TRACE-MATCH-RESULTS, "cts.doc('"|| $document-uri ||"') property values:" || xdmp:to-json-string($values-by-property-name))
                else ()
              return
                match-impl:search(
                    $document,
                    $match-query,
                    $queries-for-scoring,
                    $minimum-threshold,
                    $start,
                    $page-length,
                    $compiled-options,
                    $include-matches,
                    $is-json,
                    $query-prov
                )
            else (),
            if ($performance-trace-is-enabled) then
              xdmp:trace($const:TRACE-PERFORMANCE, "match-impl:find-document-matches-by-options: " || (xdmp:elapsed-time() - $start-elapsed))
            else ()
          }
        )
  ) else
    match-impl:build-empty-results($start,$page-length, ())
};

(:
 : Build a results element reporting zero matches.
 : @param $start  paging: 1-based index, echoed in the start attribute
 : @param $page-length  paging: echoed in the page-length attribute
 : @param $serialize-match-query  optional node to embed as the element's content
 : @return results element with total="0"
 :)
declare function match-impl:build-empty-results(
    $start as xs:integer,
    $page-length as xs:integer,
    $serialize-match-query as node()?
) {
  let $paging-attributes := (
    attribute total { 0 },
    attribute page-length { $page-length },
    attribute start { $start }
  )
  return
    element results { $paging-attributes, $serialize-match-query }
};

(:
 : Is every item of $s1 equal (general comparison) to at least one item of $s2?
 : An empty $s1 yields true.
 :)
declare function match-impl:seq-contains($s1, $s2)
{
  fn:not(
    some $candidate in $s1 satisfies fn:not($candidate = $s2)
  )
};

(:
 : Convert a map:map describing a match ruleset into the cts:query items (and
 : any function items) generated for the given document.
 :
 : Results are cached in $cached-queries under the ruleset's "matchRulesetId"
 : when the ruleset has no "multiStructPropMultiValueMap". A ruleset without a
 : "matchRulesetId" is never cached (previously the cache write was attempted
 : with an empty key).
 :
 : @param $document  the document the queries are generated for
 : @param $query-map  map:map describing a query to generate
 : @param $values-by-property-name  map:map that organizes document values by property name
 : @param $cached-queries  map:map caching previously generated queries
 : @param $query-prov  map:map to optionally track the provenance of queries contributing to
 :                     a match, keyed by the md5 of each serialized cts:query
 : @param $target-entity  optional entity type name, passed to
 :                        match-impl:multi-struct-prop-multi-value-queries
 : @return the generated cts:query / function items, or the empty sequence when the number of
 :         generated sub-queries differs from the number of "matchQueries" attempted
 :)
declare function match-impl:query-map-to-query(
    $document as node(),
    $query-map as map:map,
    $values-by-property-name as map:map,
    $cached-queries as map:map,
    $query-prov as map:map?,
    $target-entity as xs:string?
)
{
  let $ruleset-id := map:get($query-map, "matchRulesetId")
  return
    if (fn:exists($ruleset-id) and map:contains($cached-queries, $ruleset-id)) then
      map:get($cached-queries, $ruleset-id)
    else
      let $sub-query-maps := map:get($query-map, "matchQueries")
      let $non-multi-struct-queries :=
        for $sub-query-map in $sub-query-maps
        let $algorithm-results :=
          if ($sub-query-map => map:get("type") = "reduce") then
            (: reduce-type sub-queries receive the whole document :)
            ($sub-query-map => map:get("valuesToQueryFunction"))($document)
          else
            let $property-name := fn:string($sub-query-map => map:get("propertyName"))
            let $values := $values-by-property-name => map:get($property-name)
            where fn:exists($values) and fn:not(map:get($sub-query-map, "isMultiStructPropMultiValueComponent"))
            return
              ($sub-query-map => map:get("valuesToQueryFunction"))($values)
        let $queries := $algorithm-results[. instance of cts:query]
        let $functions := $algorithm-results[. instance of function(*) or . instance of xdmp:function]
        let $query :=
          if (fn:count($queries) gt 1) then
          (: if a query function returns multiple queries without explicitly ANDing them, we're assuming that they should be ORed :)
            helper-impl:group-queries-by-scope($queries, cts:or-query#1)
          else
            $queries
        where fn:exists($algorithm-results)
        return (
          (: record which sub-query map produced each cts:query; an existing entry is only
             overwritten when it came from a "reduce" sub-query :)
          if (fn:exists($query-prov)) then
            for $query-hash in document {$query}//schema-element(cts:query) ! xdmp:md5(document{.})
            where fn:not(map:contains($query-prov, $query-hash)) or ((map:get($query-prov, $query-hash) => map:get("type")) = "reduce")
            return map:put($query-prov, $query-hash, $sub-query-map)
          else (),
          (: wrap in a map so each sub-query counts as exactly one item below :)
          map:entry("values", ($query, $functions))
        )
      let $multi-struct-prop-map := map:get($query-map, "multiStructPropMultiValueMap")
      (: Determining the number of queries by counting queries from non-multi-struct-multi-value queries and the key values in the
        multi-struct-multi-values map.
      :)
      let $query-count := fn:sum(($multi-struct-prop-map ! map:count(.), fn:count($non-multi-struct-queries)))
      let $attempted-query-count := fn:count($sub-query-maps)
      let $queries := (
        $non-multi-struct-queries ! (if (. instance of map:map) then map:get(., "values") else .),
        match-impl:multi-struct-prop-multi-value-queries($query-map, $document, $target-entity)
      )
      (: Only return the queries if we get the expected count back, so we don't count null/empty values as matches :)
      let $no-missing-values := $query-count eq $attempted-query-count
      where $no-missing-values
      return (
        (: no caching in the structured properties case (yet); also skip caching when the
           ruleset carries no id, since there is no key to store the result under :)
        if (fn:empty($multi-struct-prop-map) and fn:exists($ruleset-id)) then
          map:put($cached-queries, $ruleset-id, $queries)
        else ()
        ,
        $queries
      )
};

(:
 : Build one scoped query per parent structured property listed (after inversion)
 : in the ruleset's "multiStructPropMultiValueMap". For each parent object found
 : in the document, the child property queries are ANDed; the per-object
 : and-queries are ORed and handed to the parent scope query function.
 : Returns the empty sequence when the map or the target entity type is absent.
 :
 : @param $query-map  ruleset map ("multiStructPropMultiValueMap", "matchQueries", "weight")
 : @param $document  document to read property values from
 : @param $target-entity-type  entity type used to look up property info
 : @return cts:query*
 :)
declare function match-impl:multi-struct-prop-multi-value-queries($query-map, $document, $target-entity-type as xs:string?)
as cts:query*
{
  let $struct-prop-map := map:get($query-map, "multiStructPropMultiValueMap")
  return
    if (fn:exists($struct-prop-map) and fn:exists($target-entity-type)) then
      let $is-json := (xdmp:node-kind($document) = "object" or fn:exists($document/(object-node()|array-node())))

      (: spread the ruleset weight evenly over its match queries (divide by at least 1) :)
      let $divisor := fn:max((1, fn:count(map:get($query-map, "matchQueries"))))
      let $weight := map:get($query-map, "weight") div $divisor

      (: map inversion: keys of the result are used as parent properties, values as their children :)
      let $parent-to-children := -$struct-prop-map
      let $parent-props := map:keys($parent-to-children)

      (: look up entity property info once for every parent and child property :)
      let $prop-info-map := map:map()
      let $_ :=
        for $name in ($parent-props, $parent-props ! map:get($parent-to-children, .))
        return map:put($prop-info-map, $name, es-helper:get-entity-property-info($target-entity-type, $name))

      for $parent-prop in $parent-props
      let $parent-info := map:get($prop-info-map, $parent-prop)
      let $parent-xpath := $parent-info => map:get("pathExpression")
      let $parent-xpath-length := fn:string-length($parent-xpath)
      let $parent-objects := xdmp:unpath($parent-xpath, $parent-info => map:get("namespaces"), $document)
      let $parent-query-fn := helper-impl:get-struct-prop-parent-scope-query-fn($parent-info, $parent-prop)
      let $child-props := map:get($parent-to-children, $parent-prop)
      let $expected-child-count := fn:count($child-props)

      let $per-object-queries :=
        for $parent-object in $parent-objects
        let $child-queries :=
          for $child-prop in $child-props
          let $child-info := map:get($prop-info-map, $child-prop)
          (: child path rewritten to be relative to the parent object :)
          let $relative-xpath := "." || fn:substring($child-info => map:get("pathExpression"), $parent-xpath-length + 1)
          let $child-values := xdmp:unpath($relative-xpath, $child-info => map:get("namespaces"), $parent-object)
          let $child-query-fn := helper-impl:get-value-query-fn($child-info, $child-prop, $is-json)
          return $child-query-fn($child-values, $weight)
        (: only keep parent objects for which every child property produced a query :)
        where $expected-child-count eq fn:count($child-queries)
        return cts:and-query($child-queries)

      return $parent-query-fn(cts:or-query($per-object-queries), $is-json)
    else
      ()
};

(:
 : Collect the document's values for every property named in the compiled
 : options' "propertyNamesToValues" map (property name -> extraction function).
 : Properties for which the function returns nothing are left out.
 :
 : @param $document  document with property values
 : @param $compiled-options  map:map with compiled details about match options
 : @return map:map of values organized by property name
 :)
declare function match-impl:values-by-property-name(
    $document as node()?,
    $compiled-options as map:map
)
{
  let $extractors := map:get($compiled-options, "propertyNamesToValues")
  let $entries :=
    for $name in map:keys($extractors)
    let $extracted := map:get($extractors, $name)($document)
    let $_trace :=
      if ($match-trace-is-enabled) then
        xdmp:trace($const:TRACE-MATCH-RESULTS, "Values for cts.doc(" || xdmp:node-uri($document) || ") " || $name || ": " || xdmp:describe($extracted, (), ()))
      else ()
    return
      if (fn:exists($extracted)) then
        map:entry($name, $extracted)
      else ()
  return map:new($entries)
};

(:
 : Execute the generated search and construct the response.
 : For each candidate on the requested page, every scoring ruleset is checked
 : against the candidate, the weights of the rulesets that hit are summed, and
 : a result element is emitted when the (optionally intercepted) score reaches
 : $min-threshold.
 :
 : @param $document the document node that is being searched with
 : @param $match-query  a query built such that any matches will score at least
 :                      high enough to reach the lowest threshold
 : @param $queries-for-scoring  a sequence of query maps that are used to score matches
 : @param $min-threshold  lowest score required to hit a threshold
 : @param $start  paging: 1-based index
 : @param $page-length  paging
 : @param $compiled-options  compiled match options; "orderedThresholds" and
 :                           "scoreDocumentInterceptor" are read here
 : @param $include-matches  if true, a matches element is added to each result
 : @param $is-json  NOTE(review): not referenced in the function body
 : @param $query-prov an optional map for tracking how options/algorithms contributed to matches
 : @return result elements (one per candidate that reaches $min-threshold)
 :)
declare function match-impl:search(
    $document as node()?,
    $match-query,
    $queries-for-scoring,
    $min-threshold as xs:double,
    $start as xs:int,
    $page-length as xs:int,
    $compiled-options as map:map,
    $include-matches as xs:boolean,
    $is-json as xs:boolean,
    $query-prov as map:map?
) {
  (: 1-based positions of the requested page within the search results :)
  let $range := $start to ($start + $page-length - 1)
  (: NOTE(review): $cts-walk-query is not referenced again in this function :)
  let $cts-walk-query :=
    if ($include-matches) then
      cts:or-query($queries-for-scoring ! map:get(., "query")[. instance of cts:query])
    else ()
  let $thresholds := $compiled-options => map:get("orderedThresholds")
  let $score-document-interceptor := $compiled-options => map:get("scoreDocumentInterceptor")
  (: $pos counts results within the page, not within the whole search :)
  for $result at $pos in cts:search(
      fn:collection(),
      $match-query,
      ("unfiltered", "score-simple"),
      0
  )[fn:position() = $range]
  let $uri := xdmp:node-uri($result)
  let $matching-query-maps :=
    for $query-map in $queries-for-scoring
    let $algorithm-results := $query-map => map:get("query")
    let $query := $algorithm-results[. instance of cts:query]
    let $functions := $algorithm-results[. instance of function(*) or . instance of xdmp:function]
    (: a ruleset hits when its query (if any) is contained in the result and every
       function (if any) returns true for the result :)
    let $contains := fn:exists($algorithm-results) and (fn:empty($query) or cts:contains($result,$query)) and
      (fn:empty($functions) or (every $fun in $functions satisfies xdmp:apply($fun, $result)))
    let $weight := $query-map => map:get("weight")
    let $weight :=
      if (fn:empty($weight) and fn:exists($query)) then
      (: This allows a weight to not be specified for a match ruleset and then the weight of the cts:queries will determine the weight.
          See https://project.marklogic.com/jira/browse/DHFPROD-7234.:)
        let $custom-weight := match-impl:score-from-cts-query($result, $query)
        return $custom-weight
      else
        $weight
    let $_trace :=
      if ($match-trace-is-enabled) then
        let $name := $query-map => map:get("name")
        return (
          xdmp:trace($const:TRACE-MATCH-RESULTS, "Checking cts.doc('" || $uri ||"') for match against ruleset '" || $name || " with weight "|| fn:string($weight) ||": " || $contains),
          xdmp:trace($const:TRACE-MATCH-RESULTS, "Ruleset '" || $name || "' query: " || xdmp:describe($algorithm-results, (),()))
        )
      else ()
    where $contains
    (: copy of the ruleset map with the effective weight filled in :)
    return map:new(($query-map, map:entry("weight", $weight)))
  let $score :=
    fn:sum(
        $matching-query-maps ! map:get(., "weight")
    )
  let $_trace := if ($match-trace-is-enabled) then
    xdmp:trace($const:TRACE-MATCH-RESULTS, "cts.doc('" || $uri || "') score: " || $score || " minimum-threshold: " || $min-threshold)
  else ()
  (: an optional interceptor from the compiled options may replace the computed score :)
  let $score := if (fn:exists($score-document-interceptor)) then
      $score-document-interceptor(
        $score,
        map:entry("value", $document) => map:with("uri", xdmp:node-uri($document)),
        map:entry("value", $result) => map:with("uri", $uri),
        json:to-array($queries-for-scoring)
      )
    else
      $score
  where $score ge $min-threshold
  return
    element result {
      attribute uri {$uri},
      attribute index {$range[fn:position() = $pos]},
      attribute score {$score},
      (: first threshold whose score is not above the result's score;
         presumably "orderedThresholds" is sorted highest first - TODO confirm :)
      let $selected-threshold := fn:head($thresholds[$score ge score])
      return (
        attribute threshold { fn:string($selected-threshold/thresholdName) },
        attribute action { fn:string($selected-threshold/action) }
      ),
      if ($include-matches) then
        element matches {
        (: rather than store the entire node and risk mixing
             content type (json != xml) we store the path to the
             node instead :)
          let $instance := $result/*:envelope/*:instance
          for $match-query-map in $matching-query-maps
          let $ruleset-name := map:get($match-query-map, "name")
          let $ruleset-query := map:get($match-query-map, "query")
          let $ruleset-weight := map:get($match-query-map, "weight")
          (: walk with an OR of the AND-ed sub-queries so each sub-query is reported individually :)
          let $walk-query := if ($ruleset-query instance of cts:and-query) then
            cts:or-query(cts:and-query-queries($ruleset-query))
          else
            $ruleset-query
          let $_trace := if ($match-trace-is-enabled) then
            xdmp:trace($const:TRACE-MATCH-RESULTS, "Walking document with query: " || xdmp:describe($walk-query,(),()))
          else ()
          (: NOTE(review): the enclosed expressions below have no surrounding element
             constructors, which is not valid XQuery. The direct-constructor tags appear
             to have been stripped from this copy of the file and need to be restored
             from the upstream module; the element names cannot be determined from here. :)
          return 
            {$ruleset-name}
            { cts:walk(
                $instance,
                $walk-query,
                (for $query in $cts:queries
                let $query-hash := xdmp:md5(document {$query})
                let $node-name := fn:string(fn:head((fn:node-name($cts:node), fn:node-name($cts:node/..))))
                let $query-map := map:get($query-prov, $query-hash)
                let $_trace := if ($match-trace-is-enabled) then (
                  xdmp:trace($const:TRACE-MATCH-RESULTS, "Walking document and matched query: " || xdmp:describe($query,(),())),
                  if (fn:empty($query-map)) then
                    xdmp:trace($const:TRACE-MATCH-RESULTS, "Query hash '" || $query-hash || "' not found in provenance. All provenance: " || xdmp:to-json-string($query-prov))
                  else
                    xdmp:trace($const:TRACE-MATCH-RESULTS, "Query hash '" || $query-hash || "' found in provenance. Query provenance: " || xdmp:to-json-string($query-map))
                ) else ()
                return
                  
                    {fn:string(fn:head(($query-map ! map:get(., "algorithm") ! fn:string(.), "exact")))}
                    {$node-name}
                    {fn:data($cts:node)}
                    {xdmp:path($cts:node, fn:true())}
                  )
            )
            }
        }
      else ()
    }
};

(: Configuration used to convert XML match results to JSON. :)
declare variable $results-json-config := match-impl:_results-json-config();

(:
 : Build the json:config("custom") map used by match-impl:results-to-json.
 : @return the populated configuration map
 :)
declare function match-impl:_results-json-config()
{
  let $array-element-names := (
    "result", "matches", "algorithms",
    xs:QName("cts:option"), xs:QName("cts:text"), xs:QName("cts:element")
  )
  let $full-element-names := (
    xs:QName("cts:query"),
    xs:QName("cts:and-query"),
    xs:QName("cts:near-query"),
    xs:QName("cts:or-query")
  )
  let $attribute-names := (
    "name","localname", "namespace", "function", "weight",
    "at", "property-name", "weight", "above", "label","algorithm-ref"
  )
  return
    json:config("custom")
    => map:with("array-element-names", $array-element-names)
    => map:with("full-element-names", $full-element-names)
    => map:with("json-children", "queries")
    => map:with("attribute-names", $attribute-names)
    => map:with("camel-case", fn:true())
};

(:
 : Convert XML match results to JSON using $results-json-config.
 : @param $results-xml  XML match results; may be empty
 : @return the JSON object node, or the empty sequence for empty input
 :)
declare function match-impl:results-to-json($results-xml)
as object-node()?
{
  if (fn:empty($results-xml)) then
    ()
  else
    let $json-object := json:transform-to-json-object($results-xml, $results-json-config)
    return xdmp:to-json($json-object)/node()
};

(:
 : Scope a query to the instance section of a document: a JSON property scope
 : query on $const:JSON-INSTANCE for JSON, an element query on
 : $const:XML-INSTANCE otherwise. Returns the empty sequence when the relevant
 : constant is empty.
 :
 : @param $query  the query to wrap
 : @param $is-json  whether the source document is JSON
 :)
declare function match-impl:instance-query-wrapper(
    $query as cts:query,
    $is-json as xs:boolean
) {
  let $scope-name := if ($is-json) then $const:JSON-INSTANCE else $const:XML-INSTANCE
  return
    if (fn:empty($scope-name)) then
      ()
    else if ($is-json) then
      cts:json-property-scope-query($scope-name, $query)
    else
      cts:element-query($scope-name, $query)
};

(:
 : Derive a score for $result from the weights carried by the cts:queries in
 : $query: walk the result with the query and, for each distinct
 : (matched queries, matched text) pair, add the weight attribute (default 1)
 : of every serialized query whose name is listed in $QUERIES_WITH_WEIGHT.
 :
 : @param $result  candidate document node
 : @param $query  the ruleset query to walk the node with
 : @return the summed weight
 :)
declare function match-impl:score-from-cts-query($result as node(), $query as cts:query) as xs:double {
  (: We don't want to double count for the same query/value pair hit :)
  let $seen-hits := map:map()
  let $weights :=
    cts:walk(
        $result,
        $query,
        let $hit-key := xdmp:hash64(xdmp:describe($cts:queries, (), ())) ||  ":" || $cts:text
        return
          if (map:contains($seen-hits, $hit-key)) then
            ()
          else (
            map:put($seen-hits, $hit-key, fn:true()),
            document{$cts:queries}//schema-element(cts:query)[fn:node-name(.) = $QUERIES_WITH_WEIGHT]
              ! fn:number(fn:head((./@weight, 1)))
          )
    )
  return fn:sum($weights)
};




© 2015 - 2024 Weber Informatics LLC | Privacy Policy