org.apache.solr.search.join.GraphQuery
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.join;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.TreeSet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * GraphQuery - search for nodes and traverse edges in an index.
 *
 * <p>Params:
 *
 * <ul>
 *   <li>fromField = the field that contains the node id
 *   <li>toField = the field that contains the edge ids
 *   <li>traversalFilter = a query that can be applied for each hop in the graph.
 *   <li>maxDepth = the max depth to traverse (start nodes are depth=1).
 *   <li>onlyLeafNodes = only return documents that have no edge id values.
 *   <li>returnRoot = if false, the documents matching the initial query will not be returned.
 * </ul>
 *
 * @lucene.experimental
 */
public class GraphQuery extends Query {

  /** The initial node matching query */
  private Query q;
  /** the field with the node id */
  private String fromField;
  /** the field containing the edge ids */
  private String toField;
  /** A query to apply while traversing the graph to filter out edges */
  private Query traversalFilter;
  /** The max depth to traverse the graph, -1 means no limit. */
  private int maxDepth = -1;
  /** Use automaton compilation for graph query traversal (experimental + expert use only) */
  private boolean useAutn = true;
  /**
   * If this is true, the graph traversal result will only return documents that do not have a
   * value in the edge field. (Only leaf nodes returned from the graph)
   */
  private boolean onlyLeafNodes = false;
  /**
   * False if documents matching the start query for the graph will be excluded from the final
   * result set.
   */
  private boolean returnRoot = true;

  /**
   * Create a graph query.
   *
   * @param q the starting node query
   * @param fromField the field containing the node id
   * @param toField the field containing the edge ids
   */
  public GraphQuery(Query q, String fromField, String toField) {
    this(q, fromField, toField, null);
  }

  /**
   * Create a graph query with a traversal filter applied while traversing the frontier.
   *
   * @param q the starting node query
   * @param fromField the field containing the node id
   * @param toField the field containing the edge ids
   * @param traversalFilter the filter to be applied on each iteration of the frontier
   */
  public GraphQuery(Query q, String fromField, String toField, Query traversalFilter) {
    this.q = q;
    this.fromField = fromField;
    this.toField = toField;
    this.traversalFilter = traversalFilter;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    Weight graphWeight = new GraphQueryWeight((SolrIndexSearcher) searcher, boost);
    return graphWeight;
  }

  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    sb.append("[[")
        .append(q.toString())
        .append("],")
        .append(fromField)
        .append('=')
        .append(toField)
        .append(']');
    if (traversalFilter != null) {
      sb.append(" [TraversalFilter: ").append(traversalFilter.toString()).append(']');
    }
    sb.append("[maxDepth=").append(maxDepth).append(']');
    sb.append("[returnRoot=").append(returnRoot).append(']');
    sb.append("[onlyLeafNodes=").append(onlyLeafNodes).append(']');
    sb.append("[useAutn=").append(useAutn).append(']');
    return sb.toString();
  }
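  /**
   * Custom Weight that runs the breadth-first graph traversal. The full result DocSet is computed
   * lazily on the first scorer request and then reused for every segment; matching documents are
   * returned with a constant score (there is no per-document ranking across the graph).
   */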
  protected class GraphQueryWeight extends Weight {

    final SolrIndexSearcher fromSearcher;
    private int currentDepth = -1;
    private DocSet resultSet;
    SchemaField collectSchemaField; // the field to collect values from
    SchemaField matchSchemaField; // the field to match those values

    public GraphQueryWeight(SolrIndexSearcher searcher, float boost) {
      // Grab the searcher so we can run additional searches.
      super(null);
      this.fromSearcher = searcher;
      this.matchSchemaField = searcher.getSchema().getField(fromField);
      this.collectSchemaField = searcher.getSchema().getField(toField);
    }

    GraphQuery getGraphQuery() {
      return GraphQuery.this;
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      // currently no ranking for graph queries.
      final Scorer cs = scorer(context);
      final boolean exists = (cs != null && cs.iterator().advance(doc) == doc);
      if (exists) {
        List<Explanation> subs = new ArrayList<>();
        return Explanation.match(1.0F, "Graph Match", subs);
      } else {
        List<Explanation> subs = new ArrayList<>();
        return Explanation.noMatch("No Graph Match.", subs);
      }
    }

    /**
     * This computes the matching doc set for a given graph query
     *
     * @return DocSet representing the documents in the graph.
     * @throws IOException - if a sub search fails... maybe other cases too! :)
     */
    private DocSet getDocSet() throws IOException {
      // Size that the bit set needs to be.
      int capacity = fromSearcher.getRawReader().maxDoc();
      // The bit set to contain the results that match the query.
      FixedBitSet resultBits = new FixedBitSet(capacity);
      // this holds the result at each level
      BitDocSet fromSet = null;
      // the root docs if we return root is false
      FixedBitSet rootBits = null;
      // the initial query for the frontier for the first query
      Query frontierQuery = q;
      // Find all documents in this graph that are leaf nodes to speed traversal
      DocSet leafNodes = resolveLeafNodes();
      // Start the breadth first graph traversal.
      do {
        // Increment how far we have gone in the frontier.
        currentDepth++;
        // if we are at the max level we don't need the graph terms collector.
        // TODO validate that the join case works properly.
        if (maxDepth != -1 && currentDepth >= maxDepth) {
          // if we've reached the max depth, don't worry about collecting edges.
          fromSet = fromSearcher.getDocSetBits(frontierQuery);
          // explicitly the frontier size is zero now so we can break
          frontierQuery = null;
        } else {
          // when we're not at the max depth level, we need to collect edges
          // Create the graph result collector for this level
          GraphEdgeCollector graphResultCollector =
              collectSchemaField.getType().isPointField()
                  ? new GraphPointsCollector(
                      collectSchemaField, new BitDocSet(resultBits), leafNodes)
                  : new GraphEdgeCollector.GraphTermsCollector(
                      collectSchemaField, new BitDocSet(resultBits), leafNodes);
          fromSet = new BitDocSet(new FixedBitSet(capacity));
          graphResultCollector.setCollectDocs(fromSet.getBits());
          fromSearcher.search(frontierQuery, graphResultCollector);
          frontierQuery = graphResultCollector.getResultQuery(matchSchemaField, isUseAutn());
          // If there is a filter to be used while crawling the graph, add that.
          if (frontierQuery != null && getTraversalFilter() != null) {
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(frontierQuery, BooleanClause.Occur.MUST);
            builder.add(getTraversalFilter(), BooleanClause.Occur.MUST);
            frontierQuery = builder.build();
          }
        }
        if (currentDepth == 0 && !returnRoot) {
          // grab a copy of the root bits but only if we need it.
          rootBits = fromSet.getBits();
        }
        // Add the bits from this level to the result set.
        resultBits.or(fromSet.getBits());
        // test if we discovered any new edges; if not, we're done.
        if ((maxDepth != -1 && currentDepth >= maxDepth)) {
          break;
        }
      } while (frontierQuery != null);
      // helper bit set operations on the final result set
      if (!returnRoot) {
        resultBits.andNot(rootBits);
      }
      // this is the final resulting filter.
      BitDocSet resultSet = new BitDocSet(resultBits);
      // If we only want to return leaf nodes do that here.
      if (onlyLeafNodes) {
        return resultSet.intersection(leafNodes);
      } else {
        return resultSet;
      }
    }
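    /**
     * Returns the set of documents that have no value in the edge (to) field, i.e. the leaf nodes
     * of the graph. Uses a docValues existence check when available, otherwise a wildcard query.
     */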
    private DocSet resolveLeafNodes() throws IOException {
      String field = collectSchemaField.getName();
      BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
      Query edgeQuery =
          collectSchemaField.hasDocValues()
              ? new DocValuesFieldExistsQuery(field)
              : new WildcardQuery(new Term(field, "*"));
      leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
      DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
      return leafNodes;
    }

    /** Build an automaton to represent the frontier query */
    private Automaton buildAutomaton(BytesRefHash termBytesHash) {
      // need to pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)
      final TreeSet<BytesRef> terms = new TreeSet<>();
      for (int i = 0; i < termBytesHash.size(); i++) {
        BytesRef ref = new BytesRef();
        termBytesHash.get(i, ref);
        terms.add(ref);
      }
      final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
      return a;
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      if (resultSet == null) {
        resultSet = getDocSet();
      }
      DocIdSetIterator disi = resultSet.iterator(context);
      // create a scorer on the result set; if the results from the right query are empty, use an
      // empty iterator.
      return new GraphScorer(this, disi == null ? DocIdSetIterator.empty() : disi, 1);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  }
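  /** Constant-score scorer that simply walks the precomputed result set iterator. */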
  private static class GraphScorer extends Scorer {

    final DocIdSetIterator iter;
    final float score;

    // graph query scorer constructor with iterator
    public GraphScorer(Weight w, DocIdSetIterator iter, float score) throws IOException {
      super(w);
      this.iter = iter == null ? DocIdSet.EMPTY.iterator() : iter;
      this.score = score;
    }

    @Override
    public float score() throws IOException {
      // no dynamic scoring now.
      return score;
    }

    @Override
    public float getMaxScore(int upTo) throws IOException {
      return score;
    }

    @Override
    public DocIdSetIterator iterator() {
      return iter;
    }

    @Override
    public int docID() {
      // current position of the doc iterator.
      return iter.docID();
    }
  }

  /**
   * @return The query to be used as a filter for each hop in the graph.
   */
  public Query getTraversalFilter() {
    return traversalFilter;
  }

  public void setTraversalFilter(Query traversalFilter) {
    this.traversalFilter = traversalFilter;
  }

  public Query getQ() {
    return q;
  }

  public void setQ(Query q) {
    this.q = q;
  }

  /**
   * @return The field that contains the node id
   */
  public String getFromField() {
    return fromField;
  }

  public void setFromField(String fromField) {
    this.fromField = fromField;
  }

  /**
   * @return the field that contains the edge id(s)
   */
  public String getToField() {
    return toField;
  }

  public void setToField(String toField) {
    this.toField = toField;
  }

  /**
   * @return Max depth for traversal, -1 for infinite!
   */
  public int getMaxDepth() {
    return maxDepth;
  }

  public void setMaxDepth(int maxDepth) {
    this.maxDepth = maxDepth;
  }

  /**
   * @return If true, an automaton query will be compiled for each new frontier traversal; this
   *     helps to avoid max boolean clause errors.
   */
  public boolean isUseAutn() {
    return useAutn;
  }

  public void setUseAutn(boolean useAutn) {
    this.useAutn = useAutn;
  }

  /**
   * @return if true, only documents that do not have a value in the edge id field will be
   *     returned.
   */
  public boolean isOnlyLeafNodes() {
    return onlyLeafNodes;
  }

  public void setOnlyLeafNodes(boolean onlyLeafNodes) {
    this.onlyLeafNodes = onlyLeafNodes;
  }

  /**
   * @return if true, the documents that matched the rootNodes query will be returned; otherwise
   *     they will be removed from the result set.
   */
  public boolean isReturnRoot() {
    return returnRoot;
  }

  public void setReturnRoot(boolean returnRoot) {
    this.returnRoot = returnRoot;
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = classHash();
    result = prime * result + Objects.hashCode(fromField);
    result = prime * result + maxDepth;
    result = prime * result + (onlyLeafNodes ? 1231 : 1237);
    result = prime * result + Objects.hashCode(q);
    result = prime * result + (returnRoot ? 1231 : 1237);
    result = prime * result + Objects.hashCode(toField);
    result = prime * result + Objects.hashCode(traversalFilter);
    result = prime * result + (useAutn ? 1231 : 1237);
    return result;
  }

  @Override
  public boolean equals(Object other) {
    return sameClassAs(other) && equalsTo(getClass().cast(other));
  }

  private boolean equalsTo(GraphQuery other) {
    return Objects.equals(fromField, other.fromField)
        && maxDepth == other.maxDepth
        && onlyLeafNodes == other.onlyLeafNodes
        && returnRoot == other.returnRoot
        && useAutn == other.useAutn
        && Objects.equals(q, other.q)
        && Objects.equals(toField, other.toField)
        && Objects.equals(traversalFilter, other.traversalFilter);
  }

  @Override
  public void visit(QueryVisitor visitor) {
    visitor.visitLeaf(this);
  }
}
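For orientation, a minimal usage sketch follows. In practice Solr normally builds this query through the {!graph from=... to=...} query parser; constructing it directly looks roughly like this, where the schema field names and the seed query are illustrative assumptions, and "searcher" is a SolrIndexSearcher obtained from the request context:

// Find everything reachable from documents whose name_s is "root",
// following node_id_s -> edge_ids_ss links.
Query seed = new TermQuery(new Term("name_s", "root"));
GraphQuery graph = new GraphQuery(seed, "node_id_s", "edge_ids_ss");
graph.setMaxDepth(3);          // stop after three hops; -1 (the default) is unbounded
graph.setReturnRoot(false);    // drop the seed documents from the final result
graph.setOnlyLeafNodes(true);  // keep only documents with no values in edge_ids_ss
DocSet reachable = searcher.getDocSet(graph);  // runs the breadth-first traversal

Note that each call to setMaxDepth/setReturnRoot/setOnlyLeafNodes changes the query's equals/hashCode identity, so configure the instance fully before handing it to a cache-aware searcher.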




