// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.solr.client.solrj.io.stream.CloudSolrStream Maven / Gradle / Ivy
//
// There is a newer version: 9.5.0
// Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.client.solrj.io.stream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.stream.Collectors;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator;
import org.apache.solr.client.solrj.io.comp.MultipleFieldComparator;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrjNamedThreadFactory;

/**
 * Connects to Zookeeper to pick replicas from a specific collection to send the query to.
 * Under the covers the SolrStream instances send the query to the replicas.
 * SolrStreams are opened using a thread pool, but a single thread is used
 * to iterate and merge Tuples from each SolrStream.
 **/

public class CloudSolrStream extends TupleStream implements Expressible {

  private static final long serialVersionUID = 1;

  protected String zkHost;
  protected String collection;
  protected SolrParams params;
  private Map fieldMappings;
  protected StreamComparator comp;
  private boolean trace;
  protected transient Map eofTuples;
  protected transient CloudSolrClient cloudSolrClient;
  protected transient List solrStreams;
  protected transient TreeSet tuples;
  protected transient StreamContext streamContext;

  // Used by parallel stream
  protected CloudSolrStream(){
    
  }


  /**
   * @param zkHost         Zookeeper ensemble connection string
   * @param collectionName Name of the collection to operate on
   * @param params         Map<String, String> of parameter/value pairs
   * @throws IOException Something went wrong
   *                     

* This form does not allow specifying multiple clauses, say "fq" clauses, use the form that * takes a SolrParams. Transition code can call the preferred method that takes SolrParams * by calling CloudSolrStream(zkHost, collectionName, * new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(Map<String, String>))); * @deprecated Use the constructor that has a SolrParams obj rather than a Map */ @Deprecated public CloudSolrStream(String zkHost, String collectionName, Map params) throws IOException { init(collectionName, zkHost, new MapSolrParams(params)); } /** * @param zkHost Zookeeper ensemble connection string * @param collectionName Name of the collection to operate on * @param params Map<String, String[]> of parameter/value pairs * @throws IOException Something went wrong */ public CloudSolrStream(String zkHost, String collectionName, SolrParams params) throws IOException { init(collectionName, zkHost, params); } public CloudSolrStream(StreamExpression expression, StreamFactory factory) throws IOException{ // grab all parameters out String collectionName = factory.getValueOperand(expression, 0); List namedParams = factory.getNamedOperands(expression); StreamExpressionNamedParameter aliasExpression = factory.getNamedOperand(expression, "aliases"); StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost"); // Collection Name if(null == collectionName){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression)); } // Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice if(expression.getParameters().size() != 1 + namedParams.size()){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression)); } // Named parameters - passed directly to solr as solrparams if(0 == namedParams.size()){ throw new IOException(String.format(Locale.ROOT,"invalid 
expression %s - at least one named parameter expected. eg. 'q=*:*'",expression)); } ModifiableSolrParams mParams = new ModifiableSolrParams(); for(StreamExpressionNamedParameter namedParam : namedParams){ if(!namedParam.getName().equals("zkHost") && !namedParam.getName().equals("aliases")){ mParams.add(namedParam.getName(), namedParam.getParameter().toString().trim()); } } // Aliases, optional, if provided then need to split if(null != aliasExpression && aliasExpression.getParameter() instanceof StreamExpressionValue){ fieldMappings = new HashMap<>(); for(String mapping : ((StreamExpressionValue)aliasExpression.getParameter()).getValue().split(",")){ String[] parts = mapping.trim().split("="); if(2 == parts.length){ fieldMappings.put(parts[0], parts[1]); } else{ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - alias expected of the format origName=newName",expression)); } } } // zkHost, optional - if not provided then will look into factory list to get String zkHost = null; if(null == zkHostExpression){ zkHost = factory.getCollectionZkHost(collectionName); if(zkHost == null) { zkHost = factory.getDefaultZkHost(); } } else if(zkHostExpression.getParameter() instanceof StreamExpressionValue){ zkHost = ((StreamExpressionValue)zkHostExpression.getParameter()).getValue(); } if(null == zkHost){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - zkHost not found for collection '%s'",expression,collectionName)); } // We've got all the required items init(collectionName, zkHost, mParams); } @Override public StreamExpression toExpression(StreamFactory factory) throws IOException { // functionName(collectionName, param1, param2, ..., paramN, sort="comp", [aliases="field=alias,..."]) // function name StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass())); // collection expression.addParameter(collection); // parameters ModifiableSolrParams mParams = new 
ModifiableSolrParams(SolrParams.toMultiMap(params.toNamedList())); for (Entry param : mParams.getMap().entrySet()) { String value = String.join(",", param.getValue()); // SOLR-8409: This is a special case where the params contain a " character // Do note that in any other BASE streams with parameters where a " might come into play // that this same replacement needs to take place. value = value.replace("\"", "\\\""); expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), value)); } // zkHost expression.addParameter(new StreamExpressionNamedParameter("zkHost", zkHost)); // aliases if(null != fieldMappings && 0 != fieldMappings.size()){ StringBuilder sb = new StringBuilder(); for(Entry mapping : fieldMappings.entrySet()){ if(sb.length() > 0){ sb.append(","); } sb.append(mapping.getKey()); sb.append("="); sb.append(mapping.getValue()); } expression.addParameter(new StreamExpressionNamedParameter("aliases", sb.toString())); } return expression; } @Override public Explanation toExplanation(StreamFactory factory) throws IOException { StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString()); explanation.setFunctionName(factory.getFunctionName(this.getClass())); explanation.setImplementingClass(this.getClass().getName()); explanation.setExpressionType(ExpressionType.STREAM_SOURCE); explanation.setExpression(toExpression(factory).toString()); // child is a datastore so add it at this point StreamExplanation child = new StreamExplanation(getStreamNodeId() + "-datastore"); child.setFunctionName(String.format(Locale.ROOT, "solr (%s)", collection)); child.setImplementingClass("Solr/Lucene"); child.setExpressionType(ExpressionType.DATASTORE); if(null != params){ ModifiableSolrParams mParams = new ModifiableSolrParams(params); child.setExpression(mParams.getMap().entrySet().stream().map(e -> String.format(Locale.ROOT, "%s=%s", e.getKey(), e.getValue())).collect(Collectors.joining(","))); } explanation.addChild(child); return 
explanation; } private void init(String collectionName, String zkHost, SolrParams params) throws IOException { this.zkHost = zkHost; this.collection = collectionName; this.params = new ModifiableSolrParams(params); // If the comparator is null then it was not explicitly set so we will create one using the sort parameter // of the query. While doing this we will also take into account any aliases such that if we are sorting on // fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA. if (params.get("q") == null) { throw new IOException("q param expected for search function"); } if (params.getParams("fl") == null) { throw new IOException("fl param expected for search function"); } String fls = String.join(",", params.getParams("fl")); if (params.getParams("sort") == null) { throw new IOException("sort param expected for search function"); } String sorts = String.join(",", params.getParams("sort")); this.comp = parseComp(sorts, fls); } public void setFieldMappings(Map fieldMappings) { this.fieldMappings = fieldMappings; } public void setTrace(boolean trace) { this.trace = trace; } public void setStreamContext(StreamContext context) { this.streamContext = context; } /** * Opens the CloudSolrStream * ***/ public void open() throws IOException { this.tuples = new TreeSet(); this.solrStreams = new ArrayList(); this.eofTuples = Collections.synchronizedMap(new HashMap()); if (this.streamContext != null && this.streamContext.getSolrClientCache() != null) { this.cloudSolrClient = this.streamContext.getSolrClientCache().getCloudSolrClient(zkHost); } else { this.cloudSolrClient = new Builder() .withZkHost(zkHost) .build(); this.cloudSolrClient.connect(); } constructStreams(); openStreams(); } public Map getEofTuples() { return this.eofTuples; } public List children() { return solrStreams; } private StreamComparator parseComp(String sort, String fl) throws IOException { String[] fls = fl.split(","); HashSet fieldSet = new HashSet(); 
for(String f : fls) { fieldSet.add(f.trim()); //Handle spaces in the field list. } String[] sorts = sort.split(","); StreamComparator[] comps = new StreamComparator[sorts.length]; for(int i=0; i 1) { return new MultipleFieldComparator(comps); } else { return comps[0]; } } protected void constructStreams() throws IOException { try { ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader(); ClusterState clusterState = zkStateReader.getClusterState(); Set liveNodes = clusterState.getLiveNodes(); //System.out.println("Connected to zk an got cluster state."); Collection slices = clusterState.getActiveSlices(this.collection); if (slices == null) { //Try case insensitive match Map collectionsMap = clusterState.getCollectionsMap(); for (Map.Entry entry : collectionsMap.entrySet()) { if (entry.getKey().equalsIgnoreCase(collection)) { slices = entry.getValue().getActiveSlices(); break; } } if (slices == null) { throw new Exception("Collection not found:" + this.collection); } } ModifiableSolrParams mParams = new ModifiableSolrParams(params); mParams.set("distrib", "false"); // We are the aggregator. 
for(Slice slice : slices) { Collection replicas = slice.getReplicas(); List shuffler = new ArrayList(); for(Replica replica : replicas) { if(replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) shuffler.add(replica); } Collections.shuffle(shuffler, new Random()); Replica rep = shuffler.get(0); ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep); String url = zkProps.getCoreUrl(); SolrStream solrStream = new SolrStream(url, mParams); if(streamContext != null) { solrStream.setStreamContext(streamContext); } solrStream.setFieldMappings(this.fieldMappings); solrStreams.add(solrStream); } } catch (Exception e) { throw new IOException(e); } } private void openStreams() throws IOException { ExecutorService service = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("CloudSolrStream")); try { List> futures = new ArrayList(); for (TupleStream solrStream : solrStreams) { StreamOpener so = new StreamOpener((SolrStream) solrStream, comp); Future future = service.submit(so); futures.add(future); } try { for (Future f : futures) { TupleWrapper w = f.get(); if (w != null) { tuples.add(w); } } } catch (Exception e) { throw new IOException(e); } } finally { service.shutdown(); } } /** * Closes the CloudSolrStream **/ public void close() throws IOException { if(solrStreams != null) { for (TupleStream solrStream : solrStreams) { solrStream.close(); } } if ((this.streamContext == null || this.streamContext.getSolrClientCache() == null) && cloudSolrClient != null) { cloudSolrClient.close(); } } /** Return the stream sort - ie, the order in which records are returned */ public StreamComparator getStreamSort(){ return comp; } public Tuple read() throws IOException { return _read(); } protected Tuple _read() throws IOException { TupleWrapper tw = tuples.pollFirst(); if(tw != null) { Tuple t = tw.getTuple(); if (trace) { t.put("_COLLECTION_", this.collection); } if(tw.next()) { tuples.add(tw); } return t; } else { Map m = new HashMap(); 
if(trace) { m.put("_COLLECTION_", this.collection); } m.put("EOF", true); return new Tuple(m); } } protected class TupleWrapper implements Comparable { private Tuple tuple; private SolrStream stream; private StreamComparator comp; public TupleWrapper(SolrStream stream, StreamComparator comp) { this.stream = stream; this.comp = comp; } public int compareTo(TupleWrapper w) { if(this == w) { return 0; } int i = comp.compare(tuple, w.tuple); if(i == 0) { return 1; } else { return i; } } public boolean equals(Object o) { return this == o; } public Tuple getTuple() { return tuple; } public boolean next() throws IOException { this.tuple = stream.read(); if(tuple.EOF) { eofTuples.put(stream.getBaseUrl(), tuple); } return !tuple.EOF; } } protected class StreamOpener implements Callable { private SolrStream stream; private StreamComparator comp; public StreamOpener(SolrStream stream, StreamComparator comp) { this.stream = stream; this.comp = comp; } public TupleWrapper call() throws Exception { stream.open(); TupleWrapper wrapper = new TupleWrapper(stream, comp); if(wrapper.next()) { return wrapper; } else { return null; } } } }





// © 2015 - 2024 Weber Informatics LLC | Privacy Policy