// com.lucidworks.spark.query.SolrStreamIterator (source listing from a Maven / Gradle / Ivy artifact page)
// Go to download
package com.lucidworks.spark.query;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.io.stream.SolrStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.common.params.CommonParams;

import java.io.IOException;

/**
 * An iterator over a stream of query results from one Solr core. It is a
 * wrapper over {@link SolrStream} that adapts it to the iterator interface
 * provided by {@link TupleStreamIterator}.
 *
 * <p>This iterator is not thread safe. It is intended to be used within the
 * context of a single thread.
 */
public class SolrStreamIterator extends TupleStreamIterator {

  private static final Logger log = Logger.getLogger(SolrStreamIterator.class);

  protected SolrQuery solrQuery;
  protected String shardUrl;
  protected int numWorkers;
  protected int workerId;
  protected SolrClientCache solrClientCache;
  protected HttpSolrClient httpSolrClient;
  protected CloudSolrClient cloudSolrClient;

  // Remove the whole code around StreamContext, numWorkers, workerId once SOLR-10490 is fixed.
  // It should just work if an 'fq' is passed in the params with the HashQ filter.
  /**
   * Creates an iterator that streams the results of {@code solrQuery} from a single shard.
   *
   * <p>Note that the supplied {@code solrQuery} is modified in place: the request handler is
   * set to {@code /export} when none is set, any {@code rows} setting is cleared, and the
   * response writer ({@code wt}) is set to javabin.
   *
   * @param shardUrl base URL handed to the {@link SolrStream}
   * @param cloudSolrClient cloud client placed into the stream context's client cache
   * @param httpSolrClient http client placed into the stream context's client cache
   * @param solrQuery query to execute; must not be {@code null}; mutated as described above
   * @param numWorkers value copied to {@code StreamContext.numWorkers}
   * @param workerId value copied to {@code StreamContext.workerID}
   */
  public SolrStreamIterator(String shardUrl, CloudSolrClient cloudSolrClient, HttpSolrClient httpSolrClient, SolrQuery solrQuery, int numWorkers, int workerId) {
    super(solrQuery);

    this.shardUrl = shardUrl;
    this.cloudSolrClient = cloudSolrClient;
    this.httpSolrClient = httpSolrClient;
    this.solrQuery = solrQuery;
    this.numWorkers = numWorkers;
    this.workerId = workerId;

    // Only default the handler; an explicitly chosen request handler is left untouched.
    if (solrQuery.getRequestHandler() == null) {
      solrQuery.setRequestHandler("/export");
    }
    solrQuery.setRows(null);
    solrQuery.set(CommonParams.WT, CommonParams.JAVABIN);
  }

  /**
   * Builds a {@link SolrStream} for the configured shard and query, attaches the stream
   * context from {@link #getStreamContext()} and opens it.
   *
   * @return the opened stream
   * @throws RuntimeException wrapping the {@link IOException} if the stream cannot be opened;
   *     the half-opened stream is closed (best effort) before the exception propagates
   */
  protected TupleStream openStream() {
    SolrStream stream = null;
    try {
      stream = new SolrStream(shardUrl, solrQuery);
      stream.setStreamContext(getStreamContext());
      stream.open();
      return stream;
    } catch (IOException openFailure) {
      RuntimeException wrapped =
          new RuntimeException("Failed to open Solr stream for shard " + shardUrl, openFailure);
      closeAfterFailedOpen(stream, wrapped);
      throw wrapped;
    }
  }

  /**
   * Best-effort close of a stream whose open attempt failed. Any failure while closing is
   * recorded as a suppressed exception on {@code failure} so the original error is not masked.
   */
  private static void closeAfterFailedOpen(SolrStream stream, Throwable failure) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException | RuntimeException closeFailure) {
      failure.addSuppressed(closeFailure);
    }
  }

  /**
   * Creates the stream context used by {@link #openStream()}.
   *
   * <p>We have to set the streaming context so that we can pass our own cloud client with
   * authentication. Each call creates a new {@code SparkSolrClientCache} around the configured
   * clients and stores it in {@link #solrClientCache}.
   *
   * @return a new context carrying the client cache, {@code numWorkers} and {@code workerId}
   */
  protected StreamContext getStreamContext() {
    StreamContext context = new StreamContext();
    solrClientCache = new SparkSolrClientCache(cloudSolrClient, httpSolrClient);
    context.setSolrClientCache(solrClientCache);
    context.numWorkers = numWorkers;
    context.workerID = workerId;
    return context;
  }

  /**
   * Intentionally does nothing: the http and cloud clients are re-used from the cache, so
   * they are not closed here.
   */
  protected void afterStreamClosed() throws Exception {
    // No need to close http or cloudClient because they are re-used from cache
  }
}