All downloads are free. The search and download functionality uses the official Maven repository.

io.cdap.plugin.common.SolrRecordWriter Maven / Gradle / Ivy

/*
 * Copyright © 2016-2019 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package io.cdap.plugin.common;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.format.StructuredRecordStringConverter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;

import java.io.IOException;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * SolrRecordWriter - a Hadoop {@link RecordWriter} that builds a Solr index.
 *
 * <p>Each call to {@link #write(Text, Text)} converts one record into a {@link SolrInputDocument} and buffers it.
 * Buffered documents are added to Solr and committed whenever the buffer reaches the configured batch size, and
 * once more when the writer is closed.
 *
 * <p>All settings are read from the Hadoop job {@link Configuration} using the {@code solr.*} keys declared as
 * constants on this class.
 */
public class SolrRecordWriter extends RecordWriter<Text, Text> {
  public static final String SERVER_URL = "solr.server.url";
  public static final String SERVER_MODE = "solr.server.mode";
  public static final String COLLECTION_NAME = "solr.server.collection";
  public static final String KEY_FIELD = "solr.server.keyfield";
  public static final String FIELD_MAPPINGS = "solr.output.field.mappings";
  public static final String BATCH_SIZE = "solr.batch.size";
  private static final Gson GSON = new Gson();
  private static final Type SCHEMA_TYPE = new TypeToken<Schema>() { }.getType();
  private final SolrSearchSinkConfig config;
  /** Documents waiting to be sent to Solr in the next batch. */
  private final List<SolrInputDocument> documentList = new ArrayList<>();
  private final SolrClient solrClient;
  private final int batchSize;
  /** Maps an input record field name to the Solr field name it should be indexed under. */
  private final Map<String, String> outputFieldMap;

  /**
   * Creates a writer configured from the job configuration of the given task attempt.
   *
   * @param context task attempt whose configuration supplies the {@code solr.*} settings
   * @throws NumberFormatException if the {@link #BATCH_SIZE} setting is missing or is not an integer
   */
  public SolrRecordWriter(TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    // Parse the batch size before obtaining the client so that a bad setting does not abandon a client
    // that has already been created.
    batchSize = Integer.parseInt(conf.get(BATCH_SIZE));
    config = new SolrSearchSinkConfig(null, conf.get(SERVER_MODE), conf.get(SERVER_URL), conf.get(COLLECTION_NAME),
                                      conf.get(KEY_FIELD), conf.get(FIELD_MAPPINGS));
    solrClient = config.getSolrConnection();
    outputFieldMap = config.createOutputFieldMap();
  }

  /**
   * Converts one record into a Solr document, buffers it, and flushes the buffer once it holds at least
   * {@code batchSize} documents.
   *
   * @param key JSON representation of the record's {@link Schema}
   * @param value JSON representation of the record, parsed using the schema from {@code key}
   * @throws IOException if communication with Solr fails during a flush
   * @throws IllegalArgumentException if Solr reports a server error during a flush
   */
  @Override
  public void write(Text key, Text value) throws IOException {
    Schema inputSchema = GSON.fromJson(key.toString(), SCHEMA_TYPE);
    StructuredRecord structuredRecord = StructuredRecordStringConverter.fromJsonString(value.toString(), inputSchema);

    SolrInputDocument document = new SolrInputDocument();
    for (Schema.Field field : structuredRecord.getSchema().getFields()) {
      String fieldName = field.getName();
      // Fields without an explicit mapping are indexed under their own name.
      String solrFieldName = outputFieldMap.containsKey(fieldName) ? outputFieldMap.get(fieldName) : fieldName;
      document.addField(solrFieldName, structuredRecord.get(fieldName));
    }
    documentList.add(document);

    // ">=" rather than "==": a batch size of zero or less, or a buffer that has grown past the threshold,
    // must still trigger a flush instead of buffering without bound until close().
    if (documentList.size() >= batchSize) {
      flush();
    }
  }

  /**
   * Sends any documents still buffered to Solr, then discards the buffer and shuts the client down. The buffer
   * is cleared and the client shut down even when the final flush fails.
   *
   * @param context the task attempt being closed (not used)
   * @throws IOException if communication with Solr fails during the final flush
   * @throws IllegalArgumentException if Solr reports a server error during the final flush
   */
  @Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    try {
      flush();
    } finally {
      documentList.clear();
      solrClient.shutdown();
    }
  }

  /**
   * Adds all buffered documents to Solr, commits, and empties the buffer; does nothing when the buffer is empty.
   * The buffer is left untouched if the add or commit fails.
   */
  private void flush() throws IOException {
    if (documentList.isEmpty()) {
      return;
    }
    try {
      solrClient.add(documentList);
      solrClient.commit();
    } catch (SolrServerException e) {
      throw new IllegalArgumentException("Exception while indexing the documents to Solr. For more details, Please " +
                                           "check the logs.", e);
    }
    documentList.clear();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy