All downloads are free. Search and download functionalities use the official Maven repository.

com.senseidb.gateway.kafka.SimpleKafkaStreamDataProvider Maven / Gradle / Ivy

There is a newer version: 1.5.7
Show newest version
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.  
 */

package com.senseidb.gateway.kafka;

import java.nio.ByteBuffer;
import java.util.Comparator;

import kafka.api.FetchRequest;
import kafka.api.OffsetRequest;
import kafka.consumer.SimpleConsumer;
import kafka.message.ByteBufferMessageSet;
import kafka.message.MessageAndOffset;

import org.apache.log4j.Logger;
import org.json.JSONObject;

import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.impl.indexing.StreamDataProvider;
import scala.collection.Iterator;

import com.senseidb.indexing.DataSourceFilter;

/**
 * A zoie {@link StreamDataProvider} that pulls JSON events from a single Kafka
 * topic/partition using the low-level (0.7-era) {@link SimpleConsumer} API.
 *
 * <p>Versions are stringified Kafka offsets: each {@link DataEvent} carries the
 * offset the batch cursor was at before the message was consumed, so indexing can
 * resume from a persisted version. Not thread-safe beyond the start/stop flag;
 * {@link #next()} is expected to be driven by zoie's single provider thread.
 */
public class SimpleKafkaStreamDataProvider extends StreamDataProvider<JSONObject> {
  private final String _topic;
  // Current fetch offset; mutated as messages are consumed.
  private long _offset;
  // Offset to rewind to on reset().
  private long _startingOffset;
  // Created in start(), closed in stop(); null until started.
  private SimpleConsumer _kafkaConsumer;

  // Iterator over the most recently fetched message batch; null/exhausted
  // triggers a new fetch in next().
  private Iterator<MessageAndOffset> _msgIter;
  // Reusable per-thread copy buffer so each message payload copy does not
  // allocate a fresh 5MB array.
  private final ThreadLocal<byte[]> bytesFactory;

  // BUGFIX: was Logger.getLogger(KafkaStreamDataProvider.class) — a copy/paste
  // slip that logged this class's messages under the wrong category.
  private static final Logger logger = Logger.getLogger(SimpleKafkaStreamDataProvider.class);

  /** Max message size, also used as the consumer buffer and fetch size. */
  public static final int DEFAULT_MAX_MSG_SIZE = 5 * 1024 * 1024;
  private final String _kafkaHost;
  private final int _kafkaPort;
  private final int _kafkaSoTimeout;
  private volatile boolean _started = false;
  private final DataSourceFilter _dataConverter;

  /**
   * @param versionComparator comparator over stringified offsets, passed to zoie
   * @param kafkaHost         broker host to connect to in {@link #start()}
   * @param kafkaPort         broker port
   * @param soTimeout         socket timeout (ms) for the consumer
   * @param batchSize         zoie batch size
   * @param topic             Kafka topic (partition 0 is always used)
   * @param startingOffset    initial offset; {@code -1} = latest, any other
   *                          value {@code <= 0} = earliest
   * @param dataConverter     converts raw message bytes to JSON; required
   * @throws IllegalArgumentException if {@code dataConverter} is null
   */
  public SimpleKafkaStreamDataProvider(Comparator<String> versionComparator, String kafkaHost,
      int kafkaPort, int soTimeout, int batchSize, String topic, long startingOffset,
      DataSourceFilter dataConverter) {
    super(versionComparator);
    _topic = topic;
    _startingOffset = startingOffset;
    _offset = startingOffset;
    super.setBatchSize(batchSize);
    _kafkaHost = kafkaHost;
    _kafkaPort = kafkaPort;
    _kafkaSoTimeout = soTimeout;
    _kafkaConsumer = null;
    _msgIter = null;
    _dataConverter = dataConverter;
    if (_dataConverter == null) {
      throw new IllegalArgumentException("kafka data converter is null");
    }
    bytesFactory = new ThreadLocal<byte[]>() {
      @Override
      protected byte[] initialValue() {
        return new byte[DEFAULT_MAX_MSG_SIZE];
      }
    };
  }

  /**
   * Repositions the stream at the offset encoded in {@code version}.
   *
   * @throws NumberFormatException if {@code version} is not a parseable long
   */
  @Override
  public void setStartingOffset(String version) {
    _offset = Long.parseLong(version);
  }

  /**
   * Builds the next fetch request, first resolving a sentinel offset
   * ({@code <= 0}) to a concrete broker offset: {@code -1} means the latest
   * available offset, any other non-positive value means the earliest.
   */
  private FetchRequest buildReq() {
    if (_offset <= 0) {
      long time = OffsetRequest.EarliestTime();
      if (_offset == -1) {
        // BUGFIX: was -OffsetRequest.LatestTime(). LatestTime() is the negative
        // sentinel (-1); negating it produced the timestamp +1 ms, which
        // getOffsetsBefore resolves to the EARLIEST offset — the opposite of
        // what the -1 sentinel requests.
        time = OffsetRequest.LatestTime();
      }
      _offset = _kafkaConsumer.getOffsetsBefore(_topic, 0, time, 1)[0];
    }
    return new FetchRequest(_topic, 0, _offset, DEFAULT_MAX_MSG_SIZE);
  }

  /**
   * Returns the next event, fetching a new batch from Kafka when the current
   * one is exhausted.
   *
   * @return the next event, or {@code null} when the provider is not started,
   *         no data is available, or conversion failed (the error is logged and
   *         the message is skipped)
   */
  @Override
  public DataEvent<JSONObject> next() {
    if (!_started) return null;
    if (_msgIter == null || !_msgIter.hasNext()) {
      if (logger.isDebugEnabled()) {
        logger.debug("fetching new batch from offset: " + _offset);
      }
      FetchRequest req = buildReq();
      ByteBufferMessageSet msgSet = _kafkaConsumer.fetch(req);
      _msgIter = msgSet.iterator();
    }

    if (_msgIter == null || !_msgIter.hasNext()) {
      if (logger.isDebugEnabled()) {
        logger.debug("no more data, msgIter: " + _msgIter);
      }
      return null;
    }

    MessageAndOffset msg = _msgIter.next();
    if (logger.isDebugEnabled()) {
      logger.debug("got new message: " + msg);
    }
    // The event's version is the offset BEFORE this message was consumed, so a
    // restart from this version re-fetches starting at this message.
    long version = _offset;
    _offset = msg.offset();

    JSONObject data;
    try {
      int size = msg.message().payloadSize();
      ByteBuffer byteBuffer = msg.message().payload();
      // Copy into the reusable buffer; payloads never exceed
      // DEFAULT_MAX_MSG_SIZE because the fetch size is capped at that value.
      byte[] bytes = bytesFactory.get();
      byteBuffer.get(bytes, 0, size);

      data = _dataConverter.filter(new DataPacket(bytes, 0, size));

      if (logger.isDebugEnabled()) {
        logger.debug("message converted: " + data);
      }
      return new DataEvent<JSONObject>(data, String.valueOf(version));
    } catch (Exception e) {
      // Best-effort: log and drop the bad message rather than killing the
      // provider thread; the offset has already advanced past it.
      logger.error(e.getMessage(), e);
      return null;
    }
  }

  /** Rewinds the stream to the offset given at construction time. */
  @Override
  public void reset() {
    _offset = _startingOffset;
  }

  /** Connects the consumer and marks the provider as started. */
  @Override
  public void start() {
    _kafkaConsumer = new SimpleConsumer(_kafkaHost, _kafkaPort, _kafkaSoTimeout,
        DEFAULT_MAX_MSG_SIZE);
    super.start();
    _started = true;
  }

  /** Stops accepting next() calls, then closes the consumer. */
  @Override
  public void stop() {
    _started = false;
    try {
      if (_kafkaConsumer != null) {
        _kafkaConsumer.close();
      }
    } finally {
      super.stop();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy