All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.io.fs.AbstractReconciler Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.io.fs;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Maps;
import com.google.common.collect.Queues;

import com.datatorrent.api.Context;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.Operator.CheckpointListener;
import com.datatorrent.api.Operator.IdleTimeHandler;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.common.util.NameableThreadFactory;
import com.datatorrent.netlet.util.DTThrowable;

/**
 * This base operator queues input tuples for each window and asynchronously processes them after the window is committed.
 *
 * The operator holds all the tuple info in memory until the committed window and then calls the processCommittedData method
 * to give an opportunity to process tuple info from each committed window.
 *
 * This operator can be implemented to asynchronously read and process data that is being written by current application.
 *
 * Use case examples: write to relational database, write to an external queue etc without blocking the dag i/o.
 *
 * @param       input type
 * @param  tuple enqueued each window to be processed after window is committed
 * @since 2.0.0
 */
@org.apache.hadoop.classification.InterfaceStability.Evolving
public abstract class AbstractReconciler extends BaseOperator implements CheckpointListener, IdleTimeHandler
{
  private static final Logger logger = LoggerFactory.getLogger(AbstractReconciler.class);
  public transient DefaultInputPort input = new DefaultInputPort()
  {
    @Override
    public void process(INPUT input)
    {
      processTuple(input);
    }

  };
  protected transient ExecutorService executorService;
  protected long currentWindowId;
  protected transient int spinningTime = 10;
  // this stores the mapping from the window to the list of enqueued tuples
  private Map> currentWindowTuples = Maps.newConcurrentMap();
  private Queue currentWindows = Queues.newLinkedBlockingQueue();
  protected Queue committedTuples = Queues.newLinkedBlockingQueue();
  protected transient Queue doneTuples = Queues.newLinkedBlockingQueue();
  private transient Queue waitingTuples = Queues.newLinkedBlockingQueue();
  private transient volatile boolean execute;
  private transient AtomicReference cause;

  @Override
  public void setup(Context.OperatorContext context)
  {
    if (context != null) {
      spinningTime = context.getValue(Context.OperatorContext.SPIN_MILLIS);
    }
    execute = true;
    cause = new AtomicReference();
    waitingTuples.addAll(committedTuples);
    executorService = Executors.newSingleThreadExecutor(new NameableThreadFactory("Reconciler-Helper"));
    executorService.submit(processEnqueuedData());
  }

  @Override
  public void beginWindow(long windowId)
  {
    currentWindowId = windowId;
    currentWindowTuples.put(currentWindowId, new ArrayList());
    currentWindows.add(windowId);
  }

  @Override
  public void endWindow()
  {
    while (doneTuples.peek() != null) {
      committedTuples.remove(doneTuples.poll());
    }
  }

  @Override
  public void handleIdleTime()
  {
    if (execute) {
      try {
        Thread.sleep(spinningTime);
      } catch (InterruptedException ie) {
        throw new RuntimeException(ie);
      }
    } else {
      logger.error("Exception: ", cause);
      DTThrowable.rethrow(cause.get());
    }

  }

  @Override
  public void checkpointed(long l)
  {
  }

  @Override
  public void committed(long l)
  {
    logger.debug(" current committed window {}", l);
    if (currentWindows.isEmpty()) {
      return;
    }
    long processedWindowId = currentWindows.peek();
    while (processedWindowId <= l) {
      List outputDataList = currentWindowTuples.get(processedWindowId);
      if (outputDataList != null && !outputDataList.isEmpty()) {
        committedTuples.addAll(outputDataList);
        waitingTuples.addAll(outputDataList);
      }
      currentWindows.remove();
      currentWindowTuples.remove(processedWindowId);
      if (currentWindows.isEmpty()) {
        return;
      }
      processedWindowId = currentWindows.peek();
    }
  }

  @Override
  public void teardown()
  {
    execute = false;
    executorService.shutdownNow();
  }

  private Runnable processEnqueuedData()
  {
    return new Runnable()
    {
      @Override
      public void run()
      {
        try {
          while (execute) {
            while (waitingTuples.isEmpty()) {
              Thread.sleep(spinningTime);
            }
            QUEUETUPLE output = waitingTuples.remove();
            processCommittedData(output);
            doneTuples.add(output);
          }
        } catch (Throwable e) {
          cause.set(e);
          execute = false;
        }
      }
    };
  }

  /**
   * The implementation class should call this method to enqueue output once input is converted to queue input.
   *
   * The queueTuple is processed once the window in which queueTuple is enqueued is committed.
   *
   * @param queueTuple
   */
  protected void enqueueForProcessing(QUEUETUPLE queueTuple)
  {
    currentWindowTuples.get(currentWindowId).add(queueTuple);
  }

  /**
   * Process input tuple
   *
   * @param input
   */
  protected abstract void processTuple(INPUT input);

  /**
   * This method is called once the window in which queueTuple was created is committed.
   * Implement this method to define the functionality to synchronize data.
   *
   * @param queueInput
   */
  protected abstract void processCommittedData(QUEUETUPLE queueInput);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy