All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.wtanaka.beam.StdinIO Maven / Gradle / Ivy

There is a newer version: v0.0.0-128-g8bec6d4
Show newest version
/*
 * com.wtanaka.beam
 *
 * Copyright (C) 2017 Wesley Tanaka 
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 */
package com.wtanaka.beam;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;

import javax.annotation.Nullable;

import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Instant;

/**
 * Code for incorporating System.in into Beam, primarily for experimenting
 * and learning with DirectRunner
 */
public class StdinIO
{
   /**
    * BoundSource
    */
   static class BoundSource extends BoundedSource
   {
      private static final long serialVersionUID = 1L;
      private final InputStream m_serializableInStream;

      /**
       * Source.Reader implementation for Stdin
       */
      static class StdinBoundedReader extends BoundedReader
      {
         private final InputStream m_stream;
         private final BoundedSource m_source;
         private final ByteArrayOutputStream m_buffer =
            new ByteArrayOutputStream();

         private StdinBoundedReader(final BoundedSource source,
                                    InputStream stream)
         {
            m_source = source;
            m_stream = stream;
         }

         @Override
         public boolean advance() throws IOException
         {
            return readNext();
         }

         @Override
         public void close()
         {

         }

         @Override
         public byte[] getCurrent() throws NoSuchElementException
         {
            return m_buffer.toByteArray();
         }

         @Override
         public BoundedSource getCurrentSource()
         {
            return m_source;
         }

         private InputStream getStream()
         {
            return m_stream == null ? System.in : m_stream;
         }

         private boolean readNext() throws IOException
         {
            m_buffer.reset();
            int ch = getStream().read();
            if (ch == -1)
            {
               return false;
            }
            while (ch != -1 && ch != (int) '\n')
            {
               m_buffer.write(ch);
               ch = getStream().read();
            }
            if (ch != -1)
            {
               m_buffer.write(ch);
            }
            return true;
         }

         @Override
         public boolean start() throws IOException
         {
            return readNext();
         }
      }

      BoundSource()
      {
         m_serializableInStream = null;
      }

      BoundSource(InputStream serializableInStream)
      {
         assert serializableInStream instanceof Serializable :
            serializableInStream + " is not Serializable";
         m_serializableInStream = serializableInStream;
      }

      @Override
      public BoundedReader createReader(
         final PipelineOptions options)
      {
         return new StdinBoundedReader(this, m_serializableInStream);
      }

      @Override
      public Coder getDefaultOutputCoder()
      {
         return ByteArrayCoder.of();
      }

      @Override
      public long getEstimatedSizeBytes(final PipelineOptions options)
         throws IOException
      {
         return m_serializableInStream != null ?
            m_serializableInStream.available() : System.in.available();
      }

      @Override
      public List> splitIntoBundles(
         final long desiredBundleSizeBytes, final PipelineOptions options)
      {
         return Collections.singletonList(this);
      }

      @Override
      public void validate()
      {

      }
   }

   /**
    * UnboundSource
    */
   static class UnboundSource extends UnboundedSource
   {
      private static final long serialVersionUID = 1L;

      /**
       * Source.Reader implementation for Stdin
       */
      static class UnboundReader extends UnboundedReader
      {
         final private UnboundedSource m_source;
         private final InputStream m_stream;
         private final ByteArrayOutputStream m_buffer =
            new ByteArrayOutputStream();
         private Instant m_timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
         private Instant m_watermark = BoundedWindow.TIMESTAMP_MIN_VALUE;

         UnboundReader(final UnboundedSource source)
         {
            this(source, null);
         }

         UnboundReader(final UnboundedSource source,
                       InputStream stream)
         {
            m_source = source;
            m_stream = stream;
         }

         @Override
         public boolean advance() throws IOException
         {
            return readNext();
         }

         @Override
         public void close() throws IOException
         {
         }

         @Override
         public CheckpointMark getCheckpointMark()
         {
            return () ->
            {
            };
         }

         @Override
         public byte[] getCurrent() throws NoSuchElementException
         {
            return m_buffer.toByteArray();
         }

         @Override
         public UnboundedSource getCurrentSource()
         {
            return m_source;
         }

         @Override
         public Instant getCurrentTimestamp() throws NoSuchElementException
         {
            return m_timestamp;
         }

         private InputStream getStream()
         {
            return m_stream == null ? System.in : m_stream;
         }

         @Override
         public Instant getWatermark()
         {
            // return m_watermark.plus(10000L);
            return m_watermark;
         }

         private boolean readNext() throws IOException
         {
            m_buffer.reset();
            int ch = getStream().read();
            if (ch == -1)
            {
               m_watermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
               return false;
            }
            while (ch != -1 && ch != (int) '\n')
            {
               m_buffer.write(ch);
               ch = getStream().read();
            }
            if (ch != -1)
            {
               m_buffer.write(ch);
            }
            m_watermark = m_timestamp = Instant.now();
            return true;
         }

         @Override
         public boolean start() throws IOException
         {
            return readNext();
         }
      }

      @Override
      public UnboundedReader createReader(
         final PipelineOptions options,
         @Nullable final CheckpointMark ignored)
         throws IOException
      {
         return new UnboundReader(this);
      }

      @Override
      public List>
      generateInitialSplits(
         final int desiredNumSplits, final PipelineOptions options)
         throws Exception
      {
         return Collections.singletonList(this);
      }

      @Nullable
      @Override
      public Coder getCheckpointMarkCoder()
      {
         return null;
      }

      @Override
      public Coder getDefaultOutputCoder()
      {
         return ByteArrayCoder.of();
      }

      @Override
      public String toString()
      {
         return "[StdinIO.UnboundSource]";
      }

      @Override
      public void validate()
      {
      }
   }

   public static PTransform> readBound()
   {
      return Read.from(new BoundSource());
   }

   public static PTransform> readUnbounded()
   {
      return Read.from(new UnboundSource());
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy