
com.wtanaka.beam.StdinIO Maven / Gradle / Ivy
/*
* com.wtanaka.beam
*
* Copyright (C) 2017 Wesley Tanaka
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see
* <http://www.gnu.org/licenses/>.
*/
package com.wtanaka.beam;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import javax.annotation.Nullable;
import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Instant;
/**
* Code for incorporating System.in into Beam, primarily for experimenting
* and learning with DirectRunner
*/
public class StdinIO
{
/**
* BoundSource
*/
static class BoundSource extends BoundedSource
{
private static final long serialVersionUID = 1L;
private final InputStream m_serializableInStream;
/**
* Source.Reader implementation for Stdin
*/
static class StdinBoundedReader extends BoundedReader
{
private final InputStream m_stream;
private final BoundedSource m_source;
private final ByteArrayOutputStream m_buffer =
new ByteArrayOutputStream();
private StdinBoundedReader(final BoundedSource source,
InputStream stream)
{
m_source = source;
m_stream = stream;
}
@Override
public boolean advance() throws IOException
{
return readNext();
}
@Override
public void close()
{
}
@Override
public byte[] getCurrent() throws NoSuchElementException
{
return m_buffer.toByteArray();
}
@Override
public BoundedSource getCurrentSource()
{
return m_source;
}
private InputStream getStream()
{
return m_stream == null ? System.in : m_stream;
}
private boolean readNext() throws IOException
{
m_buffer.reset();
int ch = getStream().read();
if (ch == -1)
{
return false;
}
while (ch != -1 && ch != (int) '\n')
{
m_buffer.write(ch);
ch = getStream().read();
}
if (ch != -1)
{
m_buffer.write(ch);
}
return true;
}
@Override
public boolean start() throws IOException
{
return readNext();
}
}
BoundSource()
{
m_serializableInStream = null;
}
BoundSource(InputStream serializableInStream)
{
assert serializableInStream instanceof Serializable :
serializableInStream + " is not Serializable";
m_serializableInStream = serializableInStream;
}
@Override
public BoundedReader createReader(
final PipelineOptions options)
{
return new StdinBoundedReader(this, m_serializableInStream);
}
@Override
public Coder getDefaultOutputCoder()
{
return ByteArrayCoder.of();
}
@Override
public long getEstimatedSizeBytes(final PipelineOptions options)
throws IOException
{
return m_serializableInStream != null ?
m_serializableInStream.available() : System.in.available();
}
@Override
public List extends BoundedSource> splitIntoBundles(
final long desiredBundleSizeBytes, final PipelineOptions options)
{
return Collections.singletonList(this);
}
@Override
public void validate()
{
}
}
/**
* UnboundSource
*/
static class UnboundSource extends UnboundedSource
{
private static final long serialVersionUID = 1L;
/**
* Source.Reader implementation for Stdin
*/
static class UnboundReader extends UnboundedReader
{
final private UnboundedSource m_source;
private final InputStream m_stream;
private final ByteArrayOutputStream m_buffer =
new ByteArrayOutputStream();
private Instant m_timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
private Instant m_watermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
UnboundReader(final UnboundedSource source)
{
this(source, null);
}
UnboundReader(final UnboundedSource source,
InputStream stream)
{
m_source = source;
m_stream = stream;
}
@Override
public boolean advance() throws IOException
{
return readNext();
}
@Override
public void close() throws IOException
{
}
@Override
public CheckpointMark getCheckpointMark()
{
return () ->
{
};
}
@Override
public byte[] getCurrent() throws NoSuchElementException
{
return m_buffer.toByteArray();
}
@Override
public UnboundedSource getCurrentSource()
{
return m_source;
}
@Override
public Instant getCurrentTimestamp() throws NoSuchElementException
{
return m_timestamp;
}
private InputStream getStream()
{
return m_stream == null ? System.in : m_stream;
}
@Override
public Instant getWatermark()
{
// return m_watermark.plus(10000L);
return m_watermark;
}
private boolean readNext() throws IOException
{
m_buffer.reset();
int ch = getStream().read();
if (ch == -1)
{
m_watermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
return false;
}
while (ch != -1 && ch != (int) '\n')
{
m_buffer.write(ch);
ch = getStream().read();
}
if (ch != -1)
{
m_buffer.write(ch);
}
m_watermark = m_timestamp = Instant.now();
return true;
}
@Override
public boolean start() throws IOException
{
return readNext();
}
}
@Override
public UnboundedReader createReader(
final PipelineOptions options,
@Nullable final CheckpointMark ignored)
throws IOException
{
return new UnboundReader(this);
}
@Override
public List extends UnboundedSource>
generateInitialSplits(
final int desiredNumSplits, final PipelineOptions options)
throws Exception
{
return Collections.singletonList(this);
}
@Nullable
@Override
public Coder getCheckpointMarkCoder()
{
return null;
}
@Override
public Coder getDefaultOutputCoder()
{
return ByteArrayCoder.of();
}
@Override
public String toString()
{
return "[StdinIO.UnboundSource]";
}
@Override
public void validate()
{
}
}
public static PTransform> readBound()
{
return Read.from(new BoundSource());
}
public static PTransform> readUnbounded()
{
return Read.from(new UnboundSource());
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy