![JAR search and dependency download from the Maven repository](/logo.png)
com.wtanaka.beam.StreamIO Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of beam Show documentation
Show all versions of beam Show documentation
Apache Beam Utility Code
/*
* com.wtanaka.beam
*
* Copyright (C) 2017 Wesley Tanaka
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see
* <http://www.gnu.org/licenses/>.
*/
package com.wtanaka.beam;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.beam.sdk.coders.AvroCoder;
import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;
import org.joda.time.Instant;
import com.wtanaka.beam.io.EmptyCheckpointMark;
/**
* Contains bindings between InputStream / OutputStream and PTransform, for
* experimenting with DirectRunner.
*/
public class StreamIO
{
/**
* Symbolic name for one of the JVM's standard output streams.
* OutStreamDoFn stores this key instead of the stream object and maps it
* to System.out or System.err each time an element is processed.
*/
private enum HardCodedOutputStream implements Serializable
{
// STDOUT is resolved to System.out, STDERR to System.err.
STDOUT, STDERR
}
/**
 * Coder for UTF-8 encoded text.
 * NOTE(review): no other member of this class, as shown, references this
 * constant; confirm whether it is still needed.
 */
private static final Coder<String> DEFAULT_TEXT_CODER =
   StringUtf8Coder.of();
/**
 * DoFn that writes every incoming {@code byte[]} element to an output
 * stream and flushes the stream after each element.
 * <p>
 * The target is either a caller-supplied serializable stream or a
 * {@link HardCodedOutputStream} key naming System.out / System.err.
 * Exactly one of the two is set per instance.
 *
 * @param <OutStreamT> type of the caller-supplied stream
 */
private static class OutStreamDoFn<OutStreamT extends OutputStream & Serializable>
   extends DoFn<byte[], Void>
{
   /** Caller-supplied stream, or null when a stream key is used. */
   private final OutStreamT m_stream;

   /** Standard-stream key, or null when an explicit stream is used. */
   private final HardCodedOutputStream m_streamKey;

   /**
    * @param stream stream to write to; must not be null
    * @throws IllegalArgumentException if {@code stream} is null
    */
   public OutStreamDoFn(final OutStreamT stream)
   {
      // Fail at construction time rather than relying on assertions,
      // which are normally disabled at runtime.
      if (stream == null)
      {
         throw new IllegalArgumentException("stream must not be null");
      }
      m_stream = stream;
      m_streamKey = null;
   }

   /**
    * @param streamKey which standard stream to write to; must not be null
    * @throws IllegalArgumentException if {@code streamKey} is null
    */
   public OutStreamDoFn(final HardCodedOutputStream streamKey)
   {
      if (streamKey == null)
      {
         throw new IllegalArgumentException("streamKey must not be null");
      }
      m_streamKey = streamKey;
      m_stream = null;
   }

   /**
    * Writes the current element to the target stream and flushes it.
    *
    * @param c context supplying the element
    * @throws IOException if writing or flushing fails
    */
   @ProcessElement
   public void processElement(ProcessContext c) throws IOException
   {
      final byte[] line = c.element();
      final OutputStream stream = resolveStream();
      stream.write(line);
      stream.flush();
   }

   /**
    * Returns the stream to write to. Standard streams are looked up on
    * every call, not cached, because someone may have called
    * System.setOut or System.setErr in the meantime.
    */
   private OutputStream resolveStream()
   {
      if (m_stream != null)
      {
         return m_stream;
      }
      switch (m_streamKey)
      {
         case STDOUT:
            return System.out;
         case STDERR:
            return System.err;
         default:
            throw new IllegalStateException(
               "Unhandled stream key: " + m_streamKey);
      }
   }
}
private static class OutTransform extends PTransform, PDone>
{
private final OutT m_stream;
private final HardCodedOutputStream m_streamKey;
private final PTransform, PDone> m_parDo;
public OutTransform(OutT stream)
{
m_stream = stream;
m_streamKey = null;
m_parDo = ParDo.of(new OutStreamDoFn(m_stream));
}
public OutTransform(HardCodedOutputStream streamKey)
{
m_stream = null;
m_streamKey = streamKey;
m_parDo = ParDo.of(new OutStreamDoFn(m_streamKey));
}
@Override
public PDone expand(final PCollection input)
{
input.apply(m_parDo);
return PDone.in(input.getPipeline());
}
}
/**
* BoundSource
*/
static class BoundSource extends
BoundedSource
{
private static final long serialVersionUID = 1L;
private final InT m_serializableInStream;
/**
* Source.Reader implementation for Stdin
*/
static class StdinBoundedReader
extends BoundedReader
{
private final InT m_stream;
private final BoundedSource m_source;
private final ByteArrayOutputStream m_buffer =
new ByteArrayOutputStream();
private StdinBoundedReader(final BoundedSource source,
InT stream)
{
m_source = source;
m_stream = stream;
}
@Override
public boolean advance() throws IOException
{
return readNext();
}
@Override
public void close()
{
}
@Override
public byte[] getCurrent() throws NoSuchElementException
{
return m_buffer.toByteArray();
}
@Override
public BoundedSource getCurrentSource()
{
return m_source;
}
private InputStream getStream()
{
return m_stream == null ? System.in : m_stream;
}
private boolean readNext() throws IOException
{
m_buffer.reset();
int ch = getStream().read();
if (ch == -1)
{
return false;
}
while (ch != -1 && ch != (int) '\n')
{
m_buffer.write(ch);
ch = getStream().read();
}
if (ch != -1)
{
m_buffer.write(ch);
}
return true;
}
@Override
public boolean start() throws IOException
{
return readNext();
}
}
BoundSource()
{
m_serializableInStream = null;
}
BoundSource(InT serializableInStream)
{
m_serializableInStream = serializableInStream;
}
@Override
public BoundedReader createReader(
final PipelineOptions options)
{
return new StdinBoundedReader(this, m_serializableInStream);
}
@Override
public Coder getDefaultOutputCoder()
{
return ByteArrayCoder.of();
}
@Override
public long getEstimatedSizeBytes(final PipelineOptions options)
throws IOException
{
final int estimate = m_serializableInStream != null ?
m_serializableInStream.available() : System.in.available();
return estimate;
}
@Override
public List extends BoundedSource> split(
final long desiredBundleSizeBytes, final PipelineOptions options)
{
return Collections.singletonList(this);
}
@Override
public void validate()
{
}
}
/**
* UnboundSource
*/
static class UnboundSource
extends UnboundedSource
{
private static final long serialVersionUID = 1L;
private InStreamT m_stream;
/**
* Source.Reader implementation for Stdin
*/
static class UnboundReader
extends UnboundedReader
{
final private UnboundedSource m_source;
private final InStreamT m_stream;
private final ByteArrayOutputStream m_buffer =
new ByteArrayOutputStream();
private Instant m_timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
private Instant m_watermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
UnboundReader(final UnboundedSource source)
{
this(source, null);
}
UnboundReader(final UnboundedSource source,
InStreamT stream)
{
m_source = source;
m_stream = stream;
}
@Override
public boolean advance() throws IOException
{
return readNext();
}
@Override
public void close() throws IOException
{
}
@Override
public CheckpointMark getCheckpointMark()
{
return () ->
{
};
}
@Override
public byte[] getCurrent() throws NoSuchElementException
{
return m_buffer.toByteArray();
}
@Override
public UnboundedSource getCurrentSource()
{
return m_source;
}
@Override
public Instant getCurrentTimestamp() throws NoSuchElementException
{
return m_timestamp;
}
private InputStream getStream()
{
return m_stream == null ? System.in : m_stream;
}
@Override
public Instant getWatermark()
{
try
{
throw new Exception("trace");
}
catch (Exception e)
{
e.printStackTrace();
}
// return m_watermark.plus(10000L);
return m_watermark;
}
private boolean readNext() throws IOException
{
m_buffer.reset();
int ch = getStream().read();
if (ch == -1)
{
m_watermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
return false;
}
while (ch != -1 && ch != (int) '\n')
{
m_buffer.write(ch);
ch = getStream().read();
}
if (ch != -1)
{
m_buffer.write(ch);
}
m_watermark = m_timestamp = Instant.now();
return true;
}
@Override
public boolean start() throws IOException
{
return readNext();
}
}
public UnboundSource(final InStreamT in)
{
m_stream = in;
}
public UnboundSource()
{
m_stream = null;
}
@Override
public UnboundedReader createReader(
final PipelineOptions options,
final EmptyCheckpointMark ignored)
throws IOException
{
return new UnboundReader(this, m_stream);
}
@Override
public Coder getCheckpointMarkCoder()
{
return AvroCoder.of(EmptyCheckpointMark.class);
}
@Override
public Coder getDefaultOutputCoder()
{
return ByteArrayCoder.of();
}
@Override
public List extends UnboundedSource>
split(
final int desiredNumSplits, final PipelineOptions options)
throws Exception
{
return Collections.singletonList(this);
}
@Override
public String toString()
{
return "[StdinIO.UnboundSource]";
}
@Override
public void validate()
{
}
}
public static PTransform> stdinBound()
{
return Read.from(new BoundSource());
}
public static PTransform> stdinUnbounded()
{
return Read.from(new UnboundSource());
}
public static PTransform, PDone> stderr()
{
return new OutTransform(HardCodedOutputStream.STDERR);
}
public static PTransform, PDone> stdout()
{
return new OutTransform(HardCodedOutputStream.STDOUT);
}
public static
PTransform, PDone> write(OutStreamT out)
{
return new OutTransform(out);
}
public static
PTransform> readUnbounded(InStreamT in)
{
return Read.from(new UnboundSource(in));
}
public static
PTransform>
readBound(final InStreamT in)
{
return Read.from(new BoundSource(in));
}
;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy