// Source listing: com.google.cloud.dataflow.sdk.runners.inprocess.InProcessCreate
// (from the google-cloud-dataflow-java-sdk-all artifact).
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.runners.inprocess;
import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.io.BoundedSource;
import com.google.cloud.dataflow.sdk.io.OffsetBasedSource;
import com.google.cloud.dataflow.sdk.io.OffsetBasedSource.OffsetBasedReader;
import com.google.cloud.dataflow.sdk.io.Read;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.Create.Values;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.util.CoderUtils;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PInput;
import com.google.cloud.dataflow.sdk.values.POutput;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;
import javax.annotation.Nullable;
/**
* An in-process implementation of the {@link Values Create.Values} {@link PTransform}, implemented
* using a {@link BoundedSource}.
*
* The coder is inferred via the {@link Values#getDefaultOutputCoder(PInput)} method on the
* original transform.
*/
class InProcessCreate extends ForwardingPTransform> {
private final Create.Values original;
/**
* A {@link PTransformOverrideFactory} for {@link InProcessCreate}.
*/
public static class InProcessCreateOverrideFactory implements PTransformOverrideFactory {
@Override
public PTransform override(
PTransform transform) {
if (transform instanceof Create.Values) {
@SuppressWarnings("unchecked")
PTransform override =
(PTransform) from((Create.Values) transform);
return override;
}
return transform;
}
}
public static InProcessCreate from(Create.Values original) {
return new InProcessCreate<>(original);
}
private InProcessCreate(Values original) {
this.original = original;
}
@Override
public PCollection apply(PInput input) {
Coder elementCoder;
try {
elementCoder = original.getDefaultOutputCoder(input);
} catch (CannotProvideCoderException e) {
throw new IllegalArgumentException(
"Unable to infer a coder and no Coder was specified. "
+ "Please set a coder by invoking Create.withCoder() explicitly.",
e);
}
InMemorySource source;
try {
source = InMemorySource.fromIterable(original.getElements(), elementCoder);
} catch (IOException e) {
throw new RuntimeException(e);
}
PCollection result = input.getPipeline().apply(Read.from(source));
result.setCoder(elementCoder);
return result;
}
@Override
public PTransform> delegate() {
return original;
}
@VisibleForTesting
static class InMemorySource extends OffsetBasedSource {
private final List allElementsBytes;
private final long totalSize;
private final Coder coder;
public static InMemorySource fromIterable(Iterable elements, Coder elemCoder)
throws CoderException, IOException {
ImmutableList.Builder allElementsBytes = ImmutableList.builder();
long totalSize = 0L;
for (T element : elements) {
byte[] bytes = CoderUtils.encodeToByteArray(elemCoder, element);
allElementsBytes.add(bytes);
totalSize += bytes.length;
}
return new InMemorySource<>(allElementsBytes.build(), totalSize, elemCoder);
}
/**
* Create a new source with the specified bytes. The new source owns the input element bytes,
* which must not be modified after this constructor is called.
*/
private InMemorySource(List elementBytes, long totalSize, Coder coder) {
super(0, elementBytes.size(), 1);
this.allElementsBytes = ImmutableList.copyOf(elementBytes);
this.totalSize = totalSize;
this.coder = coder;
}
@Override
public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
return totalSize;
}
@Override
public boolean producesSortedKeys(PipelineOptions options) throws Exception {
return false;
}
@Override
public BoundedSource.BoundedReader createReader(PipelineOptions options) throws IOException {
return new BytesReader<>(this);
}
@Override
public void validate() {}
@Override
public Coder getDefaultOutputCoder() {
return coder;
}
@Override
public long getMaxEndOffset(PipelineOptions options) throws Exception {
return allElementsBytes.size();
}
@Override
public OffsetBasedSource createSourceForSubrange(long start, long end) {
List primaryElems = allElementsBytes.subList((int) start, (int) end);
long primarySizeEstimate =
(long) (totalSize * primaryElems.size() / (double) allElementsBytes.size());
return new InMemorySource<>(primaryElems, primarySizeEstimate, coder);
}
@Override
public long getBytesPerOffset() {
if (allElementsBytes.size() == 0) {
return 1L;
}
return Math.max(1L, totalSize / allElementsBytes.size());
}
}
private static class BytesReader extends OffsetBasedReader {
private int index;
/**
* Use an optional to distinguish between null next element (as Optional.absent()) and no next
* element (next is null).
*/
@Nullable private Optional next;
public BytesReader(InMemorySource source) {
super(source);
index = -1;
}
@Override
@Nullable
public T getCurrent() throws NoSuchElementException {
if (next == null) {
throw new NoSuchElementException();
}
return next.orNull();
}
@Override
public void close() throws IOException {}
@Override
protected long getCurrentOffset() {
return index;
}
@Override
protected boolean startImpl() throws IOException {
return advanceImpl();
}
@Override
public synchronized InMemorySource getCurrentSource() {
return (InMemorySource) super.getCurrentSource();
}
@Override
protected boolean advanceImpl() throws IOException {
InMemorySource source = getCurrentSource();
index++;
if (index >= source.allElementsBytes.size()) {
return false;
}
next =
Optional.fromNullable(
CoderUtils.decodeFromByteArray(
source.coder, source.allElementsBytes.get(index)));
return true;
}
}
}