All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.runners.inprocess.InProcessCreate Maven / Gradle / Ivy

Go to download

The Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.dataflow.sdk.runners.inprocess;

import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.io.BoundedSource;
import com.google.cloud.dataflow.sdk.io.OffsetBasedSource;
import com.google.cloud.dataflow.sdk.io.OffsetBasedSource.OffsetBasedReader;
import com.google.cloud.dataflow.sdk.io.Read;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.Create.Values;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.util.CoderUtils;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PInput;
import com.google.cloud.dataflow.sdk.values.POutput;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;

import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;

import javax.annotation.Nullable;

/**
 * An in-process implementation of the {@link Values Create.Values} {@link PTransform}, implemented
 * using a {@link BoundedSource}.
 *
 * 

The coder is inferred via the {@link Values#getDefaultOutputCoder(PInput)} method on the * original transform. */ class InProcessCreate extends ForwardingPTransform> { private final Create.Values original; /** * A {@link PTransformOverrideFactory} for {@link InProcessCreate}. */ public static class InProcessCreateOverrideFactory implements PTransformOverrideFactory { @Override public PTransform override( PTransform transform) { if (transform instanceof Create.Values) { @SuppressWarnings("unchecked") PTransform override = (PTransform) from((Create.Values) transform); return override; } return transform; } } public static InProcessCreate from(Create.Values original) { return new InProcessCreate<>(original); } private InProcessCreate(Values original) { this.original = original; } @Override public PCollection apply(PInput input) { Coder elementCoder; try { elementCoder = original.getDefaultOutputCoder(input); } catch (CannotProvideCoderException e) { throw new IllegalArgumentException( "Unable to infer a coder and no Coder was specified. 
" + "Please set a coder by invoking Create.withCoder() explicitly.", e); } InMemorySource source; try { source = InMemorySource.fromIterable(original.getElements(), elementCoder); } catch (IOException e) { throw new RuntimeException(e); } PCollection result = input.getPipeline().apply(Read.from(source)); result.setCoder(elementCoder); return result; } @Override public PTransform> delegate() { return original; } @VisibleForTesting static class InMemorySource extends OffsetBasedSource { private final List allElementsBytes; private final long totalSize; private final Coder coder; public static InMemorySource fromIterable(Iterable elements, Coder elemCoder) throws CoderException, IOException { ImmutableList.Builder allElementsBytes = ImmutableList.builder(); long totalSize = 0L; for (T element : elements) { byte[] bytes = CoderUtils.encodeToByteArray(elemCoder, element); allElementsBytes.add(bytes); totalSize += bytes.length; } return new InMemorySource<>(allElementsBytes.build(), totalSize, elemCoder); } /** * Create a new source with the specified bytes. The new source owns the input element bytes, * which must not be modified after this constructor is called. 
*/ private InMemorySource(List elementBytes, long totalSize, Coder coder) { super(0, elementBytes.size(), 1); this.allElementsBytes = ImmutableList.copyOf(elementBytes); this.totalSize = totalSize; this.coder = coder; } @Override public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { return totalSize; } @Override public boolean producesSortedKeys(PipelineOptions options) throws Exception { return false; } @Override public BoundedSource.BoundedReader createReader(PipelineOptions options) throws IOException { return new BytesReader<>(this); } @Override public void validate() {} @Override public Coder getDefaultOutputCoder() { return coder; } @Override public long getMaxEndOffset(PipelineOptions options) throws Exception { return allElementsBytes.size(); } @Override public OffsetBasedSource createSourceForSubrange(long start, long end) { List primaryElems = allElementsBytes.subList((int) start, (int) end); long primarySizeEstimate = (long) (totalSize * primaryElems.size() / (double) allElementsBytes.size()); return new InMemorySource<>(primaryElems, primarySizeEstimate, coder); } @Override public long getBytesPerOffset() { if (allElementsBytes.size() == 0) { return 1L; } return Math.max(1L, totalSize / allElementsBytes.size()); } } private static class BytesReader extends OffsetBasedReader { private int index; /** * Use an optional to distinguish between null next element (as Optional.absent()) and no next * element (next is null). 
*/ @Nullable private Optional next; public BytesReader(InMemorySource source) { super(source); index = -1; } @Override @Nullable public T getCurrent() throws NoSuchElementException { if (next == null) { throw new NoSuchElementException(); } return next.orNull(); } @Override public void close() throws IOException {} @Override protected long getCurrentOffset() { return index; } @Override protected boolean startImpl() throws IOException { return advanceImpl(); } @Override public synchronized InMemorySource getCurrentSource() { return (InMemorySource) super.getCurrentSource(); } @Override protected boolean advanceImpl() throws IOException { InMemorySource source = getCurrentSource(); index++; if (index >= source.allElementsBytes.size()) { return false; } next = Optional.fromNullable( CoderUtils.decodeFromByteArray( source.coder, source.allElementsBytes.get(index))); return true; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy