All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.runners.worker.PartitioningShuffleReader Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.runners.worker;

import com.google.api.client.util.Preconditions;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.KvCoder;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.util.CoderUtils;
import com.google.cloud.dataflow.sdk.util.WindowedValue;
import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder;
import com.google.cloud.dataflow.sdk.util.common.worker.AbstractBoundedReaderIterator;
import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;
import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry;
import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader;
import com.google.cloud.dataflow.sdk.values.KV;

import java.io.IOException;
import java.util.Iterator;

/**
 * A source that reads from a key-sharded dataset, and returns KVs without
 * any values grouping.
 *
 * @param  the type of the keys read from the shuffle
 * @param  the type of the values read from the shuffle
 */
public class PartitioningShuffleReader extends Reader>> {
  final byte[] shuffleReaderConfig;
  final String startShufflePosition;
  final String stopShufflePosition;
  Coder keyCoder;
  WindowedValueCoder windowedValueCoder;

  public PartitioningShuffleReader(PipelineOptions options, byte[] shuffleReaderConfig,
      String startShufflePosition, String stopShufflePosition, Coder>> coder)
      throws Exception {
    this.shuffleReaderConfig = shuffleReaderConfig;
    this.startShufflePosition = startShufflePosition;
    this.stopShufflePosition = stopShufflePosition;
    initCoder(coder);
  }

  /**
   * Given a {@code WindowedValueCoder>}, splits it into a coder for K
   * and a {@code WindowedValueCoder} with the same kind of windows.
   */
  private void initCoder(Coder>> coder) throws Exception {
    if (!(coder instanceof WindowedValueCoder)) {
      throw new Exception("unexpected kind of coder for WindowedValue: " + coder);
    }
    WindowedValueCoder> windowedElemCoder = ((WindowedValueCoder>) coder);
    Coder> elemCoder = windowedElemCoder.getValueCoder();
    if (!(elemCoder instanceof KvCoder)) {
      throw new Exception("unexpected kind of coder for elements read from "
          + "a key-partitioning shuffle: " + elemCoder);
    }
    @SuppressWarnings("unchecked")
    KvCoder kvCoder = (KvCoder) elemCoder;
    this.keyCoder = kvCoder.getKeyCoder();
    windowedValueCoder = windowedElemCoder.withValueCoder(kvCoder.getValueCoder());
  }

  @Override
  public ReaderIterator>> iterator() throws IOException {
    Preconditions.checkArgument(shuffleReaderConfig != null);
    return iterator(new BatchingShuffleEntryReader(
        new ChunkingShuffleBatchReader(new ApplianceShuffleReader(shuffleReaderConfig))));
  }

  ReaderIterator>> iterator(ShuffleEntryReader reader) {
    return new PartitioningShuffleReaderIterator(reader);
  }

  /**
   * A ReaderIterator that reads from a ShuffleEntryReader,
   * extracts K and {@code WindowedValue}, and returns a constructed
   * {@code WindowedValue}.
   */
  class PartitioningShuffleReaderIterator
      extends AbstractBoundedReaderIterator>> {
    Iterator iterator;

    PartitioningShuffleReaderIterator(ShuffleEntryReader reader) {
      this.iterator = reader.read(
          ByteArrayShufflePosition.fromBase64(startShufflePosition),
          ByteArrayShufflePosition.fromBase64(stopShufflePosition));
    }

    @Override
    protected boolean hasNextImpl() throws IOException {
      return iterator.hasNext();
    }

    @Override
    protected WindowedValue> nextImpl() throws IOException {
      ShuffleEntry record = iterator.next();
      K key = CoderUtils.decodeFromByteArray(keyCoder, record.getKey());
      WindowedValue windowedValue =
          CoderUtils.decodeFromByteArray(windowedValueCoder, record.getValue());
      notifyElementRead(record.length());
      return windowedValue.withValue(KV.of(key, windowedValue.getValue()));
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy