Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms.join;
import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.IterableCoder;
import com.google.cloud.dataflow.sdk.coders.StandardCoder;
import com.google.cloud.dataflow.sdk.util.CloudObject;
import com.google.cloud.dataflow.sdk.util.PropertyNames;
import com.google.cloud.dataflow.sdk.util.common.Reiterator;
import com.google.cloud.dataflow.sdk.values.TupleTag;
import com.google.cloud.dataflow.sdk.values.TupleTagList;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
/**
* A row result of a {@link CoGroupByKey}. This is a tuple of {@link Iterable}s produced for
* a given key, and these can be accessed in different ways.
*/
public class CoGbkResult {
/**
* A map of integer union tags to a list of union objects.
* Note: the key and the embedded union tag are the same, so it is redundant
* to store it multiple times, but for now it makes encoding easier.
*/
private final List> valueMap;
private final CoGbkResultSchema schema;
private static final int DEFAULT_IN_MEMORY_ELEMENT_COUNT = 10_000;
private static final Logger LOG = LoggerFactory.getLogger(CoGbkResult.class);
/**
 * Creates a row in the {@link PCollection} resulting from a {@link CoGroupByKey} transform.
 *
 * <p>Delegates to {@code CoGbkResult(schema, taggedValues, inMemoryElementCount)}, passing
 * {@code DEFAULT_IN_MEMORY_ELEMENT_COUNT} as the in-memory element count.
 *
 * @param schema the set of tuple tags used to refer to input tables and
 *     result values
 * @param taggedValues the raw results from a group-by-key
 */
public CoGbkResult(
    CoGbkResultSchema schema,
    Iterable<RawUnionValue> taggedValues) {
  this(schema, taggedValues, DEFAULT_IN_MEMORY_ELEMENT_COUNT);
}
@SuppressWarnings("unchecked")
public CoGbkResult(
CoGbkResultSchema schema,
Iterable taggedValues,
int inMemoryElementCount) {
this.schema = schema;
valueMap = new ArrayList<>();
for (int unionTag = 0; unionTag < schema.size(); unionTag++) {
valueMap.add(new ArrayList<>());
}
// Demultiplex the first inMemoryElementCount tagged union values
// according to their tag.
final Iterator taggedIter = taggedValues.iterator();
int elementCount = 0;
while (taggedIter.hasNext()) {
if (elementCount++ >= inMemoryElementCount && taggedIter instanceof Reiterator) {
// Let the tails be lazy.
break;
}
RawUnionValue value = taggedIter.next();
// Make sure the given union tag has a corresponding tuple tag in the
// schema.
int unionTag = value.getUnionTag();
if (schema.size() <= unionTag) {
throw new IllegalStateException("union tag " + unionTag +
" has no corresponding tuple tag in the result schema");
}
List