All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.genomics.dataflow.functions.pca.ExtractSimilarCallsets Maven / Gradle / Ivy

There is a newer version: v1-0.8
Show newest version
/*
 * Copyright (C) 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.dataflow.functions.pca;

import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.utils.CallFilters;
import com.google.cloud.genomics.dataflow.utils.PairGenerator;
import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
import com.google.genomics.v1.Variant;
import com.google.genomics.v1.VariantCall;

/**
 * Emits a callset pair every time they share a variant.
 */
public class ExtractSimilarCallsets extends DoFn, Long>> {

  private ImmutableMultiset.Builder> accumulator;

  @Override
  public void startBundle(Context c) {
    accumulator = ImmutableMultiset.builder();
  }

  @Override
  public void processElement(ProcessContext context) {
    FluentIterable> pairs = PairGenerator.WITH_REPLACEMENT.allPairs(
        getSamplesWithVariant(context.element()), Ordering.natural());
    for (KV pair : pairs) {
      accumulator.add(pair);
    }
  }

  @Override
  public void finishBundle(Context context) {
    for (Multiset.Entry> entry : accumulator.build().entrySet()) {
      context.output(KV.of(entry.getElement(), Long.valueOf(entry.getCount())));
    }
  }

  protected ImmutableList getSamplesWithVariant(Variant variant) {
    return ImmutableList.copyOf(Iterables.transform(
        CallFilters.getSamplesWithVariantOfMinGenotype(variant, 1), new Function() {

          @Override
          public String apply(VariantCall call) {
            return call.getCallSetName();
          }

        }));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy