All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.crunch.impl.spark.SparkComparator Maven / Gradle / Ivy

There is a newer version: 1.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.crunch.impl.spark;

import org.apache.avro.mapred.AvroKeyComparator;
import org.apache.crunch.CrunchRuntimeException;
import org.apache.crunch.GroupingOptions;
import org.apache.crunch.types.PGroupedTableType;
import org.apache.crunch.types.avro.AvroTypeFamily;
import org.apache.crunch.types.writable.WritableType;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;
import java.io.Serializable;
import java.util.Comparator;

/**
 * Orders {@link ByteArray} values by delegating to a Hadoop {@link RawComparator}
 * applied to their underlying {@code byte[]} contents.
 *
 * <p>The delegate comparator is held in a {@code transient} field and is built
 * lazily on the first call to {@link #compare(ByteArray, ByteArray)}, so a
 * deserialized instance rebuilds it on demand rather than carrying it across
 * serialization.
 */
public class SparkComparator implements Comparator<ByteArray>, Serializable {

  /** Comparator class to instantiate reflectively, or {@code null} to fall back to a Writable comparator. */
  private final Class<? extends RawComparator> cmpClass;
  private final GroupingOptions options;
  private final PGroupedTableType ptype;
  private final SparkRuntimeContext ctxt;
  /** Lazily created delegate; not serialized. */
  private transient RawComparator<?> cmp;

  /**
   * Creates a comparator for the given grouping configuration.
   *
   * <p>The comparator class is chosen as follows: the sort comparator class from
   * {@code options} if one is set; otherwise {@link AvroKeyComparator} when
   * {@code ptype} belongs to the Avro type family; otherwise none (a Writable
   * comparator is resolved later from the key type).
   *
   * @param options grouping options, consulted for an explicit sort comparator class
   *                and passed to {@code configureShuffle} when the delegate is built
   * @param ptype   grouped table type whose family and key type drive comparator selection
   * @param ctxt    runtime context supplying the Hadoop configuration
   */
  public SparkComparator(GroupingOptions options,
                         PGroupedTableType ptype,
                         SparkRuntimeContext ctxt) {
    if (options.getSortComparatorClass() != null) {
      this.cmpClass = options.getSortComparatorClass();
    } else if (AvroTypeFamily.getInstance().equals(ptype.getFamily())) {
      this.cmpClass = AvroKeyComparator.class;
    } else {
      this.cmpClass = null;
    }
    this.options = options;
    this.ptype = ptype;
    this.ctxt = ctxt;
  }

  /**
   * Compares the full byte contents of the two arguments using the delegate raw comparator.
   *
   * @param s1 first value
   * @param s2 second value
   * @return the result of the delegate's raw byte comparison
   * @throws CrunchRuntimeException if the delegate comparator cannot be configured
   */
  @Override
  public int compare(ByteArray s1, ByteArray s2) {
    byte[] b1 = s1.value;
    byte[] b2 = s2.value;
    return rawComparator().compare(b1, 0, b1.length, b2, 0, b2.length);
  }

  /**
   * Returns the delegate comparator, creating and caching it on first use.
   *
   * @return the delegate raw comparator
   * @throws CrunchRuntimeException wrapping any {@link IOException} raised during setup
   */
  private RawComparator<?> rawComparator() {
    if (cmp == null) {
      try {
        ptype.initialize(ctxt.getConfiguration());
        // The shuffle is configured on a scratch Job so that the comparator is
        // instantiated against the configuration that configureShuffle produced.
        Job job = new Job(ctxt.getConfiguration());
        ptype.configureShuffle(job, options);
        if (cmpClass != null) {
          cmp = ReflectionUtils.newInstance(cmpClass, job.getConfiguration());
        } else {
          // No explicit or Avro comparator: resolve one from the Writable key class.
          cmp = WritableComparator.get(((WritableType) ptype.getTableType().getKeyType()).getSerializationClass());
          if (cmp == null) {
            cmp = new ByteWritable.Comparator();
          }
        }
      } catch (IOException e) {
        throw new CrunchRuntimeException("Error configuring sort comparator", e);
      }
    }
    return cmp;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy