All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.drill.exec.physical.config.AbstractDeMuxExchange Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.config;

import java.util.List;
import java.util.Map;

import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.exec.physical.EndpointAffinity;
import org.apache.drill.exec.physical.MinorFragmentEndpoint;
import org.apache.drill.exec.physical.base.AbstractExchange;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.physical.base.Sender;
import org.apache.drill.exec.planner.fragment.ParallelizationInfo;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * DeMuxExchange is opposite of MuxExchange. It is used when the sender has overhead that is proportional to the
 * number of receivers. DeMuxExchange is run one instance per Drillbit endpoint which collects and distributes data
 * belonging to local receiving fragments running on the same Drillbit.
 *
 * Example:
 * On a 3 node cluster, if the sender has 10 receivers on each node each sender requires 30 buffers. By inserting
 * DeMuxExchange, we create one receiver per node which means total of 3 receivers for each sender. If the number of
 * senders is 10, we use 10*3 buffers instead of 10*30. DeMuxExchange has a overhead of buffer space that is equal to
 * number of local receivers. In this case each DeMuxExchange needs 10 buffers, so total of 3*10 buffers.
 */
public abstract class AbstractDeMuxExchange extends AbstractExchange {
  protected final LogicalExpression expr;

  // Ephemeral info used when creating execution fragments.
  protected Map receiverToSenderMapping;
  protected ArrayListMultimap senderToReceiversMapping;
  private boolean isSenderReceiverMappingCreated;

  public AbstractDeMuxExchange(@JsonProperty("child") PhysicalOperator child, @JsonProperty("expr") LogicalExpression expr) {
    super(child);
    this.expr = expr;
  }

  @JsonProperty("expr")
  public LogicalExpression getExpression(){
    return expr;
  }

  @Override
  public ParallelizationInfo getSenderParallelizationInfo(List receiverFragmentEndpoints) {
    Preconditions.checkArgument(receiverFragmentEndpoints != null && receiverFragmentEndpoints.size() > 0,
        "Receiver fragment endpoint list should not be empty");

    // We want to run one demux sender per Drillbit endpoint.
    // Identify the number of unique Drillbit endpoints in receiver fragment endpoints.
    List drillbitEndpoints = ImmutableSet.copyOf(receiverFragmentEndpoints).asList();

    List affinities = Lists.newArrayList();
    for(DrillbitEndpoint ep : drillbitEndpoints) {
      affinities.add(new EndpointAffinity(ep, Double.POSITIVE_INFINITY));
    }

    return ParallelizationInfo.create(affinities.size(), affinities.size(), affinities);
  }

  @Override
  public ParallelizationInfo getReceiverParallelizationInfo(List senderFragmentEndpoints) {
    return ParallelizationInfo.UNLIMITED_WIDTH_NO_ENDPOINT_AFFINITY;
  }

  @Override
  public Sender getSender(int minorFragmentId, PhysicalOperator child) {
    createSenderReceiverMapping();

    List receivers = senderToReceiversMapping.get(minorFragmentId);
    if (receivers == null || receivers.size() <= 0) {
      throw new IllegalStateException(String.format("Failed to find receivers for sender [%d]", minorFragmentId));
    }

    return new HashPartitionSender(receiverMajorFragmentId, child, expr, receivers);
  }

  /**
   * In DeMuxExchange, sender fragment parallelization and endpoint assignment depends on receiver fragment endpoint
   * assignments.
   */
  @Override
  public ParallelizationDependency getParallelizationDependency() {
    return ParallelizationDependency.SENDER_DEPENDS_ON_RECEIVER;
  }

  protected void createSenderReceiverMapping() {
    if (isSenderReceiverMappingCreated) {
      return;
    }

    senderToReceiversMapping = ArrayListMultimap.create();
    receiverToSenderMapping = Maps.newHashMap();

    // Find the list of receiver fragment ids assigned to each Drillbit endpoint
    ArrayListMultimap endpointReceiverList = ArrayListMultimap.create();

    int receiverFragmentId = 0;
    for(DrillbitEndpoint receiverLocation : receiverLocations) {
      endpointReceiverList.put(receiverLocation, receiverFragmentId);
      receiverFragmentId++;
    }

    int senderFragmentId = 0;
    for(DrillbitEndpoint senderLocation : senderLocations) {
      final List receiverMinorFragmentIds = endpointReceiverList.get(senderLocation);

      for(Integer receiverId : receiverMinorFragmentIds) {
        receiverToSenderMapping.put(receiverId, new MinorFragmentEndpoint(senderFragmentId, senderLocation));

        senderToReceiversMapping.put(senderFragmentId,
            new MinorFragmentEndpoint(receiverId, receiverLocations.get(receiverId)));
      }
      senderFragmentId++;
    }

    isSenderReceiverMappingCreated = true;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy