All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.drill.exec.physical.config.AbstractMuxExchange Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.config;

import java.util.List;
import java.util.Map;

import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.drill.exec.physical.EndpointAffinity;
import org.apache.drill.exec.physical.MinorFragmentEndpoint;
import org.apache.drill.exec.physical.base.AbstractExchange;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.physical.base.Sender;
import org.apache.drill.exec.planner.fragment.ParallelizationInfo;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Multiplexing Exchange (MuxExchange) is used when results from multiple minor fragments belonging to the same
 * major fragment running on a node need to be collected at one fragment on the same node before distributing the
 * results further. This helps when the sender that is distributing the results has overhead that is proportional to
 * the number of sender instances. An example of such sender is PartitionSender. Each instance of PartitionSender
 * allocates "r" buffers where "r" is the number of receivers.
 *
 * Ex. Drillbit A is assigned 10 minor fragments belonging to the same major fragment. Each of these fragments
 * has a PartitionSender instance which is sending data to 300 receivers. Each PartitionSender needs 300 buffers,
 * so total of 10*300 buffers are needed. With MuxExchange, all 10 fragments send the data directly (without
 * partitioning) to MuxExchange which uses the PartitionSender to partition the incoming data and distribute
 * to receivers. MuxExchange has only one instance per Drillbit per major fragment which means only one instance of
 * PartitionSender per Drillbit per major fragment. With MuxExchange total number of buffers used by PartitionSender
 * for the 10 fragments is 300 instead of earlier number 10*300.
 */
public abstract class AbstractMuxExchange extends AbstractExchange {

  // Ephemeral info used when creating execution fragments.
  protected Map senderToReceiverMapping;
  protected ArrayListMultimap receiverToSenderMapping;
  private boolean isSenderReceiverMappingCreated;

  public AbstractMuxExchange(@JsonProperty("child") PhysicalOperator child) {
    super(child);
  }

  @Override
  public ParallelizationInfo getReceiverParallelizationInfo(List senderFragmentEndpoints) {
    Preconditions.checkArgument(senderFragmentEndpoints != null && senderFragmentEndpoints.size() > 0,
        "Sender fragment endpoint list should not be empty");

    // We want to run one mux receiver per Drillbit endpoint.
    // Identify the number of unique Drillbit endpoints in sender fragment endpoints.
    List drillbitEndpoints = ImmutableSet.copyOf(senderFragmentEndpoints).asList();

    List affinities = Lists.newArrayList();
    for(DrillbitEndpoint ep : drillbitEndpoints) {
      affinities.add(new EndpointAffinity(ep, Double.POSITIVE_INFINITY));
    }

    return ParallelizationInfo.create(affinities.size(), affinities.size(), affinities);
  }

  @Override
  public Sender getSender(int minorFragmentId, PhysicalOperator child) {
    createSenderReceiverMapping();

    MinorFragmentEndpoint receiver = senderToReceiverMapping.get(minorFragmentId);
    if (receiver == null) {
      throw new IllegalStateException(String.format("Failed to find receiver for sender [%d]", minorFragmentId));
    }

    return new SingleSender(receiverMajorFragmentId, receiver.getId(), child, receiver.getEndpoint());
  }

  protected void createSenderReceiverMapping() {
    if (isSenderReceiverMappingCreated) {
      return;
    }

    senderToReceiverMapping = Maps.newHashMap();
    receiverToSenderMapping = ArrayListMultimap.create();

    // Find the list of sender fragment ids assigned to each Drillbit endpoint.
    ArrayListMultimap endpointSenderList = ArrayListMultimap.create();

    int senderFragmentId = 0;
    for(DrillbitEndpoint senderLocation : senderLocations) {
      endpointSenderList.put(senderLocation, senderFragmentId);
      senderFragmentId++;
    }

    int receiverFragmentId = 0;
    for(DrillbitEndpoint receiverLocation : receiverLocations) {
      List senderFragmentIds = endpointSenderList.get(receiverLocation);

      for(Integer senderId : senderFragmentIds) {
        senderToReceiverMapping.put(senderId, new MinorFragmentEndpoint(receiverFragmentId, receiverLocation));

        receiverToSenderMapping.put(receiverFragmentId,
            new MinorFragmentEndpoint(senderId, senderLocations.get(senderId)));
      }
      receiverFragmentId++;
    }

    isSenderReceiverMappingCreated = true;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy