All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.signalcollect.triplerush.mapper.DistributedTripleMapper.scala Maven / Gradle / Ivy

The newest version!
/*
 *  @author Philip Stutz
 *  @author Mihaela Verman
 *
 *  Copyright 2013 University of Zurich
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

package com.signalcollect.triplerush.mapper

import com.signalcollect.interfaces.VertexToWorkerMapper
import com.signalcollect.interfaces.MapperFactory
import com.signalcollect.triplerush.EfficientIndexPattern._
import scala.util.hashing.MurmurHash3._

/**
 * Maps TripleRush vertex ids (encoded as `Long`) to worker ids in a cluster that
 * consists of `numberOfNodes` nodes with `workersPerNode` workers each.
 *
 * Worker ids are laid out node by node: worker `w` on node `n` has the id
 * `n * workersPerNode + w`.
 *
 * @param numberOfNodes  number of nodes in the cluster, has to be positive
 * @param workersPerNode number of workers on each node, has to be positive
 * @throws IllegalArgumentException if `numberOfNodes` or `workersPerNode` is not positive
 */
class DistributedTripleMapper(val numberOfNodes: Int, val workersPerNode: Int) extends VertexToWorkerMapper[Long] {
  // Both values are used as divisors below. Rejecting non-positive values here reports a
  // misconfiguration at construction time instead of as an ArithmeticException (zero) or
  // as negative worker ids (negative values) somewhere inside the mapping code.
  require(numberOfNodes > 0, s"numberOfNodes has to be positive, but was $numberOfNodes.")
  require(workersPerNode > 0, s"workersPerNode has to be positive, but was $workersPerNode.")

  /** Total number of workers in the cluster. */
  val numberOfWorkers: Int = numberOfNodes * workersPerNode

  /**
   * Determines the worker that is responsible for the vertex with id `vertexId`.
   *
   * The decision is based on the signs of the two components that `extractFirst` and
   * `extractSecond` return for the id:
   *  - both negative: query id, placed on the first node;
   *  - first negative, second positive: predicate/object, node chosen by the object;
   *  - first negative, second zero: predicate only, node chosen by the predicate;
   *  - first positive: subject is bound, node chosen by the subject;
   *  - first zero: subject is a wildcard, node chosen by the second component.
   *
   * Sums and possibly negative components are masked with `Int.MaxValue`, which clears
   * the sign bit, so the ids that are used for the modulo operations are never negative
   * (even if the sum overflows).
   *
   * @param vertexId the encoded vertex id
   * @return the id of the worker that the vertex is assigned to
   */
  def getWorkerIdForVertexId(vertexId: Long): Int = {
    val first = vertexId.extractFirst
    val second = vertexId.extractSecond
    if (first < 0) {
      if (second < 0) {
        // It's a query id, map to first node and load balance on the workers there.
        ((first + second) & Int.MaxValue) % workersPerNode
      } else if (second > 0) {
        // First encodes a predicate, second encodes an object.
        // Object is not a wildcard and we use it for node assignment.
        val loadBalanceId = (first + second) & Int.MaxValue
        workerIdOptimized(nodeAssignmentId = second, workerAssignmentId = loadBalanceId)
      } else {
        // Everything but the predicate is a wildcard.
        // We use the predicate for both node assignment and load balancing.
        val p = first & Int.MaxValue
        workerIdOptimized(nodeAssignmentId = p, workerAssignmentId = p)
      }
    } else if (first > 0) {
      // First represents the subject and we use it for node assignment.
      val loadBalanceId = (first + second) & Int.MaxValue
      workerIdOptimized(nodeAssignmentId = first, workerAssignmentId = loadBalanceId)
    } else {
      // Subject is a wildcard, we use whatever is in second for node assignment and load balancing.
      val predicateOrObject = second & Int.MaxValue
      workerIdOptimized(nodeAssignmentId = predicateOrObject, workerAssignmentId = predicateOrObject)
    }
  }

  /**
   * Combines a node choice and a choice of a worker on that node into a global worker id.
   *
   * Precondition: both `nodeAssignmentId` and `workerAssignmentId` are larger than or
   * equal to zero. This is NOT checked here; a negative argument yields a non-positive
   * remainder and can therefore result in a negative worker id.
   *
   * @param nodeAssignmentId   non-negative id, its remainder modulo `numberOfNodes` selects the node
   * @param workerAssignmentId non-negative id, its remainder modulo `workersPerNode` selects the worker on that node
   * @return `nodeId * workersPerNode + workerOnNode`
   */
  def workerIdOptimized(nodeAssignmentId: Int, workerAssignmentId: Int): Int = {
    val nodeId = nodeAssignmentId % numberOfNodes
    val workerOnNode = workerAssignmentId % workersPerNode
    nodeId * workersPerNode + workerOnNode
  }

  /**
   * Mapping by vertex id hash is not supported by this mapper.
   *
   * @throws UnsupportedOperationException always
   */
  def getWorkerIdForVertexIdHash(vertexIdHash: Int): Int = throw new UnsupportedOperationException("This mapper does not support mapping by vertex hash.")
}

/**
 * Factory that creates [[DistributedTripleMapper]] instances.
 */
object DistributedTripleMapperFactory extends MapperFactory[Long] {
  /**
   * Creates a mapper for a cluster with the given dimensions.
   *
   * @param numberOfNodes  number of nodes in the cluster
   * @param workersPerNode number of workers on each node
   * @return a new [[DistributedTripleMapper]] for `numberOfNodes` nodes with `workersPerNode` workers each
   */
  def createInstance(numberOfNodes: Int, workersPerNode: Int): DistributedTripleMapper =
    new DistributedTripleMapper(numberOfNodes, workersPerNode)

  /** Name of this factory. */
  override def toString: String = "DistributedTripleMapperFactory"
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy