
// com.signalcollect.triplerush.mapper.DistributedTripleMapper.scala Maven / Gradle / Ivy
// The newest version!
/*
* @author Philip Stutz
* @author Mihaela Verman
*
* Copyright 2013 University of Zurich
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.signalcollect.triplerush.mapper
import com.signalcollect.interfaces.VertexToWorkerMapper
import com.signalcollect.interfaces.MapperFactory
import com.signalcollect.triplerush.EfficientIndexPattern._
import scala.util.hashing.MurmurHash3._
/**
 * Maps TripleRush vertex ids to worker ids in a multi-node deployment.
 *
 * A vertex id is a `Long` from which two `Int` components are read with
 * `extractFirst` and `extractSecond`. The signs of these two components decide
 * which value picks the node and which value spreads the load over the workers
 * of that node. Worker ids are laid out node by node, i.e.
 * `nodeId * workersPerNode + workerOnNode`.
 *
 * @param numberOfNodes  number of nodes the workers are distributed over
 * @param workersPerNode number of workers hosted on each node
 */
class DistributedTripleMapper(val numberOfNodes: Int, val workersPerNode: Int) extends VertexToWorkerMapper[Long] {

  /** Total number of workers over all nodes. */
  val numberOfWorkers: Int = numberOfNodes * workersPerNode

  /**
   * Computes the worker responsible for the given vertex id.
   *
   * @param vertexId vertex id holding the two `Int` components described on the cases below
   * @return the id of the worker the vertex is assigned to
   */
  def getWorkerIdForVertexId(vertexId: Long): Int = {
    val head = vertexId.extractFirst
    val tail = vertexId.extractSecond
    if (head > 0) {
      // Head is the subject: it selects the node, the masked sum selects the worker on it.
      workerIdOptimized(nodeAssignmentId = head, workerAssignmentId = (head + tail) & Int.MaxValue)
    } else if (head == 0) {
      // Subject is a wildcard: whatever tail holds (predicate or object) drives
      // both the node assignment and the load balancing.
      val predicateOrObject = tail & Int.MaxValue
      workerIdOptimized(nodeAssignmentId = predicateOrObject, workerAssignmentId = predicateOrObject)
    } else if (tail < 0) {
      // Both components negative: a query id. It stays on the first node and is
      // load balanced over the workers there.
      ((head + tail) & Int.MaxValue) % workersPerNode
    } else if (tail > 0) {
      // Head encodes a predicate and tail a non-wildcard object: the object
      // selects the node, the masked sum selects the worker on it.
      workerIdOptimized(nodeAssignmentId = tail, workerAssignmentId = (head + tail) & Int.MaxValue)
    } else {
      // Everything but the predicate is a wildcard: the predicate (sign bit
      // cleared) drives both the node assignment and the load balancing.
      val predicate = head & Int.MaxValue
      workerIdOptimized(nodeAssignmentId = predicate, workerAssignmentId = predicate)
    }
  }

  /**
   * Combines a node choice and a per-node worker choice into a worker id.
   *
   * Both arguments are expected to be larger than or equal to zero; this
   * method does not check that.
   *
   * @param nodeAssignmentId   value reduced modulo `numberOfNodes` to pick the node
   * @param workerAssignmentId value reduced modulo `workersPerNode` to pick the worker on that node
   * @return `nodeId * workersPerNode + workerOnNode`
   */
  def workerIdOptimized(nodeAssignmentId: Int, workerAssignmentId: Int): Int =
    (nodeAssignmentId % numberOfNodes) * workersPerNode + (workerAssignmentId % workersPerNode)

  /**
   * Mapping by vertex id hash is not available for this mapper.
   *
   * @throws UnsupportedOperationException on every call
   */
  def getWorkerIdForVertexIdHash(vertexIdHash: Int): Int = {
    throw new UnsupportedOperationException("This mapper does not support mapping by vertex hash.")
  }
}
/**
 * Factory that builds [[DistributedTripleMapper]] instances.
 */
object DistributedTripleMapperFactory extends MapperFactory[Long] {

  /**
   * Creates a mapper for the given deployment size.
   *
   * @param numberOfNodes  number of nodes passed on to the mapper
   * @param workersPerNode number of workers per node passed on to the mapper
   * @return a new [[DistributedTripleMapper]]
   */
  def createInstance(numberOfNodes: Int, workersPerNode: Int): DistributedTripleMapper = {
    new DistributedTripleMapper(numberOfNodes, workersPerNode)
  }

  /** Fixed, human-readable name of this factory. */
  override def toString: String = {
    "DistributedTripleMapperFactory"
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy