All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tencent.angel.spark.ml.psf.embedding.NEModelRandomize.scala Maven / Gradle / Ivy

/*
 * Tencent is pleased to support the open source community by making Angel available.
 *
 * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in 
 * compliance with the License. You may obtain a copy of the License at
 *
 * https://opensource.org/licenses/Apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 *
 */


package com.tencent.angel.spark.ml.psf.embedding

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.util.Random
import io.netty.buffer.ByteBuf
import com.tencent.angel.PartitionKey
import com.tencent.angel.ml.math2.storage.IntFloatDenseVectorStorage
import com.tencent.angel.ml.matrix.psf.update.base.{PartitionUpdateParam, UpdateFunc, UpdateParam}
import com.tencent.angel.ps.storage.matrix.ServerPartition
import com.tencent.angel.psagent.PSAgentContext
import com.tencent.angel.spark.ml.psf.embedding.NEModelRandomize.{RandomizePartitionUpdateParam, RandomizeUpdateParam}

/**
  * Update function that fills the rows of a PS matrix partition with uniform random values.
  *
  * Every randomized entry is computed as `(nextFloat() - 0.5f) / dim`, i.e. it is drawn from
  * U(-1/(2*dim), 1/(2*dim)).
  *
  * @param param matrix-level settings (matrix id, partDim, dim, order, seed)
  */
class NEModelRandomize(param: RandomizeUpdateParam) extends UpdateFunc(param) {

  /** Convenience constructor that wraps the raw settings in a [[RandomizeUpdateParam]]. */
  def this(matrixId: Int, partDim: Int, dimension: Int, order: Int, seed: Int) =
    this(new RandomizeUpdateParam(matrixId, partDim, dimension, order, seed))

  /** No-argument constructor; passes a null parameter (presumably for reflective creation - TODO confirm). */
  def this() = this(null)

  /**
    * Randomizes the partition addressed by `partParam`.
    *
    * Does nothing when the storage manager returns null for the requested matrix/partition id.
    *
    * @param partParam expected to be a [[RandomizePartitionUpdateParam]]; it is cast without a check
    */
  override def partitionUpdate(partParam: PartitionUpdateParam): Unit = {
    val storage = psContext.getMatrixStorageManager
    val partKey = partParam.getPartKey
    val target = storage.getPart(partParam.getMatrixId, partKey.getPartitionId)
    Option(target).foreach { serverPart =>
      val settings = partParam.asInstanceOf[RandomizePartitionUpdateParam]
      update(serverPart, partKey, settings.partDim, settings.dim, settings.order, settings.seed)
    }
  }

  /**
    * Overwrites every row in `[key.getStartRow, key.getEndRow)` of `part`.
    *
    *  - `order == 1`: every entry of the row is randomized.
    *  - otherwise: the row is treated as consecutive blocks of `2 * partDim` entries; the first
    *    `partDim` entries of each block are randomized and the remaining ones are set to 0.
    *
    * Rows are cast to [[IntFloatDenseVectorStorage]]; any other storage type fails the cast.
    *
    * @param part    partition whose rows are written in place
    * @param key     supplies the row range to process
    * @param partDim number of randomized entries per block when `order != 1`
    * @param dim     divisor applied to each centred random sample
    * @param order   selects between the two fill patterns described above
    * @param seed    seed of the generator that produces the per-row seeds
    */
  private def update(part: ServerPartition, key: PartitionKey, partDim: Int, dim: Int, order: Int, seed: Int): Unit = {
    val firstRow = key.getStartRow
    val endRowExclusive = key.getEndRow
    val seedSource = new Random(seed)

    // Per-row seeds are drawn one after another *before* the work goes parallel, so each row's
    // values depend only on `seed` and the row's position, not on how the rows are scheduled.
    val seededRows = for (rowId <- firstRow until endRowExclusive) yield (rowId, seedSource.nextInt())

    seededRows.par.foreach { case (rowId, rowSeed) =>
      val generator = new Random(rowSeed)
      val values = part.getRow(rowId).getSplit.getStorage.asInstanceOf[IntFloatDenseVectorStorage].getValues

      // One sample from U(-1/(2*dim), 1/(2*dim)).
      def sample(): Float = (generator.nextFloat() - 0.5f) / dim

      order match {
        case 1 =>
          for (i <- values.indices) values(i) = sample()
        case _ =>
          val blockWidth = 2 * partDim
          for (i <- values.indices) {
            // Only the leading half of each block consumes a random number; the rest is zeroed.
            values(i) = if (i % blockWidth < partDim) sample() else 0.0f
          }
      }
    }
  }
}

object NEModelRandomize {

  /**
    * Per-partition settings for [[NEModelRandomize]].
    *
    * The four settings are mutable because `deserialize` overwrites them on an instance created
    * through the no-argument constructor.
    *
    * @param matrixId id of the matrix being randomized
    * @param partKey  key of the partition this parameter is addressed to
    * @param partDim  number of randomized entries per block when `order != 1`
    * @param dim      divisor applied to each centred random sample
    * @param order    selects the fill pattern used by the update function
    * @param seed     seed for this partition's random generator
    */
  class RandomizePartitionUpdateParam(matrixId: Int,
                                      partKey: PartitionKey,
                                      var partDim: Int,
                                      var dim: Int,
                                      var order: Int,
                                      var seed: Int)
    extends PartitionUpdateParam(matrixId, partKey) {

    /** Creates a placeholder instance (all ints -1, null key) whose fields are set by `deserialize`. */
    def this() = this(-1, null, -1, -1, -1, -1)

    /** Writes the parent's fields followed by partDim, dim, order and seed (in that order). */
    override def serialize(buf: ByteBuf): Unit = {
      super.serialize(buf)
      buf.writeInt(partDim)
      buf.writeInt(dim)
      buf.writeInt(order)
      buf.writeInt(seed)
    }

    /** Reads the fields back in exactly the order `serialize` wrote them. */
    override def deserialize(buf: ByteBuf): Unit = {
      super.deserialize(buf)
      this.partDim = buf.readInt()
      this.dim = buf.readInt()
      this.order = buf.readInt()
      this.seed = buf.readInt()
    }

    /** Parent's length plus the four Int fields written by `serialize` (4 * 4 = 16 bytes). */
    override def bufferLen: Int = super.bufferLen + 16
  }


  /**
    * Matrix-level settings for [[NEModelRandomize]].
    *
    * @param matrixId id of the matrix being randomized
    * @param partDim  number of randomized entries per block when `order != 1`
    * @param dim      divisor applied to each centred random sample
    * @param order    selects the fill pattern used by the update function
    * @param seed     base seed; each partition receives `seed + partitionId`
    */
  class RandomizeUpdateParam(matrixId: Int, partDim: Int, dim: Int, order: Int, seed: Int)
    extends UpdateParam(matrixId) {

    /**
      * Produces one [[RandomizePartitionUpdateParam]] for every partition of the matrix.
      *
      * The Java/Scala collection conversions are spelled out with `asScala` / `asJava` instead of
      * relying on the implicit views of the deprecated `scala.collection.JavaConversions`.
      *
      * @return the per-partition parameters, in the order the partitions are reported
      */
    override def split: java.util.List[PartitionUpdateParam] = {
      // Typed as the base class so the resulting Java list has the element type the override
      // requires (java.util.List is invariant).
      def forPartition(part: PartitionKey): PartitionUpdateParam =
        new RandomizePartitionUpdateParam(matrixId, part, partDim, dim, order, seed + part.getPartitionId)

      val partitions = PSAgentContext.get.getMatrixMetaManager.getPartitions(matrixId)
      partitions.asScala.map(forPartition).asJava
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy