All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.cassovary.util.Sampling.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
 * file except in compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package com.twitter.cassovary.util

import it.unimi.dsi.fastutil.ints.IntOpenHashSet
import scala.collection.JavaConversions._
import scala.util.Random

object Sampling {
  /**
   * Picks a random subset of an array with a partial Fisher-Yates shuffle.
   *
   * Runs in O(size) time: only the first `size` positions are shuffled, then
   * copied out.
   *
   * Be aware that this method changes the `from` array: elements are swapped
   * in place while sampling.
   *
   * @param size number of elements to pick
   * @param from array to sample from; it is reordered in place
   * @param rng  source of randomness
   * @return if `size > from.length`, the `from` array itself (the same instance,
   *         left untouched, so all elements are returned); otherwise a new array
   *         with the `size` elements that ended up at the front of `from`
   */
  def randomSubset[@specialized(Int) A](size: Int, from: Array[A], rng: Random): Array[A] = {
    // `length` is read directly from the array, avoiding the implicit wrapper
    // that `size` goes through.
    val length = from.length
    if (size > length) {
      from
    } else {
      // A plain while loop keeps the specialized variant free of closures.
      var i = 0
      while (i < size) {
        // Choose the element for position i among the not-yet-fixed tail [i, length).
        val swapIndex = rng.nextInt(length - i) + i
        val temp = from(i)
        from(i) = from(swapIndex)
        from(swapIndex) = temp
        i += 1
      }
      from.slice(0, size)
    }
  }

  /**
   * Expected `O(elements)` time algorithm for a random subset of a given range.
   *
   * If `elements >= range.size`, returns all elements of the range. A
   * non-positive `elements` yields an empty array.
   *
   * @param elements number of distinct values to pick
   * @param range    range to sample from
   * @param rng      source of randomness
   * @return an array of distinct values taken from `range`; when fewer than all
   *         elements are requested and at most half of the range is sampled, the
   *         order of the result is the iteration order of a hash set
   */
  def randomSubset(elements: Int, range: Range, rng: Random): Array[Int] = {
    val rangeSize = range.size
    if (elements >= rangeSize) {
      range.toArray
    } else if (elements > rangeSize / 2) {
      // More than half of the range is wanted: sample the (smaller) complement
      // and keep everything that is not in it. The primitive array is handed to
      // the set directly, so no implicit Java conversion or boxing is involved.
      val complement = new IntOpenHashSet(randomSubset(rangeSize - elements, range, rng))
      range.filterNot(complement.contains).toArray
    } else {
      // At most half of the range is wanted, so every draw hits a new value
      // with probability of at least 1/2 and the loop needs an expected
      // number of at most 2 * elements draws.
      val result = new IntOpenHashSet()
      while (result.size < elements) {
        result.add(range(rng.nextInt(rangeSize)))
      }
      result.toIntArray
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy