/*
 * Copyright 2016 The BigDL Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.intel.analytics.bigdl.nn
import com.intel.analytics.bigdl.Module
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, TensorModule}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.Table
import com.intel.analytics.bigdl.utils.serializer.{DeserializeContext, ModuleSerializable}
import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter
import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
/**
 * This layer applies the contained layer to each temporal slice of the input tensor.
 *
 * For instance, the TimeDistributed layer can feed each time slice of the input tensor
 * to a Linear layer.
 *
 * The input data format is [Batch, Time, Other dims]. The contained layer must not change
 * the length of the Other dims.
 *
 * @param layer the layer to apply to each temporal slice
 * @param maskZero if `maskZero` is set to true and the input contains zero vectors, the
 *                 corresponding output vectors will be set to zero.
 * @tparam T data type, which can be [[Double]] or [[Float]]
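 *
 * A minimal usage sketch (tensor sizes here are illustrative, not from the original file):
 * {{{
 *   val model = TimeDistributed[Float](Linear[Float](4, 5))
 *   val input = Tensor[Float](3, 2, 4).rand() // [batch = 3, time = 2, inputDim = 4]
 *   val output = model.forward(input)         // size [3, 2, 5]
 * }}}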
 */
class TimeDistributed[T : ClassTag] (
  val layer: AbstractModule[Tensor[T], Tensor[T], T],
  maskZero: Boolean = false)
  (implicit ev: TensorNumeric[T]) extends TensorModule[T] {
  private var inputSize: Array[Int] = _
  private var gradOutputSize: Array[Int] = _
  private var outputSize: Array[Int] = _
  private val timeBuffer =
    new ArrayBuffer[(AbstractModule[_ <: Activity, _ <: Activity, T], Long, Long)]
  private var maskBuffer: Tensor[T] = _
  private var indexBuffer: Tensor[T] = _
  private var inputBuffer: Tensor[T] = _
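  /**
   * Flattens the batch and time dimensions into one: e.g. (illustrative sizes)
   * src = [6, 5, 10] yields target = [30, 10].
   */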
  private def combine(src: Array[Int], target: Array[Int]): Unit = {
    require(src.length == target.length + 1,
      "TimeDistributed: combine method requires src.length == target.length + 1" +
        s" Current src.length = ${src.length}" +
        s" Current target.length = ${target.length}")
    target(0) = src(0) * src(1)
    var j = 1
    while (j < target.length) {
      target(j) = src(j + 1)
      j += 1
    }
  }
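  /**
   * Restores the flattened first dimension into batch and time: e.g. (illustrative
   * sizes) src = [30, 10] with dim1 = 6, dim2 = 5 yields target = [6, 5, 10].
   */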
  private def split(src: Array[Int], target: Array[Int], dim1: Int, dim2: Int): Unit = {
    require(src.length == target.length - 1,
      "TimeDistributed: split method requires src.length == target.length - 1" +
        s" Current src.length = ${src.length}" +
        s" Current target.length = ${target.length}")
    require(dim1 * dim2 == src(0),
      "TimeDistributed: split method requires dim1 * dim2 == src(0), " +
        s"Current dim1 = ${dim1}, dim2 = ${dim2}, src(0) = ${src(0)}")
    target(0) = dim1
    target(1) = dim2
    var j = 1
    while (j < src.length) {
      target(j + 1) = src(j)
      j += 1
    }
  }
  override def updateOutput(input: Tensor[T]): Tensor[T] = {
    require(input.dim >= 3,
      "TimeDistributed: input should be at least a 3D Tensor, e.g. [batch, time, inputDim]. " +
        s"Current input.dim = ${input.dim}")
    if (inputSize == null) {
      inputSize = new Array[Int](input.size.length - 1)
    }
    if (outputSize == null) {
      outputSize = new Array[Int](input.size.length)
    }
    /**
     * combine: [B, T, D] => [B * T, D]
     * split: [B * T, D] => [B, T, D]
     */
    val _inputSize = input.size
    combine(_inputSize, inputSize)
    input.resize(inputSize)
    val _output = layer.forward(input).toTensor[T]
    split(_output.size, outputSize, _inputSize(0), _inputSize(1))
    input.resize(_inputSize)
    output.set(_output).resize(outputSize)
    if (maskZero) {
      if (maskBuffer == null) {
        maskBuffer = Tensor()
      }
      if (indexBuffer == null) {
        indexBuffer = Tensor()
      }
      if (inputBuffer == null) {
        inputBuffer = Tensor()
      }
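      // max(|input|) along the feature dimension: a zero maximum marks a time
      // step whose input is an all-zero (padding) vector, so its output is zeroed.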
      inputBuffer.resizeAs(input).abs(input).max(maskBuffer, indexBuffer, 3)
      for (i <- 1 to maskBuffer.size(1)) {
        for (j <- 1 to maskBuffer.size(2)) {
          if (maskBuffer(Array(i, j, 1)) == ev.zero) {
            output.select(1, i).select(1, j).zero()
          }
        }
      }
    }
    output
  }
  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
    if (gradOutputSize == null) {
      gradOutputSize = new Array[Int](gradOutput.size.length - 1)
    }
    val _inputSize = input.size
    val _gradOutputSize = gradOutput.size
    combine(_gradOutputSize, gradOutputSize)
    input.resize(inputSize)
    gradOutput.resize(gradOutputSize)
    val _gradInput = layer.updateGradInput(input, gradOutput).toTensor[T]
    gradInput.set(_gradInput).resize(_inputSize)
    input.resize(_inputSize)
    gradOutput.resize(_gradOutputSize)
    gradInput
  }
  override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T]): Unit = {
    val _inputSize = input.size
    val _gradOutputSize = gradOutput.size
    input.resize(inputSize)
    gradOutput.resize(gradOutputSize)
    layer.accGradParameters(input, gradOutput)
    input.resize(_inputSize)
    gradOutput.resize(_gradOutputSize)
  }
  override def backward(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
    val st = System.nanoTime
    if (gradOutputSize == null) {
      gradOutputSize = new Array[Int](gradOutput.size.length - 1)
    }
    val _inputSize = input.size
    val _gradOutputSize = gradOutput.size
    combine(_gradOutputSize, gradOutputSize)
    input.resize(inputSize)
    gradOutput.resize(gradOutputSize)
    val _gradInput = layer.backward(input, gradOutput).toTensor[T]
    gradInput.set(_gradInput).resize(_inputSize)
    input.resize(_inputSize)
    gradOutput.resize(_gradOutputSize)
    backwardTime += System.nanoTime - st
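    // Reuse the mask computed in updateOutput: gradients of padded (all-zero)
    // time steps are zeroed as well.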
    if (maskZero) {
      for (i <- 1 to maskBuffer.size(1)) {
        for (j <- 1 to maskBuffer.size(2)) {
          if (maskBuffer(Array(i, j, 1)) == ev.zero) {
            gradInput.select(1, i).select(1, j).zero()
          }
        }
      }
    }
    gradInput
  }
  override def reset(): Unit = layer.reset()
  override def training(): TimeDistributed.this.type = {
    layer.training()
    super.training()
  }
  /**
   * Get the execution engine type.
   */
  override def checkEngineType(): TimeDistributed.this.type = {
    layer.checkEngineType()
    super.checkEngineType()
  }
  override def resetTimes(): Unit = {
    layer.resetTimes()
    this.forwardTime = 0
    this.backwardTime = 0
  }
  override def getTimes(): Array[(AbstractModule[_ <: Activity, _ <: Activity, T], Long, Long)] = {
    timeBuffer.clear
    var modulesForwardTime = 0L
    var modulesBackwardTime = 0L
    layer.getTimes.foreach(x => {
      timeBuffer.append(x)
      modulesForwardTime += x._2
      modulesBackwardTime += x._3
    })
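    // This module's own time is the total forward/backward time minus the time
    // already attributed to the contained layer's sub-modules.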
    timeBuffer.append((this,
      this.forwardTime - modulesForwardTime,
      this.backwardTime - modulesBackwardTime))
    timeBuffer.toArray
  }
  override def evaluate(): TimeDistributed.this.type = {
    layer.evaluate()
    super.evaluate()
  }
  /**
   * This function returns two arrays. One for the weights and the other for the gradients.
   * Custom modules should override this function if they have parameters.
   *
   * @return (Array of weights, Array of grad)
   */
  override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = layer.parameters()
  /**
   * This method will return a table indicating the name and corresponding parameters.
   *
   * @return Table
   */
  override def getParametersTable(): Table = layer.getParametersTable()
  override def getExtraParameter(): Array[Tensor[T]] = {
    layer.getExtraParameter()
  }
  override def clearState(): TimeDistributed.this.type = {
    super.clearState()
    layer.clearState()
    inputSize = null
    gradOutputSize = null
    outputSize = null
    timeBuffer.clear
    maskBuffer = null
    inputBuffer = null
    indexBuffer = null
    this
  }
  override def canEqual(other: Any): Boolean = other.isInstanceOf[TimeDistributed[T]]
  override def equals(other: Any): Boolean = other match {
    case that: TimeDistributed[T] =>
      super.equals(that) &&
        (that canEqual this) &&
        layer.equals(that.layer) &&
        inputSize == that.inputSize &&
        gradOutputSize == that.gradOutputSize &&
        outputSize == that.outputSize
    case _ => false
  }
  override def hashCode(): Int = {
    val state = Seq(super.hashCode(),
      layer, inputSize, gradOutputSize, outputSize)
    state.filter(_ != null).map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
  }
  override def toString(): String = s"${getPrintName}${layer}"
}
object TimeDistributed extends ModuleSerializable {
  def apply[@specialized(Float, Double) T: ClassTag](
    layer: AbstractModule[Tensor[T], Tensor[T], T],
    maskZero: Boolean = false
  )(implicit ev: TensorNumeric[T]): TimeDistributed[T] = {
    new TimeDistributed[T](layer, maskZero)
  }
  // To make it compatible with release 0.4
  override def doLoadModule[T: ClassTag](context: DeserializeContext)
    (implicit ev: TensorNumeric[T]): AbstractModule[Activity, Activity, T] = {
    val attrMap = context.bigdlModule.getAttrMap
    val layerAttr = attrMap.get("layer")
    val layer = DataConverter.getAttributeValue(context, layerAttr).
      asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]]
    var maskZero = false
    if (attrMap.containsKey("maskZero")) {
      maskZero = DataConverter.getAttributeValue(context, attrMap.get("maskZero")).
        asInstanceOf[Boolean]
    }
    TimeDistributed(layer, maskZero)
  }
}
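// A minimal maskZero sketch (not part of the original file; sizes are illustrative).
// Zeroed input time steps yield zeroed output vectors when maskZero = true:
//
//   val model = TimeDistributed[Float](Linear[Float](4, 5), maskZero = true)
//   val input = Tensor[Float](3, 2, 4).rand()
//   input.select(1, 1).select(1, 2).zero()  // pad out one time step
//   val output = model.forward(input)       // output.select(1, 1).select(1, 2) is all zeros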