/*
 * Copyright (c) 2019 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.mindseye.layers.cudnn;

import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import com.simiacryptus.mindseye.lang.*;
import com.simiacryptus.mindseye.lang.cudnn.*;
import com.simiacryptus.ref.lang.RefUtil;
import com.simiacryptus.ref.wrappers.RefArrays;
import com.simiacryptus.ref.wrappers.RefFunction;
import com.simiacryptus.ref.wrappers.RefList;
import jcuda.jcudnn.cudnnLRNDescriptor;
import jcuda.jcudnn.cudnnLRNMode;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Map;
import java.util.UUID;

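/**
 * Local Response Normalization (LRN) layer backed by cuDNN's cross-channel LRN
 * primitive ({@code cudnnLRNCrossChannelForward}/{@code Backward} in
 * {@code CUDNN_LRN_CROSS_CHANNEL_DIM1} mode). Each activation is divided by a
 * power of the summed squares of activations in a window of {@code width}
 * adjacent channels, following the AlexNet formulation:
 *
 * <pre>  out[c] = in[c] / (k + alpha * sum_{c' near c} in[c']^2)^beta</pre>
 *
 * (cuDNN applies its own scaling of alpha over the window; see the cuDNN LRN
 * documentation for the exact normalization.) The layer has no trainable
 * parameters. A minimal usage sketch, assuming an AlexNet-style configuration:
 *
 * <pre>{@code
 *   LRNLayer lrn = new LRNLayer(5);       // width=5, alpha=1e-4, beta=0.75, k=2.0
 *   lrn.setPrecision(Precision.Float);    // optional; defaults to CudaSettings
 * }</pre>
 */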
@SuppressWarnings("serial")
public class LRNLayer extends LayerBase implements MultiPrecision {
  private static final Logger log = LoggerFactory.getLogger(LRNLayer.class);

  private int width;
  private double alpha;
  private double beta;
  private double k;
  private Precision precision = CudaSettings.INSTANCE().getDefaultPrecision();

  private LRNLayer() {
  }

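  /**
   * Creates an LRN layer over a window of {@code width} channels using the
   * AlexNet defaults: alpha=1e-4, beta=0.75, k=2.0.
   */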
  public LRNLayer(int width) {
    this(width, 1e-4, 0.75, 2.0);
  }

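  /**
   * @param width window width, in channels, over which squared responses are summed
   * @param alpha variance scaling applied to the squared sum
   * @param beta  exponent applied to the normalization denominator
   * @param k     additive bias in the denominator, keeping it bounded away from zero
   */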
  public LRNLayer(int width, double alpha, double beta, double k) {
    this.setWidth(width);
    setAlpha(alpha);
    setBeta(beta);
    setK(k);
  }

  protected LRNLayer(@Nonnull final JsonObject json) {
    super(json);
    setWidth(json.get("width").getAsInt());
    setAlpha(json.get("alpha").getAsDouble());
    setBeta(json.get("beta").getAsDouble());
    setK(json.get("k").getAsDouble());
    JsonPrimitive precision = json.getAsJsonPrimitive("precision");
    if (null != precision) {
      setPrecision(Precision.valueOf(precision.getAsString()));
    } else {
      setPrecision(CudaSettings.INSTANCE().getDefaultPrecision());
    }
    assert 0 < getWidth();
    assert 0 < getAlpha();
  }

  public double getAlpha() {
    return alpha;
  }

  public void setAlpha(double alpha) {
    this.alpha = alpha;
  }

  public double getBeta() {
    return beta;
  }

  public void setBeta(double beta) {
    this.beta = beta;
  }

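  /**
   * CPU fallback requested by {@link #eval} when CUDA is disabled. No CPU
   * implementation is provided for this layer, so this fails fast.
   */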
  @Nonnull
  public Layer getCompatibilityLayer() {
    throw new RuntimeException("Not Implemented");
  }

  public double getK() {
    return k;
  }

  public void setK(double k) {
    this.k = k;
  }

  @Override
  public Precision getPrecision() {
    return precision;
  }

  @Override
  public void setPrecision(final Precision precision) {
    this.precision = precision;
  }

  public int getWidth() {
    return width;
  }

  public void setWidth(int width) {
    this.width = width;
  }

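  /**
   * Deserializes a layer previously written by {@link #getJson}; precision
   * falls back to the CudaSettings default when absent.
   */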
  @Nonnull
  @SuppressWarnings("unused")
  public static LRNLayer fromJson(@Nonnull final JsonObject json, Map<CharSequence, byte[]> rs) {
    return new LRNLayer(json);
  }


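  /**
   * Forward pass. Per-example input dimensions are expected as
   * [width, height, channels] and are mapped to cuDNN's NCHW layout before the
   * kernel launch. Falls back to {@link #getCompatibilityLayer()} when CUDA is
   * disabled.
   */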
  @Nullable
  @Override
  public Result eval(@Nonnull final Result... inObj) {
    if (!CudaSystem.isEnabled()) {
      Layer compatibilityLayer = getCompatibilityLayer();
      Result result = compatibilityLayer.eval(inObj);
      compatibilityLayer.freeRef();
      return result;
    }
    final Result input = inObj[0].addRef();
    RefUtil.freeRef(inObj);
    final TensorList inputData = input.getData();
    @Nonnull final int[] inputSize = inputData.getDimensions();
    final int length = inputData.length();
    final int inputDims = Tensor.length(inputSize);
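    // Reorder the per-example [width, height, channels] dimensions into cuDNN's NCHW layout.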
    @Nonnull final int[] outputSize = new int[]{length, inputSize[2], inputSize[1], inputSize[0]};
    final CudaTensor outputData = fwd(inputData.addRef(), outputSize);
    assert getPrecision() != null;
    boolean isAlive = input.isAlive();
    Result.Accumulator accumulator = new Accumulator(
        outputData.addRef(), inputData, length, inputSize, inputDims,
        getPrecision(), getWidth(), getAlpha(), getBeta(), getK(),
        input.getAccumulator(), isAlive);
    input.freeRef();
    CudaTensorList data = new CudaTensorList(outputData, length, new int[]{outputSize[3], outputSize[2], outputSize[1]}, getPrecision());
    return new Result(data, accumulator, isAlive || !isFrozen());
  }

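  /**
   * Serializes width, alpha, beta, k, and precision; {@link #fromJson} reverses this.
   */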
  @Nonnull
  @Override
  public JsonObject getJson(Map<CharSequence, byte[]> resources, DataSerializer dataSerializer) {
    @Nonnull final JsonObject json = super.getJsonStub();
    json.addProperty("alpha", getAlpha());
    json.addProperty("beta", getBeta());
    json.addProperty("k", getK());
    json.addProperty("width", getWidth());
    assert getPrecision() != null;
    json.addProperty("precision", getPrecision().name());
    return json;
  }

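  /**
   * LRN has no trainable weights, so the state list is empty.
   */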
  @Nonnull
  @Override
  public RefList<double[]> state() {
    return RefArrays.asList();
  }

  public @SuppressWarnings("unused")
  void _free() {
    super._free();
  }

  @Nonnull
  public @Override
  @SuppressWarnings("unused")
  LRNLayer addRef() {
    return (LRNLayer) super.addRef();
  }

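  /**
   * Builds the cuDNN LRN descriptor and an NCHW output descriptor, allocates
   * the output buffer, and runs {@code cudnnLRNCrossChannelForward} on the GPU.
   */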
  @NotNull
  private CudaTensor fwd(TensorList inputData, int[] outputSize) {
    return CudaSystem.run(RefUtil.wrapInterface((RefFunction<CudnnHandle, CudaTensor>) gpu -> {
      try {
        gpu.initThread();
        @Nonnull final CudaResource<cudnnLRNDescriptor> descriptor = gpu.createLRNDescriptor(getWidth(),
            getAlpha(), getBeta(), getK());
        assert getPrecision() != null;
        @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData.addRef(), getPrecision(),
            MemoryType.Device, false);
        final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(getPrecision(), outputSize[0],
            outputSize[1], outputSize[2], outputSize[3], outputSize[1] * outputSize[2] * outputSize[3],
            outputSize[2] * outputSize[3], outputSize[3], 1);
        @Nonnull final CudaMemory outputTensor = gpu.allocate((long) getPrecision().size * Tensor.length(outputSize),
            MemoryType.Managed.ifEnabled(), true);
        CudaMemory inputDataMemory = inputTensor.getMemory(gpu.addRef());
        assert inputDataMemory != null;
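        // cuDNN blend scalars: output = 1.0 * LRN(input) + 0.0 * (existing output contents).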
        CudaSystem
            .handle(gpu.cudnnLRNCrossChannelForward(descriptor.getPtr(), cudnnLRNMode.CUDNN_LRN_CROSS_CHANNEL_DIM1,
                getPrecision().getPointer(1.0), inputTensor.descriptor.getPtr(), inputDataMemory.getPtr(),
                getPrecision().getPointer(0.0), outputDescriptor.getPtr(), outputTensor.getPtr()));
        inputTensor.freeRef();
        descriptor.freeRef();
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        gpu.freeRef();
        inputDataMemory.dirty();
        inputDataMemory.freeRef();
        outputTensor.dirty();
        return new CudaTensor(outputTensor, outputDescriptor, getPrecision());
      } catch (@Nonnull final Throwable e) {
        throw new ComponentException("Error", e);
      }
    }, inputData.addRef(), addRef()), inputData);
  }

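  /**
   * Backward pass: {@code cudnnLRNCrossChannelBackward} consumes the forward
   * output (y), the incoming gradient (dy), and the original input (x) to
   * produce the input gradient (dx), which is then handed to the upstream
   * accumulator.
   */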
  private static class Accumulator extends Result.Accumulator {

    private final CudaTensor outputData;
    private final TensorList inputData;
    private final int length;
    private final int[] inputSize;
    private final int inputDims;
    private Precision precision;
    private int width;
    private double alpha;
    private double beta;
    private double k;
    private Result.Accumulator accumulator;
    private boolean alive;

    public Accumulator(CudaTensor outputData, TensorList inputData, int length, int[] inputSize,
                       int inputDims, Precision precision, int width, double alpha, double beta,
                       double k, Result.Accumulator accumulator, boolean alive) {
      this.outputData = outputData;
      this.inputData = inputData;
      this.length = length;
      this.inputSize = inputSize;
      this.inputDims = inputDims;
      this.precision = precision;
      this.width = width;
      this.alpha = alpha;
      this.beta = beta;
      this.k = k;
      this.accumulator = accumulator;
      this.alive = alive;
    }

    @Override
    public void accept(@Nullable DeltaSet<UUID> buffer, @Nonnull TensorList error) {
      assert error.length() == inputData.length();
      if (alive) {
        TensorList data = CudaSystem
            .run(RefUtil.wrapInterface((RefFunction<CudnnHandle, CudaTensorList>) gpu -> {
                  final CudaDevice.CudaTensorDescriptor passbackDescriptor = gpu.newTensorDescriptor(
                      precision, length, inputSize[2], inputSize[1], inputSize[0],
                      inputSize[2] * inputSize[1] * inputSize[0], inputSize[1] * inputSize[0], inputSize[0],
                      1);
                  @Nonnull final CudaResource<cudnnLRNDescriptor> descriptor = gpu.createLRNDescriptor(
                      width, alpha, beta, k);
                  @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData.addRef(),
                      precision, MemoryType.Device, true);
                  @Nullable CudaTensor errorPtr = gpu.getTensor(error.addRef(),
                      precision, MemoryType.Device, true);
                  @Nonnull final CudaMemory passbackBuffer = gpu.allocate(
                      (long) inputDims * precision.size * length,
                      MemoryType.Managed.ifEnabled(), true);
                  assert outputData != null;
                  CudaMemory outputDataMemory = outputData.getMemory(gpu.addRef());
                  CudaMemory errorPtrMemory = errorPtr.getMemory(gpu.addRef());
                  CudaMemory inputDataMemory = inputTensor.getMemory(gpu.addRef());
                  assert inputDataMemory != null;
                  assert errorPtrMemory != null;
                  assert outputDataMemory != null;
                  CudaSystem.handle(gpu.cudnnLRNCrossChannelBackward(descriptor.getPtr(),
                      cudnnLRNMode.CUDNN_LRN_CROSS_CHANNEL_DIM1, precision.getPointer(1.0),
                      outputData.descriptor.getPtr(), outputDataMemory.getPtr(), errorPtr.descriptor.getPtr(),
                      errorPtrMemory.getPtr(), inputTensor.descriptor.getPtr(), inputDataMemory.getPtr(),
                      precision.getPointer(0.0), passbackDescriptor.getPtr(),
                      passbackBuffer.getPtr()));
                  gpu.freeRef();
                  errorPtr.freeRef();
                  inputTensor.freeRef();
                  descriptor.freeRef();
                  outputDataMemory.dirty();
                  outputDataMemory.freeRef();
                  errorPtrMemory.dirty();
                  errorPtrMemory.freeRef();
                  inputDataMemory.dirty();
                  inputDataMemory.freeRef();
                  passbackBuffer.dirty();
                  return new CudaTensorList(
                      new CudaTensor(passbackBuffer,
                          passbackDescriptor,
                          precision),
                      length, inputSize, precision);
                }, outputData == null ? null : outputData.addRef(), error.addRef(),
                inputData.addRef()), error.addRef());
        DeltaSet<UUID> buffer1 = buffer == null ? null : buffer.addRef();
        this.accumulator.accept(buffer1, data);
      }
      error.freeRef();
      if (null != buffer)
        buffer.freeRef();
    }

    public @SuppressWarnings("unused")
    void _free() {
      super._free();
      outputData.freeRef();
      accumulator.freeRef();
      inputData.freeRef();
    }
  }
}