All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.simiacryptus.mindseye.layers.cudnn.ImgPaddingLayer Maven / Gradle / Ivy

/*
 * Copyright (c) 2019 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.mindseye.layers.cudnn;

import com.google.gson.JsonObject;
import com.simiacryptus.mindseye.lang.*;
import com.simiacryptus.mindseye.lang.cudnn.*;
import com.simiacryptus.mindseye.layers.cudnn.ImgCropLayer.Alignment;
import com.simiacryptus.ref.lang.RefUtil;
import com.simiacryptus.ref.lang.ReferenceCountingBase;
import com.simiacryptus.ref.wrappers.RefArrays;
import com.simiacryptus.ref.wrappers.RefFunction;
import com.simiacryptus.ref.wrappers.RefList;
import com.simiacryptus.ref.wrappers.RefString;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Map;
import java.util.UUID;

@SuppressWarnings("serial")
public class ImgPaddingLayer extends LayerBase implements MultiPrecision {
  private static final Logger log = LoggerFactory.getLogger(ImgPaddingLayer.class);
  private ImgCropLayer.Alignment verticalAlign = Alignment.Center;
  private Alignment horizontalAlign = Alignment.Center;
  private boolean roundUp = false;
  private int sizeX;
  private int sizeY; // SpatialReflectionPadding
  private Precision precision = CudaSettings.INSTANCE().getDefaultPrecision();

  // No-arg constructor reserved for internal/deserialization use; leaves sizeX/sizeY at 0.
  private ImgPaddingLayer() {
  }

  /**
   * Creates a padding layer that resizes images to the given output geometry.
   *
   * @param sizeX target output width; must be positive
   * @param sizeY target output height; must be positive
   */
  public ImgPaddingLayer(int sizeX, int sizeY) {
    this.sizeX = sizeX;
    this.sizeY = sizeY;
    assert 0 < sizeX;
    assert 0 < sizeY;
  }

  /**
   * Restores a layer from its JSON form (the inverse of {@link #getJson}).
   *
   * @param json serialized state; must contain sizeX, sizeY, roundUp,
   *             verticalAlign, horizontalAlign and precision
   */
  protected ImgPaddingLayer(@Nonnull final JsonObject json) {
    super(json);
    sizeX = json.get("sizeX").getAsInt();
    sizeY = json.get("sizeY").getAsInt();
    roundUp = json.get("roundUp").getAsBoolean();
    setVerticalAlign(Alignment.valueOf(json.get("verticalAlign").getAsString()));
    setHorizontalAlign(Alignment.valueOf(json.get("horizontalAlign").getAsString()));
    this.precision = Precision.valueOf(json.getAsJsonPrimitive("precision").getAsString());
    assert 0 < sizeX;
    assert 0 < sizeY;
  }

  /**
   * Pure-Java fallback used when CUDA is disabled (see {@link #eval}).
   * NOTE(review): this maps to the java ImgCropLayer — presumably that layer also handles
   * enlargement/padding; confirm it honors this layer's alignment and roundUp settings.
   */
  @Nonnull
  public Layer getCompatibilityLayer() {
    return this.as(com.simiacryptus.mindseye.layers.java.ImgCropLayer.class);
  }

  /** @return horizontal (x-axis) placement of the input within the padded output */
  public Alignment getHorizontalAlign() {
    return horizontalAlign;
  }

  /** @param horizontalAlign horizontal (x-axis) placement of the input within the padded output */
  public void setHorizontalAlign(Alignment horizontalAlign) {
    this.horizontalAlign = horizontalAlign;
  }

  /** @return the floating-point precision used for GPU buffers and descriptors */
  @Override
  public Precision getPrecision() {
    return precision;
  }

  /** @param precision floating-point precision to use for GPU buffers and descriptors */
  @Override
  public void setPrecision(final Precision precision) {
    this.precision = precision;
  }

  /** @return vertical (y-axis) placement of the input within the padded output */
  public Alignment getVerticalAlign() {
    return verticalAlign;
  }

  /** @param verticalAlign vertical (y-axis) placement of the input within the padded output */
  public void setVerticalAlign(Alignment verticalAlign) {
    this.verticalAlign = verticalAlign;
  }

  /** @return whether an odd padding amount rounds up on the leading edge (see {@link #half}) */
  public boolean isRoundUp() {
    return roundUp;
  }

  /** @param roundUp whether an odd padding amount rounds up on the leading edge (see {@link #half}) */
  public void setRoundUp(boolean roundUp) {
    this.roundUp = roundUp;
  }

  /**
   * JSON deserialization factory (invoked reflectively by the framework).
   *
   * @param json serialized layer state
   * @param rs   resource map (unused by this layer; raw type as declared)
   */
  @Nonnull
  @SuppressWarnings("unused")
  public static ImgPaddingLayer fromJson(@Nonnull final JsonObject json, Map rs) {
    return new ImgPaddingLayer(json);
  }

  /**
   * Accumulates (sums) the overlapping region of {@code input} into {@code output_memory}.
   * All reference-counted arguments are consumed. A null or zero-area overlap is a no-op.
   */
  public static void add(@Nonnull CudnnHandle gpu, @Nullable CudaTensor input, @Nonnull int[] input_dimensions, @Nonnull int[] output_dimensions,
                         @Nonnull int[] offset, int length, @Nonnull Precision precision, @Nullable CudaMemory output_memory) {
    // Resolve the overlapping view; getCopyParams returns null (and releases refs) when empty.
    CopyParams params = getCopyParams(gpu, input, input_dimensions,
        output_dimensions, offset, length, precision, output_memory);
    if (params == null) {
      return;
    }
    assert params.input_view_descriptor != null;
    // Degenerate (zero-width or zero-height) views have nothing to accumulate.
    if (params.input_view_descriptor.width <= 0 || params.input_view_descriptor.height <= 0) {
      params.freeRef();
      return;
    }
    params.add();
    params.freeRef();
  }

  /**
   * Overwrites the overlapping region of {@code output_memory} with data from {@code input}.
   * All reference-counted arguments are consumed. A null or zero-area overlap is a no-op.
   */
  public static void set(@Nonnull CudnnHandle gpu, @Nullable CudaTensor input, @Nonnull int[] input_dimensions, @Nonnull int[] output_dimensions,
                         @Nonnull int[] offset, int length, @Nonnull Precision precision, @Nullable CudaMemory output_memory) {
    // Resolve the overlapping view; null means there is nothing to write.
    final CopyParams copyParams = getCopyParams(gpu, input, input_dimensions,
        output_dimensions, offset, length, precision, output_memory);
    if (null == copyParams) {
      return;
    }
    assert copyParams.input_view_descriptor != null;
    final boolean emptyView = copyParams.input_view_descriptor.width <= 0
        || copyParams.input_view_descriptor.height <= 0;
    if (!emptyView) {
      copyParams.set();
    }
    copyParams.freeRef();
  }

  /**
   * Computes the geometry (view descriptors, element offsets, memory handles) for copying the
   * overlapping region between an input tensor and an output buffer of possibly different sizes.
   * <p>
   * A negative width/height in either dimension array encodes a mirrored (reversed-stride)
   * traversal of that axis — this is how the reflection-padding passes express flipped copies.
   * A non-positive {@code offset} component shifts the window into the input; a positive one
   * shifts it into the output.
   *
   * @return the populated {@link CopyParams}, or {@code null} when the overlap is empty; in
   *         either failure path all incoming reference-counted arguments are released
   * @throws IllegalArgumentException if input and output channel counts differ
   */
  @Nullable
  public static CopyParams getCopyParams(@Nonnull CudnnHandle gpu, @Nullable CudaTensor input, int[] input_dimensions,
                                         int[] output_dimensions, int[] offset, int length, @Nonnull Precision precision, @Nullable CudaMemory output_memory) {

    int offset_left = offset[0];
    int offset_top = offset[1];

    // Running element offsets into the input/output memory blocks.
    int input_offset = 0;
    int output_offset = 0;

    int input_channels = input_dimensions[2];
    int input_height = input_dimensions[1];
    int input_width = input_dimensions[0];

    int output_channels = output_dimensions[2];
    int output_height = output_dimensions[1];
    int output_width = output_dimensions[0];

    int view_channels = Math.min(input_channels, output_channels);
    // Channel counts must match; release all held references before signalling the error.
    if (input_channels != output_channels) {
      if (null != input)
        input.freeRef();
      if (null != output_memory)
        output_memory.freeRef();
      gpu.freeRef();
      throw new IllegalArgumentException(RefString.format("%d != %d", input_channels, output_channels));
    }

    assert input != null;
    // Negative width: read this axis mirrored — start at the last column, step backwards.
    int input_wStride = input.descriptor.wStride;
    if (input_width < 0) {
      input_width *= -1;
      input_offset += input_wStride * (input_width - 1);
      input_wStride *= -1;
    }
    int output_wStride = 1;
    if (output_width < 0) {
      output_width *= -1;
      output_offset += output_wStride * (output_width - 1);
      output_wStride *= -1;
    }
    int view_width;
    if (offset_left <= 0) {
      offset_left *= -1;
      view_width = Math.min(input_width - offset_left, output_width);
      input_offset += input_wStride * offset_left;
    } else {
      view_width = Math.min(input_width, output_width - offset_left);
      output_offset += output_wStride * offset_left;
    }
    // Empty horizontal overlap: nothing to copy.
    if (view_width <= 0) {
      input.freeRef();
      if (null != output_memory)
        output_memory.freeRef();
      gpu.freeRef();
      return null;
    }

    // Same mirroring/windowing logic, applied to the vertical axis.
    int input_hStride = input.descriptor.hStride;
    if (input_height < 0) {
      input_height *= -1;
      input_offset += input_hStride * (input_height - 1);
      input_hStride *= -1;
    }
    int output_hStride = output_width;
    if (output_height < 0) {
      output_height *= -1;
      output_offset += output_hStride * (output_height - 1);
      output_hStride *= -1;
    }
    int view_height;
    if (offset_top <= 0) {
      offset_top *= -1;
      view_height = Math.min(input_height - offset_top, output_height);
      input_offset += input_hStride * offset_top;
    } else {
      view_height = Math.min(input_height, output_height - offset_top);
      output_offset += output_hStride * offset_top;
    }
    // Empty vertical overlap: nothing to copy.
    if (view_height <= 0) {
      input.freeRef();
      if (null != output_memory)
        output_memory.freeRef();
      gpu.freeRef();
      return null;
    }
    assert input_offset >= 0 : input_offset;
    assert output_offset >= 0 : output_offset;
    ImgPaddingLayer.CopyParams copyParams = new CopyParams(gpu.addRef());
    copyParams.setLength(length);
    copyParams.setPrecision(precision);
    copyParams.setOutput_memory(output_memory);
    copyParams.setInput_memory(input.getMemory(gpu.addRef()));
    copyParams.setInput_offset(input_offset);
    copyParams.setOutput_offset(output_offset);
    // Input view reuses the source tensor's native strides (possibly negated for mirroring);
    // the output view assumes densely packed NCHW at the output geometry.
    copyParams.setInput_view_descriptor(gpu.newTensorDescriptor(precision, length, view_channels, view_height, view_width,
        input.descriptor.nStride, input.descriptor.cStride, input_hStride, input_wStride));
    copyParams.setOutput_view_descriptor(gpu.newTensorDescriptor(precision, length, view_channels, view_height, view_width, //
        output_channels * output_height * output_width, //
        output_height * output_width, //
        output_hStride, //
        output_wStride));
    gpu.freeRef();
    input.freeRef();
    return copyParams;
  }

  /**
   * Splits a total padding amount {@code i} into the leading-edge share for the given alignment:
   * 0 for Left, all of it for Right, and an even split (odd remainder resolved by
   * {@code roundUp}) otherwise.
   */
  public static int half(int i, Alignment alignment, boolean roundUp) {
    // Left-aligned content gets no leading padding; right-aligned gets all of it.
    if (Alignment.Left == alignment)
      return 0;
    if (Alignment.Right == alignment)
      return i;
    // Centered: split evenly; an odd amount is biased by one per the roundUp flag.
    if (i % 2 == 0)
      return i / 2;
    return (roundUp ? i + 1 : i - 1) / 2;
  }

  /**
   * Backward-pass helper: condenses a gradient of geometry {@code dimIn} down to {@code dimOut},
   * writing the center copy and then accumulating the contributions of the eight surrounding
   * mirrored reflection tiles (matching the reflections applied on the forward pass).
   * <p>
   * Negative components in the dimension arrays handed to {@link #set}/{@link #add} mark axes
   * that are traversed in reverse (mirrored) by {@link #getCopyParams}.
   *
   * @param gpu         handle to run on (consumed)
   * @param inputTensor gradient tensor to condense (consumed, or returned directly when the
   *                    geometry is unchanged)
   * @param dimIn       gradient dimensions as [width, height, channels]
   * @param dimOut      condensed output dimensions as [width, height, channels]
   * @param length      batch length
   * @param dirty       when true, the freshly allocated output memory may be left uninitialized
   * @return the condensed tensor, or {@code inputTensor} unchanged when no resize/shift is needed
   * @throws IllegalArgumentException if either dimension array is not of length 3
   */
  @Nullable
  public static CudaTensor copy_condense(@Nonnull CudnnHandle gpu, @Nullable CudaTensor inputTensor, @Nonnull int[] dimIn, @Nonnull int[] dimOut, int length,
                                         boolean dirty, Precision precision, Alignment horizontalAlign, Alignment verticalAlign, boolean roundUp) {
    // Fix: exception messages were swapped relative to the checks (cf. copy_expand).
    if (3 != dimIn.length) {
      if (null != inputTensor)
        inputTensor.freeRef();
      gpu.freeRef();
      throw new IllegalArgumentException("dimIn.length");
    }
    if (3 != dimOut.length) {
      if (null != inputTensor)
        inputTensor.freeRef();
      gpu.freeRef();
      throw new IllegalArgumentException("dimOut.length");
    }
    int offset_left = half(dimOut[0] - dimIn[0], horizontalAlign, roundUp);
    int offset_top = half(dimOut[1] - dimIn[1], verticalAlign, roundUp);
    if (RefArrays.equals(dimIn, dimOut) && offset_left == 0 && offset_top == 0) {
      // Geometry unchanged: pass the gradient through untouched.
      gpu.freeRef();
      return inputTensor;
    } else {
      CudaMemory output_memory = gpu.allocate((long) length * Tensor.length(dimOut) * precision.size, MemoryType.Device,
          dirty);
      // Center tile: overwrite the (possibly dirty) output buffer.
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn, dimOut, new int[]{offset_left, offset_top},
          length, precision, output_memory.addRef());
      // Eight mirrored neighbor tiles: accumulate their gradient contributions.
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn, new int[]{-dimOut[0], dimOut[1], dimOut[2]},
          new int[]{offset_left - dimOut[0], offset_top}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn, new int[]{-dimOut[0], dimOut[1], dimOut[2]},
          new int[]{offset_left + dimOut[0], offset_top}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn,
          new int[]{-dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left + dimOut[0], offset_top + dimOut[1]}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn,
          new int[]{-dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left + dimOut[0], offset_top - dimOut[1]}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn,
          new int[]{-dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left - dimOut[0], offset_top + dimOut[1]}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn,
          new int[]{-dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left - dimOut[0], offset_top - dimOut[1]}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn, new int[]{dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left, offset_top - dimOut[1]}, length, precision,
          output_memory.addRef());
      add(gpu.addRef(), inputTensor, dimIn, new int[]{dimOut[0], -dimOut[1], dimOut[2]},
          new int[]{offset_left, offset_top + dimOut[1]}, length, precision,
          output_memory.addRef());

      return new CudaTensor(output_memory,
          simpleDescriptor(length, dimOut, gpu, precision), precision);
    }
  }

  /**
   * Builds a densely-packed NCHW tensor descriptor for a [width, height, channels] geometry.
   * Consumes the {@code gpu} reference.
   */
  public static CudaDevice.CudaTensorDescriptor simpleDescriptor(int length, int[] dimOut, @Nonnull CudnnHandle gpu, Precision precision) {
    final int width = dimOut[0];
    final int height = dimOut[1];
    final int channels = dimOut[2];
    final CudaDevice.CudaTensorDescriptor descriptor = gpu.newTensorDescriptor(precision,
        length, channels, height, width,
        channels * height * width, // batch (n) stride
        height * width, // channel stride
        width, // row (h) stride
        1); // element (w) stride
    gpu.freeRef();
    return descriptor;
  }

  /**
   * Pads (or crops) each image in the batch to {@code sizeX} x {@code sizeY} using mirrored
   * reflection tiles on the GPU; falls back to {@link #getCompatibilityLayer()} when CUDA is
   * disabled. If the input already has the target size it is returned unchanged.
   */
  @Nullable
  @Override
  public Result eval(@Nonnull final Result... inObj) {
    if (!CudaSystem.isEnabled()) {
      Layer compatibilityLayer = getCompatibilityLayer();
      Result result = compatibilityLayer.eval(inObj);
      compatibilityLayer.freeRef();
      return result;
    }
    assert 1 == inObj.length;
    final Result input = inObj[0].addRef();
    final TensorList inputData = input.getData();
    assert 3 == inputData.getDimensions().length;
    final int length = inputData.length();
    @Nonnull
    int[] dimIn = inputData.getDimensions();
    // Already the requested geometry: pass the input through unchanged.
    if (dimIn[0] == sizeX && dimIn[1] == sizeY) {
      inputData.freeRef();
      RefUtil.freeRef(inObj);
      return input;
    }
    @Nonnull final int[] dimOut = RefArrays.copyOf(dimIn, 3);
    dimOut[0] = sizeX;
    dimOut[1] = sizeY;
    final TensorList outputData = fwd(inputData, length, dimIn, dimOut);
    int[] output_dimensions = outputData.getDimensions();
    int output_length = outputData.length();
    // The accumulator captures a snapshot of this layer's settings so later configuration
    // changes cannot affect an in-flight backward pass.
    Result.Accumulator accumulator = new Accumulator(output_dimensions, output_length, length, dimOut, dimIn, ImgPaddingLayer.this.precision, getHorizontalAlign(), getVerticalAlign(), isRoundUp(), input.getAccumulator(), input.isAlive());
    boolean isAlive = Result.anyAlive(inObj);
    input.freeRef();
    return new Result(outputData, accumulator, isAlive);
  }

  /**
   * Forward-pass helper: expands an image of geometry {@code dimIn} to {@code dimOut} by writing
   * the center copy plus eight surrounding mirrored reflection tiles (reflection padding).
   * Negative components in the dimension arrays handed to {@link #set} mark axes traversed in
   * reverse (mirrored) by {@link #getCopyParams}.
   *
   * @param gpu         handle to run on (consumed)
   * @param inputTensor source tensor (consumed, or returned directly when geometry is unchanged)
   * @param dimIn       input dimensions as [width, height, channels]
   * @param dimOut      padded output dimensions as [width, height, channels]
   * @param length      batch length
   * @param dirty       when true, the freshly allocated output memory may be left uninitialized
   * @return the padded tensor, or {@code inputTensor} unchanged when no resize/shift is needed
   * @throws IllegalArgumentException if either dimension array is not of length 3
   */
  @Nullable
  public CudaTensor copy_expand(@Nonnull CudnnHandle gpu, @Nullable CudaTensor inputTensor, @Nonnull int[] dimIn, @Nonnull int[] dimOut, int length,
                                boolean dirty) {
    if (3 != dimOut.length) {
      if (null != inputTensor)
        inputTensor.freeRef();
      gpu.freeRef();
      throw new IllegalArgumentException("dimOut.length");
    }
    if (3 != dimIn.length) {
      if (null != inputTensor)
        inputTensor.freeRef();
      gpu.freeRef();
      throw new IllegalArgumentException("dimIn.length");
    }
    int offset_left = half(dimOut[0] - dimIn[0], getHorizontalAlign(), isRoundUp());
    int offset_top = half(dimOut[1] - dimIn[1], getVerticalAlign(), isRoundUp());
    if (RefArrays.equals(dimIn, dimOut) && offset_left == 0 && offset_top == 0) {
      // Geometry unchanged: pass the tensor through untouched.
      gpu.freeRef();
      return inputTensor;
    } else {
      CudaMemory output_memory = gpu.allocate((long) length * Tensor.length(dimOut) * precision.size, MemoryType.Device,
          dirty);
      // Center tile, then the eight mirrored neighbor tiles (left/right, corners, top/bottom).
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), dimIn, dimOut, new int[]{offset_left, offset_top},
          length, precision, output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left - dimIn[0], offset_top}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left + dimIn[0], offset_top}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left + dimIn[0], offset_top + dimIn[1]}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left + dimIn[0], offset_top - dimIn[1]}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left - dimIn[0], offset_top + dimIn[1]}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{-dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left - dimIn[0], offset_top - dimIn[1]}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor == null ? null : inputTensor.addRef(), new int[]{dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left, offset_top - dimIn[1]}, length, precision,
          output_memory.addRef());
      set(gpu.addRef(), inputTensor, new int[]{dimIn[0], -dimIn[1], dimIn[2]}, dimOut,
          new int[]{offset_left, offset_top + dimIn[1]}, length, precision,
          output_memory.addRef());

      return new CudaTensor(output_memory,
          simpleDescriptor(length, dimOut, gpu, precision), precision);
    }
  }

  /**
   * Serializes this layer's configuration; the inverse of {@link #ImgPaddingLayer(JsonObject)}.
   */
  @Nonnull
  @Override
  public JsonObject getJson(Map resources, DataSerializer dataSerializer) {
    @Nonnull final JsonObject json = super.getJsonStub();
    json.addProperty("sizeY", sizeY);
    json.addProperty("sizeX", sizeX);
    json.addProperty("roundUp", roundUp);
    json.addProperty("horizontalAlign", getHorizontalAlign().toString());
    json.addProperty("verticalAlign", getVerticalAlign().toString());
    json.addProperty("precision", precision.name());
    return json;
  }

  /**
   * This layer has no trainable weights.
   *
   * @return an empty list
   */
  @Nonnull
  @Override
  public RefList state() {
    return RefArrays.asList();
  }

  // No resources of its own to release; defers to the base class.
  public @SuppressWarnings("unused")
  void _free() {
    super._free();
  }

  // Covariant override so callers keep the concrete type when taking a reference.
  @Nonnull
  public @Override
  @SuppressWarnings("unused")
  ImgPaddingLayer addRef() {
    return (ImgPaddingLayer) super.addRef();
  }

  /**
   * Forward pass: runs {@link #copy_expand} on the GPU to produce a padded batch of geometry
   * {@code dimOut}. Consumes {@code inputData}.
   * <p>
   * Changes: removed the dead local {@code dirty} (it was assigned but never used — the call
   * site passes {@code false} directly) and restored the generic {@code RefFunction} cast,
   * without which the lambda parameter would not type as {@code CudnnHandle}.
   */
  @NotNull
  private TensorList fwd(TensorList inputData, int length, int[] dimIn, int[] dimOut) {
    return CudaSystem.run(RefUtil.wrapInterface((RefFunction<CudnnHandle, CudaTensorList>) gpu -> {
      @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData.addRef(), precision,
          MemoryType.Device, false);
      assert dimOut[0] > 0;
      assert dimOut[1] > 0;
      assert dimOut[2] > 0;
      if (3 != dimIn.length) {
        inputTensor.freeRef();
        gpu.freeRef();
        throw new IllegalArgumentException("inputDimensions.length");
      }
      if (3 != dimOut.length) {
        inputTensor.freeRef();
        gpu.freeRef();
        throw new IllegalArgumentException("dimOut.length");
      }
      // dirty=false: allocate the output zero-initialized before copy_expand fills it.
      return new CudaTensorList(
          copy_expand(gpu, inputTensor, dimIn, dimOut, length, false),
          length, dimOut, precision);
    }, inputData.addRef()), inputData);
  }

  /**
   * Reference-counted bundle of the descriptors, device memory blocks, and element offsets
   * needed to run one cuDNN {@code cudnnTransformTensor} copy between an input view and an
   * output view. {@link #set()} overwrites the output region; {@link #add()} accumulates into it.
   */
  private static class CopyParams extends ReferenceCountingBase {
    public final CudnnHandle gpu;
    public int length;
    public Precision precision;
    // Element (not byte) offsets into the respective memory blocks.
    public int input_offset;
    public int output_offset;
    @Nullable
    public CudaMemory input_memory;
    public CudaDevice.CudaTensorDescriptor input_view_descriptor;
    @Nullable
    public CudaMemory output_memory;
    private CudaDevice.CudaTensorDescriptor output_view_descriptor;

    public CopyParams(CudnnHandle gpu) {
      this.gpu = gpu;
    }

    public void setInput_memory(@Nullable CudaMemory input_memory) {
      if (null != this.input_memory)
        this.input_memory.freeRef();
      this.input_memory = input_memory;
    }

    public void setInput_offset(int input_offset) {
      this.input_offset = input_offset;
    }

    public void setInput_view_descriptor(CudaDevice.CudaTensorDescriptor input_view_descriptor) {
      if (null != this.input_view_descriptor)
        this.input_view_descriptor.freeRef();
      this.input_view_descriptor = input_view_descriptor;
    }

    public void setLength(int length) {
      this.length = length;
    }

    public void setOutput_memory(@Nullable CudaMemory output_memory) {
      if (null != this.output_memory)
        this.output_memory.freeRef();
      this.output_memory = output_memory;
    }

    public void setOutput_offset(int output_offset) {
      this.output_offset = output_offset;
    }

    public void setOutput_view_descriptor(CudaDevice.CudaTensorDescriptor output_view_descriptor) {
      // Fix: release any previously-held descriptor so repeated assignment cannot leak a
      // reference (consistent with setInput_view_descriptor above).
      if (null != this.output_view_descriptor)
        this.output_view_descriptor.freeRef();
      this.output_view_descriptor = output_view_descriptor;
    }

    public void setPrecision(Precision precision) {
      this.precision = precision;
    }

    /**
     * Overwrites the output view with the input view (alpha=1, beta=0).
     */
    public void set() {
      assert this.input_view_descriptor != null;
      final CudaDevice.CudaTensorDescriptor input_view_descriptor = this.input_view_descriptor.addRef();
      assert output_memory != null;
      CudaMemory output_with_offset = output_memory.withByteOffset(output_offset * precision.size);
      assert input_memory != null;
      CudaMemory input_with_offset = input_memory.withByteOffset(input_offset * precision.size);
      assert output_view_descriptor != null;
      CudaSystem.handle(gpu.cudnnTransformTensor(precision.getPointer(1.0), input_view_descriptor.getPtr(),
          input_with_offset.getPtr(), precision.getPointer(0.0), output_view_descriptor.getPtr(),
          output_with_offset.getPtr()));
      input_with_offset.freeRef();
      output_with_offset.freeRef();
      input_view_descriptor.freeRef();
      assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
      input_memory.dirty();
      output_memory.dirty();
    }

    /**
     * Accumulates the input view into the output view (alpha=1, beta=1).
     */
    public void add() {
      assert this.input_view_descriptor != null;
      final CudaDevice.CudaTensorDescriptor input_view_descriptor = this.input_view_descriptor.addRef();
      assert input_memory != null;
      CudaMemory input_with_offset = input_memory.withByteOffset(input_offset * precision.size);
      assert output_memory != null;
      CudaMemory output_with_offset = output_memory.withByteOffset(output_offset * precision.size);
      assert output_view_descriptor != null;
      CudaSystem.handle(gpu.cudnnTransformTensor(precision.getPointer(1.0), input_view_descriptor.getPtr(),
          input_with_offset.getPtr(), precision.getPointer(1.0), output_view_descriptor.getPtr(),
          output_with_offset.getPtr()));
      output_with_offset.freeRef();
      input_with_offset.freeRef();
      input_view_descriptor.freeRef();
      assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
      input_memory.dirty();
      output_memory.dirty();
    }

    // Releases all held references; fields are nulled so a double-free is impossible.
    public void _free() {
      if (null != output_view_descriptor)
        output_view_descriptor.freeRef();
      output_view_descriptor = null;
      if (null != output_memory)
        output_memory.freeRef();
      output_memory = null;
      if (null != input_view_descriptor)
        input_view_descriptor.freeRef();
      input_view_descriptor = null;
      if (null != input_memory)
        input_memory.freeRef();
      input_memory = null;
      if (null != gpu) gpu.freeRef();
      super._free();
    }

    @Nonnull
    public @Override
    @SuppressWarnings("unused")
    CopyParams addRef() {
      return (CopyParams) super.addRef();
    }
  }

  /**
   * Backpropagation for {@link ImgPaddingLayer}: condenses the padded gradient back to the
   * input geometry via {@link #copy_condense} (summing the reflection tiles) and forwards it
   * to the upstream accumulator. All configuration is captured at construction time.
   * <p>
   * Changes: never-reassigned fields are now {@code final}, and the generic
   * {@code RefFunction} cast is restored (a raw cast would not type the lambda parameter
   * as {@code CudnnHandle}).
   */
  private static class Accumulator extends Result.Accumulator {

    private final int[] output_dimensions;
    private final int output_length;
    private final int length;
    private final int[] dimOut;
    private final int[] dimIn;
    private final Precision precision;
    private final Alignment horizontalAlign;
    private final Alignment verticalAlign;
    private final boolean roundUp;
    private final Result.Accumulator accumulator;
    private final boolean alive;

    public Accumulator(int[] output_dimensions, int output_length, int length, int[] dimOut, int[] dimIn, Precision precision, Alignment horizontalAlign, Alignment verticalAlign, boolean roundUp, Result.Accumulator accumulator, boolean alive) {
      this.output_dimensions = output_dimensions;
      this.output_length = output_length;
      this.length = length;
      this.dimOut = dimOut;
      this.dimIn = dimIn;
      this.precision = precision;
      this.horizontalAlign = horizontalAlign;
      this.verticalAlign = verticalAlign;
      this.roundUp = roundUp;
      this.accumulator = accumulator;
      this.alive = alive;
    }

    @Override
    public void accept(@Nullable DeltaSet buffer, @Nonnull TensorList delta) {
      // Sanity-check that the incoming gradient matches the forward output's geometry.
      if (!RefArrays.equals(delta.getDimensions(), output_dimensions)) {
        if (null != buffer)
          buffer.freeRef();
        AssertionError temp_05_0015 = new AssertionError(
            RefArrays.toString(delta.getDimensions()) + " != " + RefArrays.toString(output_dimensions));
        delta.freeRef();
        throw temp_05_0015;
      }
      if (delta.length() != output_length) {
        if (null != buffer)
          buffer.freeRef();
        AssertionError temp_05_0016 = new AssertionError(delta.length() + " != " + output_length);
        delta.freeRef();
        throw temp_05_0016;
      }
      assert delta.length() == length;

      if (alive) {
        final TensorList passbackTensorList = CudaSystem
            .run(RefUtil.wrapInterface((RefFunction<CudnnHandle, CudaTensorList>) gpu -> {
              @Nullable final CudaTensor errorPtr = gpu.getTensor(delta.addRef(), precision,
                  MemoryType.Device, false);
              CudaTensor backpropTensor = copy_condense(gpu,
                  errorPtr, dimOut, dimIn, length,
                  dimOut[0] >= dimIn[0] && dimOut[1] >= dimIn[1], precision, horizontalAlign, verticalAlign, roundUp);
              return new CudaTensorList(
                  backpropTensor, length, dimIn, precision);
            }, delta.addRef()), delta.addRef());
        DeltaSet buffer1 = buffer == null ? null : buffer.addRef();
        this.accumulator.accept(buffer1, passbackTensorList);
      }
      delta.freeRef();
      if (null != buffer)
        buffer.freeRef();
    }

    // Releases the captured upstream accumulator.
    public @SuppressWarnings("unused")
    void _free() {
      super._free();
      accumulator.freeRef();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy