
// Targeted by JavaCPP version 1.5.8: DO NOT EDIT THIS FILE

package org.bytedeco.pytorch;

import org.bytedeco.pytorch.Allocator;
import org.bytedeco.pytorch.Function;
import org.bytedeco.pytorch.Module;
import java.nio.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.annotation.*;

import static org.bytedeco.javacpp.presets.javacpp.*;
import static org.bytedeco.openblas.global.openblas_nolapack.*;
import static org.bytedeco.openblas.global.openblas.*;

import static org.bytedeco.pytorch.global.torch.*;


// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TransformerDecoderLayer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/** TransformerDecoderLayer is made up of self-attn, multi-head-attn, and a feedforward network.
 *  This standard decoder layer is based on the paper "Attention Is All You Need".
 *  Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
 *  Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
 *  Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
 *  it in a different way during application.
 *  See https://pytorch.org/docs/master/nn.html#transformer-layers to learn
 *  about the exact behavior of this module.
 * 
 *  See the documentation for the {@code torch::nn::TransformerDecoderLayerOptions} class to learn what
 *  constructor arguments are supported for this module.
 * 
 *  Example:
 *  <pre>{@code
 *  TransformerDecoderLayer model(TransformerDecoderLayerOptions(512, 8).dropout(0.2));
 *  }</pre>
 */
// NOLINTNEXTLINE(bugprone-exception-escape)
@Namespace("torch::nn") @NoOffset @Properties(inherit = org.bytedeco.pytorch.presets.torch.class)
public class TransformerDecoderLayerImpl extends TransformerDecoderLayerImplCloneable {
  static { Loader.load(); }

  /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
  public TransformerDecoderLayerImpl(Pointer p) { super(p); }

  public TransformerDecoderLayerImpl(@Cast("int64_t") long d_model, @Cast("int64_t") long nhead) { super((Pointer)null); allocate(d_model, nhead); }
  @NoDeallocator private native void allocate(@Cast("int64_t") long d_model, @Cast("int64_t") long nhead);
  public TransformerDecoderLayerImpl(@Const @ByRef TransformerDecoderLayerOptions options_) { super((Pointer)null); allocate(options_); }
  @NoDeallocator private native void allocate(@Const @ByRef TransformerDecoderLayerOptions options_);

  public native void reset();

  public native void reset_parameters();

  /** Pass the inputs (and mask) through the decoder layer.
   *  Args:
   *    tgt: the sequence to the decoder layer (required).
   *    memory: the sequence from the last layer of the encoder (required).
   *    tgt_mask: the mask for the tgt sequence (optional).
   *    memory_mask: the mask for the memory sequence (optional).
   *    tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
   *    memory_key_padding_mask: the mask for the memory keys per batch (optional). */
  public native @ByVal Tensor forward(
      @ByVal Tensor tgt,
      @Const @ByRef Tensor memory,
      @Const @ByRef(nullValue = "at::Tensor{}") Tensor tgt_mask,
      @Const @ByRef(nullValue = "at::Tensor{}") Tensor memory_mask,
      @Const @ByRef(nullValue = "at::Tensor{}") Tensor tgt_key_padding_mask,
      @Const @ByRef(nullValue = "at::Tensor{}") Tensor memory_key_padding_mask);
  public native @ByVal Tensor forward(@ByVal Tensor tgt, @Const @ByRef Tensor memory);

  /** The options used to configure this module. */
  public native @ByRef TransformerDecoderLayerOptions options(); public native TransformerDecoderLayerImpl options(TransformerDecoderLayerOptions setter);

  /** Self attention */
  public native @ByRef MultiheadAttention self_attn(); public native TransformerDecoderLayerImpl self_attn(MultiheadAttention setter);

  /** Dropout, post self attention */
  public native @ByRef Dropout dropout1(); public native TransformerDecoderLayerImpl dropout1(Dropout setter);

  /** Normalization, post self attention */
  public native @ByRef LayerNorm norm1(); public native TransformerDecoderLayerImpl norm1(LayerNorm setter);

  /** Multi-headed attention */
  public native @ByRef MultiheadAttention multihead_attn(); public native TransformerDecoderLayerImpl multihead_attn(MultiheadAttention setter);

  /** Dropout, post multi-headed attention */
  public native @ByRef Dropout dropout2(); public native TransformerDecoderLayerImpl dropout2(Dropout setter);

  /** Normalization, post multi-headed attention */
  public native @ByRef LayerNorm norm2(); public native TransformerDecoderLayerImpl norm2(LayerNorm setter);

  /** Feed forward first linear layer */
  public native @ByRef Linear linear1(); public native TransformerDecoderLayerImpl linear1(Linear setter);

  /** Feed forward dropout layer */
  public native @ByRef Dropout dropout(); public native TransformerDecoderLayerImpl dropout(Dropout setter);

  /** Feed forward second linear layer */
  public native @ByRef Linear linear2(); public native TransformerDecoderLayerImpl linear2(Linear setter);

  /** Dropout, post feed forward */
  public native @ByRef Dropout dropout3(); public native TransformerDecoderLayerImpl dropout3(Dropout setter);

  /** Normalization, post feed forward */
  public native @ByRef LayerNorm norm3(); public native TransformerDecoderLayerImpl norm3(LayerNorm setter);
}
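// ---------------------------------------------------------------------------------------------
// Usage sketch, not part of the generated bindings above: it builds the layer with the
// (d_model, nhead) constructor declared in this file and runs the mask-free
// forward(tgt, memory) overload. The factory call rand(long...) relies on the static import of
// org.bytedeco.pytorch.global.torch at the top of this file, and the (seq_len, batch, d_model)
// input layout follows the libtorch convention; both are assumptions of this sketch rather
// than guarantees made by the class above.
class TransformerDecoderLayerUsageSketch {
  public static void main(String[] args) {
    // Decoder layer with d_model = 512 and nhead = 8.
    TransformerDecoderLayerImpl layer = new TransformerDecoderLayerImpl(512, 8);

    // Alternatively (mirroring the javadoc example), the options-based constructor could be
    // used; whether the dropout setter chains in Java exactly as in C++ is an assumption:
    // TransformerDecoderLayerImpl layer =
    //     new TransformerDecoderLayerImpl(new TransformerDecoderLayerOptions(512, 8).dropout(0.2));

    // tgt is the decoder input, memory is the encoder output; shapes are (seq_len, batch, d_model).
    Tensor tgt = rand(20, 32, 512);
    Tensor memory = rand(10, 32, 512);

    // The full forward(...) overload additionally accepts tgt_mask, memory_mask,
    // tgt_key_padding_mask and memory_key_padding_mask.
    Tensor out = layer.forward(tgt, memory);
    System.out.printf("output: %d x %d x %d%n", out.size(0), out.size(1), out.size(2));
  }
}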



