Source code for enchanter.addons.layers.positional_encoding

# ***************************************************
#  _____            _                 _
# | ____|_ __   ___| |__   __ _ _ __ | |_ ___ _ __
# |  _| | '_ \ / __| '_ \ / _` | '_ \| __/ _ \ '__|
# | |___| | | | (__| | | | (_| | | | | ||  __/ |
# |_____|_| |_|\___|_| |_|\__,_|_| |_|\__\___|_|
#
# ***************************************************

import math
import torch
import torch.nn as nn


__all__ = ["PositionalEncoding"]


[docs]class PositionalEncoding(nn.Module):
    """
    ``Positional Encoding`` used in the ``Transformer`` model proposed in `Attention is all you need`.

    References:
        `Sequence-to-Sequence Modeling with nn.Transformer and TorchText \
        <https://pytorch.org/tutorials/beginner/transformer_tutorial.html#define-the-model>`_

    """

    def __init__(self, d_model: int, seq_len: int, dropout: float = 0.1) -> None:
        """


        Args:
            d_model: the number of expected features in the encoder/decoder inputs.
            seq_len: length of input sequence.
            dropout: dropout rate.
        """
        super(PositionalEncoding, self).__init__()
        self.dropout: nn.Dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(seq_len, d_model)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pe", pe)

[docs]    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward propagation

        Args:
            x: input data ``[N, F, L]``

        Returns:

            (N, E, L)

        """
        x = x.permute(2, 0, 1)  # [L, N, E]
        x = x + self.pe[: x.size(0), :]  # type: ignore
        return self.dropout(x).permute(1, 2, 0)