61 changes: 56 additions & 5 deletions python/paddle/nn/layer/norm.py
@@ -35,6 +35,7 @@

from paddle import _C_ops, in_dynamic_mode, pir_utils
from paddle.device import get_all_custom_device_type
+from paddle.utils.decorator_utils import param_one_alias

from ...base import dygraph_utils
from ...base.data_feeder import check_variable_and_dtype
@@ -64,6 +65,7 @@
DataLayoutND,
DTypeLike,
ParamAttrLike,
+    PlaceLike,
ShapeLike,
)

@@ -602,13 +604,34 @@ class LayerNorm(Layer):
which is expected to be of that specific size.
epsilon(float, optional): The small value added to the variance to prevent
division by zero. Default: 1e-05.
+            Alias: ``eps``.
+        elementwise_affine(bool, optional): Whether to apply an element-wise affine transformation
+            (i.e., learnable scale and bias). If set to ``False``, both the scale (:math:`g`) and
+            bias (:math:`b`) parameters are disabled, regardless of the settings of `weight_attr`
+            and `bias_attr`; this parameter acts as a master switch. Default: True.
+            **Note: This argument must be passed as a keyword argument.**
+        bias(bool, optional): Whether to include a learnable bias term in the layer. This setting
+            only takes effect when `elementwise_affine` is ``True``. If set to ``False``, no bias
+            parameter will be created, even if `bias_attr` is specified. Default: True.
+            **Note: This argument must be passed as a keyword argument.**
weight_attr(ParamAttr|bool|None, optional): The parameter attribute for the learnable
-            gain :math:`g`. If False, weight is None. If is None, a default :code:`ParamAttr` would be added as scale. The
-            :attr:`param_attr` is initialized as 1 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` .
+            gain :math:`g` (scale). This setting only takes effect when `elementwise_affine` is ``True``.
+            - If set to ``False``, no gain parameter will be created.
+            - If set to ``None`` or ``True``, a default :code:`ParamAttr` will be used, and the
+              parameter will be initialized to 1.
+            - If set to a custom :code:`ParamAttr` object, it will be used to configure the parameter.
+            Default: None.
+            **Note: This argument must be passed as a keyword argument.**
bias_attr(ParamAttr|bool|None, optional): The parameter attribute for the learnable
-            bias :math:`b`. If is False, bias is None. If is None, a default :code:`ParamAttr` would be added as bias. The
-            :attr:`bias_attr` is initialized as 0 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` .
+            bias :math:`b`. This setting only takes effect when both `elementwise_affine` and `bias` are ``True``.
+            - If set to ``False``, no bias parameter will be created.
+            - If set to ``None`` or ``True``, a default :code:`ParamAttr` will be used, and the
+              parameter will be initialized to 0.
+            - If set to a custom :code:`ParamAttr` object, it will be used to configure the parameter.
+            Default: None.
+            **Note: This argument must be passed as a keyword argument.**
name(str|None, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name` .
+            **Note: This argument must be passed as a keyword argument.**

Shape:
- x: 2-D, 3-D, 4-D or 5-D tensor.
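Taken together, the new keyword-only arguments documented above give three construction modes. A minimal usage sketch, assuming this PR's branch of `paddle.nn.LayerNorm` (the `eps` alias and the `elementwise_affine`/`bias` switches are the new behavior; everything else is standard Paddle):

```python
import paddle

x = paddle.rand([8, 16, 32])

# ``eps`` is accepted as an alias of ``epsilon`` via @param_one_alias.
ln = paddle.nn.LayerNorm(32, eps=1e-6)
assert ln.weight is not None and ln.bias is not None

# Master switch off: no learnable parameters, regardless of *_attr settings.
ln_plain = paddle.nn.LayerNorm(32, elementwise_affine=False)
assert ln_plain.weight is None and ln_plain.bias is None

# Scale only: bias=False suppresses just the bias parameter.
ln_scale = paddle.nn.LayerNorm(32, bias=False)
assert ln_scale.weight is not None and ln_scale.bias is None

y = ln(x)  # normalizes over the trailing dimension of size 32
```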
@@ -642,10 +665,16 @@ class LayerNorm(Layer):
weight: Tensor | None
bias: Tensor | None

+    @param_one_alias(["epsilon", "eps"])
def __init__(
self,
normalized_shape: int | Sequence[int],
epsilon: float = 1e-5,
+        *,
+        elementwise_affine: bool = True,
+        bias: bool = True,
+        device: PlaceLike | None = None,
+        dtype: DTypeLike | None = None,
weight_attr: bool | ParamAttr | None = None,
bias_attr: bool | ParamAttr | None = None,
name: str | None = None,
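For readers unfamiliar with `param_one_alias`, here is a rough sketch of what a one-alias decorator of this shape typically does. This is an illustrative stand-in, not the actual implementation in `paddle.utils.decorator_utils`:

```python
import functools

def param_one_alias(names):  # illustrative stand-in, not Paddle's code
    canonical, alias = names

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            if alias in kwargs:
                if canonical in kwargs:
                    raise TypeError(
                        f"got values for both '{canonical}' and alias '{alias}'"
                    )
                # Rewrite the alias to the canonical name, e.g. eps -> epsilon.
                kwargs[canonical] = kwargs.pop(alias)
            return fn(*args, **kwargs)
        return wrapper

    return decorator
```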
@@ -656,6 +685,21 @@ def __init__(

self._normalized_shape = list(normalized_shape)
self._epsilon = epsilon
+        self._device = device
+        self._dtype = (
+            self._helper.get_default_dtype() if dtype is None else dtype
+        )
+
+        if not elementwise_affine:
+            weight_attr = False
+            bias_attr = False
+        else:
+            weight_attr = weight_attr if weight_attr is not False else None
+            if not bias:
+                bias_attr = False
+            else:
+                bias_attr = bias_attr if bias_attr is not False else None
+
self._weight_attr = weight_attr
self._bias_attr = bias_attr
param_shape = [np.prod(self._normalized_shape)]
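The precedence implied by the branch above: `elementwise_affine=False` wins over everything, `bias=False` only matters when the affine switch is on, and an explicit `*_attr=False` is coerced back to `None` (a default `ParamAttr`) once its switch is on. A small pure-function restatement of that logic, for illustration only:

```python
def resolve_attrs(elementwise_affine, bias, weight_attr, bias_attr):
    """Illustrative restatement of the precedence rules in __init__."""
    if not elementwise_affine:
        return False, False  # neither parameter is created
    weight_attr = None if weight_attr is False else weight_attr
    if not bias:
        bias_attr = False
    else:
        bias_attr = None if bias_attr is False else bias_attr
    return weight_attr, bias_attr

# The master switch overrides explicit attrs:
assert resolve_attrs(False, True, "w_attr", "b_attr") == (False, False)
# bias=False suppresses only the bias parameter:
assert resolve_attrs(True, False, None, "b_attr") == (None, False)
# With the switches on, *_attr=False falls back to the default ParamAttr:
assert resolve_attrs(True, True, False, False) == (None, None)
```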
@@ -665,15 +709,22 @@ def __init__(
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
+                dtype=self._dtype,
shape=param_shape,
default_initializer=Constant(1.0),
+                device=self._device,
)

if bias_attr is False:
self.bias = None
else:
self.bias = self.create_parameter(
-                attr=self._bias_attr, shape=param_shape, is_bias=True
+                attr=self._bias_attr,
+                dtype=self._dtype,
+                shape=param_shape,
+                default_initializer=Constant(0.0),
+                device=self._device,
+                is_bias=True,
)

def forward(self, input: Tensor) -> Tensor:
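With `dtype` and `device` now threaded through both `create_parameter` calls, precision and placement can be pinned at construction time. A hedged usage sketch assuming this branch; the values in comments are expectations, not verified output:

```python
import paddle

# Assumes the PR's LayerNorm accepting device/dtype keyword-only arguments.
ln = paddle.nn.LayerNorm(
    64,
    dtype="float64",
    device=paddle.CPUPlace(),
)
print(ln.weight.dtype)  # expected: paddle.float64, initialized to 1.0
print(ln.bias.dtype)    # expected: paddle.float64, initialized to 0.0
```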