61 changes: 56 additions & 5 deletions python/paddle/nn/layer/norm.py
@@ -35,6 +35,7 @@

from paddle import _C_ops, in_dynamic_mode, pir_utils
from paddle.device import get_all_custom_device_type
+from paddle.utils.decorator_utils import param_one_alias

from ...base import dygraph_utils
from ...base.data_feeder import check_variable_and_dtype
@@ -64,6 +65,7 @@
DataLayoutND,
DTypeLike,
ParamAttrLike,
+    PlaceLike,
ShapeLike,
)

@@ -602,13 +604,34 @@ class LayerNorm(Layer):
which is expected to be of that specific size.
epsilon(float, optional): The small value added to the variance to prevent
division by zero. Default: 1e-05.
+            Alias: ``eps``.
+        elementwise_affine(bool, optional): Whether to apply an element-wise affine transformation
+            (i.e., learnable scale and bias). If set to ``False``, both the scale (:math:`g`) and
+            bias (:math:`b`) parameters are disabled, regardless of the settings of `weight_attr`
+            and `bias_attr`; this parameter acts as a master switch. Default: True.
+            **Note: This argument must be passed as a keyword argument.**
+        bias(bool, optional): Whether to include a learnable bias term in the layer. This setting
+            only takes effect when `elementwise_affine` is ``True``. If set to ``False``, no bias
+            parameter will be created, even if `bias_attr` is specified. Default: True.
+            **Note: This argument must be passed as a keyword argument.**
weight_attr(ParamAttr|bool|None, optional): The parameter attribute for the learnable
-            gain :math:`g`. If False, weight is None. If is None, a default :code:`ParamAttr` would be added as scale. The
-            :attr:`param_attr` is initialized as 1 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` .
+            gain :math:`g` (scale). This setting only takes effect when `elementwise_affine` is ``True``.
+            - If set to ``False``, no gain parameter will be created.
+            - If set to ``None`` or ``True``, a default :code:`ParamAttr` will be used, and the
+              parameter will be initialized to 1.
+            - If set to a custom :code:`ParamAttr` object, it will be used to configure the parameter.
+            Default: None.
+            **Note: This argument must be passed as a keyword argument.**
bias_attr(ParamAttr|bool|None, optional): The parameter attribute for the learnable
-            bias :math:`b`. If is False, bias is None. If is None, a default :code:`ParamAttr` would be added as bias. The
-            :attr:`bias_attr` is initialized as 0 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` .
+            bias :math:`b`. This setting only takes effect when both `elementwise_affine` and `bias` are ``True``.
+            - If set to ``False``, no bias parameter will be created.
+            - If set to ``None`` or ``True``, a default :code:`ParamAttr` will be used, and the
+              parameter will be initialized to 0.
+            - If set to a custom :code:`ParamAttr` object, it will be used to configure the parameter.
+            Default: None.
+            **Note: This argument must be passed as a keyword argument.**
name(str|None, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name` .
+            **Note: This argument must be passed as a keyword argument.**

Shape:
- x: 2-D, 3-D, 4-D or 5-D tensor.
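Taken together, the new keyword-only arguments documented above give three construction modes. A minimal usage sketch, assuming this PR's branch of `paddle.nn.LayerNorm` (the `eps` alias and the `elementwise_affine`/`bias` switches are the new behavior; everything else is standard Paddle):

```python
import paddle

x = paddle.rand([8, 16, 32])

# ``eps`` is accepted as an alias of ``epsilon`` via @param_one_alias.
ln = paddle.nn.LayerNorm(32, eps=1e-6)
assert ln.weight is not None and ln.bias is not None

# Master switch off: no learnable parameters, regardless of *_attr settings.
ln_plain = paddle.nn.LayerNorm(32, elementwise_affine=False)
assert ln_plain.weight is None and ln_plain.bias is None

# Scale only: bias=False suppresses just the bias parameter.
ln_scale = paddle.nn.LayerNorm(32, bias=False)
assert ln_scale.weight is not None and ln_scale.bias is None

y = ln(x)  # normalizes over the trailing dimension of size 32
```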
@@ -642,10 +665,16 @@ class LayerNorm(Layer):
weight: Tensor | None
bias: Tensor | None

+    @param_one_alias(["epsilon", "eps"])
def __init__(
self,
normalized_shape: int | Sequence[int],
epsilon: float = 1e-5,
+        *,
+        elementwise_affine: bool = True,
+        bias: bool = True,
+        device: PlaceLike | None = None,
+        dtype: DTypeLike | None = None,
weight_attr: bool | ParamAttr | None = None,
bias_attr: bool | ParamAttr | None = None,
name: str | None = None,
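For readers unfamiliar with `param_one_alias`, here is a rough sketch of what a one-alias decorator of this shape typically does. This is an illustrative stand-in, not the actual implementation in `paddle.utils.decorator_utils`:

```python
import functools

def param_one_alias(names):  # illustrative stand-in, not Paddle's code
    canonical, alias = names

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            if alias in kwargs:
                if canonical in kwargs:
                    raise TypeError(
                        f"got values for both '{canonical}' and alias '{alias}'"
                    )
                # Rewrite the alias to the canonical name, e.g. eps -> epsilon.
                kwargs[canonical] = kwargs.pop(alias)
            return fn(*args, **kwargs)
        return wrapper

    return decorator
```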
@@ -656,6 +685,21 @@ def __init__(

self._normalized_shape = list(normalized_shape)
self._epsilon = epsilon
+        self._device = device
+        self._dtype = (
+            self._helper.get_default_dtype() if dtype is None else dtype
+        )
+
+        if not elementwise_affine:
+            weight_attr = False
+            bias_attr = False
+        else:
+            weight_attr = weight_attr if weight_attr is not False else None
+            if not bias:
+                bias_attr = False
+            else:
+                bias_attr = bias_attr if bias_attr is not False else None
+
self._weight_attr = weight_attr
self._bias_attr = bias_attr
param_shape = [np.prod(self._normalized_shape)]
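The precedence implied by the branch above: `elementwise_affine=False` wins over everything, `bias=False` only matters when the affine switch is on, and an explicit `*_attr=False` is coerced back to `None` (a default `ParamAttr`) once its switch is on. A small pure-function restatement of that logic, for illustration only:

```python
def resolve_attrs(elementwise_affine, bias, weight_attr, bias_attr):
    """Illustrative restatement of the precedence rules in __init__."""
    if not elementwise_affine:
        return False, False  # neither parameter is created
    weight_attr = None if weight_attr is False else weight_attr
    if not bias:
        bias_attr = False
    else:
        bias_attr = None if bias_attr is False else bias_attr
    return weight_attr, bias_attr

# The master switch overrides explicit attrs:
assert resolve_attrs(False, True, "w_attr", "b_attr") == (False, False)
# bias=False suppresses only the bias parameter:
assert resolve_attrs(True, False, None, "b_attr") == (None, False)
# With the switches on, *_attr=False falls back to the default ParamAttr:
assert resolve_attrs(True, True, False, False) == (None, None)
```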
@@ -665,15 +709,22 @@ def __init__(
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
+                dtype=self._dtype,
shape=param_shape,
default_initializer=Constant(1.0),
+                device=self._device,
)

if bias_attr is False:
self.bias = None
else:
self.bias = self.create_parameter(
-                attr=self._bias_attr, shape=param_shape, is_bias=True
+                attr=self._bias_attr,
+                dtype=self._dtype,
+                shape=param_shape,
+                default_initializer=Constant(0.0),
+                device=self._device,
+                is_bias=True,
)

def forward(self, input: Tensor) -> Tensor:
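With `dtype` and `device` now threaded through both `create_parameter` calls, precision and placement can be pinned at construction time. A hedged usage sketch assuming this branch; the values in comments are expectations, not verified output:

```python
import paddle

# Assumes the PR's LayerNorm accepting device/dtype keyword-only arguments.
ln = paddle.nn.LayerNorm(
    64,
    dtype="float64",
    device=paddle.CPUPlace(),
)
print(ln.weight.dtype)  # expected: paddle.float64, initialized to 1.0
print(ln.bias.dtype)    # expected: paddle.float64, initialized to 0.0
```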