diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 5b1e663116cc..fc28ffc4aed4 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -1,11 +1,28 @@
-"""
-Linear regression is the most basic type of regression commonly used for
-predictive analysis. The idea is pretty simple: we have a dataset and we have
-features associated with it. Features should be chosen very cautiously
-as they determine how much our model will be able to make future predictions.
-We try to set the weight of these features, over many iterations, so that they best
-fit our dataset. In this particular code, I had used a CSGO dataset (ADR vs
-Rating). We try to best fit a line through dataset and estimate the parameters.
+"""Linear Regression Implementation.
+
+Linear regression is a fundamental supervised machine learning algorithm used for
+predictive analysis. It models the relationship between a dependent variable (y)
+and one or more independent variables (x) by fitting a linear equation.
+
+Mathematical Foundation:
+    The model assumes: y = θ₀ + θ₁x₁ + θ₂x₂ + ... + θₙxₙ + ε
+    where θ are the parameters (weights) and ε is the error term.
+
+    The cost function (Mean Squared Error) is minimized using gradient descent:
+        J(θ) = (1/2m) * Σ(h(x⁽ⁱ⁾) - y⁽ⁱ⁾)²
+
+    Gradient descent update rule:
+        θⱼ := θⱼ - α * (∂J/∂θⱼ)
+
+Time Complexity:
+    - Training: O(n * m * iterations) where n = features, m = samples
+    - Prediction: O(n) per sample
+
+Space Complexity: O(n * m) for storing the dataset
+
+References:
+    - https://en.wikipedia.org/wiki/Linear_regression
+    - https://en.wikipedia.org/wiki/Gradient_descent
 """
 
 # /// script
@@ -18,12 +35,26 @@
 
 import httpx
 import numpy as np
+from numpy.typing import NDArray
+
+
+def collect_dataset() -> NDArray:
+    """Collect dataset of CSGO player statistics.
+    Fetches a CSV dataset containing ADR (Average Damage per Round) vs Rating
+    of CSGO players from an external source.
 
 
-def collect_dataset():
-    """Collect dataset of CSGO
-    The dataset contains ADR vs Rating of a Player
-    :return : dataset obtained from the link, as matrix
+    Returns:
+        NDArray: A numpy matrix containing the dataset with ADR and Rating values.
+
+    Raises:
+        httpx.TimeoutException: If the request times out after 10 seconds.
+        httpx.HTTPError: If there's an error fetching the dataset.
+
+    Example:
+        >>> dataset = collect_dataset()  # doctest: +SKIP
+        >>> dataset.shape[1] == 2  # doctest: +SKIP
+        True
     """
     response = httpx.get(
         "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
@@ -35,31 +66,47 @@ def collect_dataset():
     for item in lines:
         item = item.split(",")
         data.append(item)
-    data.pop(0)  # This is for removing the labels from the list
+    data.pop(0)  # Remove the header labels
     dataset = np.matrix(data)
     return dataset
 
 
-def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and updates the Feature vector accordingly_
-    :param data_x : contains the dataset
-    :param data_y : contains the output associated with each data-entry
-    :param len_data : length of the data_
-    :param alpha : Learning rate of the model
-    :param theta : Feature vector (weight's for our model)
-    ;param return : Updated Feature's, using
-    curr_features - alpha_ * gradient(w.r.t. feature)
-    >>> import numpy as np
-    >>> data_x = np.array([[1, 2], [3, 4]])
-    >>> data_y = np.array([5, 6])
-    >>> len_data = len(data_x)
-    >>> alpha = 0.01
-    >>> theta = np.array([0.1, 0.2])
-    >>> run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-    array([0.196, 0.343])
+def run_steep_gradient_descent(
+    data_x: NDArray,
+    data_y: NDArray,
+    len_data: int,
+    alpha: float,
+    theta: NDArray,
+) -> NDArray:
+    """Perform one iteration of gradient descent to update feature weights.
+
+    Gradient descent is an optimization algorithm that iteratively adjusts
+    parameters to minimize the cost function.
+
+    Args:
+        data_x: Input feature matrix of shape (m, n) where m = samples, n = features.
+        data_y: Target values array of shape (m,).
+        len_data: Number of training samples.
+        alpha: Learning rate controlling the step size (typically 0.001 to 0.1).
+        theta: Current weight vector of shape (1, n).
+
+    Returns:
+        NDArray: Updated weight vector after one gradient descent step.
+
+    Time Complexity: O(m * n) for matrix operations.
+    Space Complexity: O(m * n) for intermediate calculations.
+
+    Example:
+        >>> import numpy as np
+        >>> data_x = np.array([[1, 2], [3, 4]])
+        >>> data_y = np.array([5, 6])
+        >>> len_data = len(data_x)
+        >>> alpha = 0.01
+        >>> theta = np.array([0.1, 0.2])
+        >>> run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
+        array([0.196, 0.343])
     """
     n = len_data
-
     prod = np.dot(theta, data_x.transpose())
     prod -= data_y.transpose()
     sum_grad = np.dot(prod, data_x)
@@ -67,19 +114,41 @@
     return theta
 
 
-def sum_of_square_error(data_x, data_y, len_data, theta):
-    """Return sum of square error for error calculation
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :param len_data : len of the dataset
-    :param theta : contains the feature vector
-    :return : sum of square error computed from given feature's
+def sum_of_square_error(
+    data_x: NDArray,
+    data_y: NDArray,
+    len_data: int,
+    theta: NDArray,
+) -> float:
+    """Calculate the Sum of Squared Errors (SSE) for the current model.
+
+    SSE measures how well the model fits the data by computing the sum of
+    squared differences between predicted and actual values.
+
+    Args:
+        data_x: Input feature matrix of shape (m, n).
+        data_y: Actual target values of shape (m,).
+        len_data: Number of data samples.
+        theta: Current weight vector of shape (1, n).
+
+    Returns:
+        float: The mean squared error value (SSE divided by 2m).
+
+    Time Complexity: O(m * n) for prediction and error calculation.
+    Space Complexity: O(m) for storing predictions.
 
     Example:
-    >>> vc_x = np.array([[1.1], [2.1], [3.1]])
-    >>> vc_y = np.array([1.2, 2.2, 3.2])
-    >>> round(sum_of_square_error(vc_x, vc_y, 3, np.array([1])),3)
-    np.float64(0.005)
+        >>> import numpy as np
+        >>> vc_x = np.array([[1.1], [2.1], [3.1]])
+        >>> vc_y = np.array([1.2, 2.2, 3.2])
+        >>> round(sum_of_square_error(vc_x, vc_y, 3, np.array([1])), 3)
+        np.float64(0.005)
+
+        >>> # Test with perfect fit
+        >>> x = np.array([[1], [2], [3]])
+        >>> y = np.array([1, 2, 3])
+        >>> sum_of_square_error(x, y, 3, np.array([1]))
+        np.float64(0.0)
     """
     prod = np.dot(theta, data_x.transpose())
     prod -= data_y.transpose()
@@ -88,18 +157,30 @@
     return error
 
 
-def run_linear_regression(data_x, data_y):
-    """Implement Linear regression over the dataset
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :return : feature for line of best fit (Feature vector)
+def run_linear_regression(data_x: NDArray, data_y: NDArray) -> NDArray:
+    """Train a linear regression model using gradient descent.
+
+    Iteratively optimizes the weight parameters to minimize the cost function
+    (mean squared error) over the training data.
+
+    Args:
+        data_x: Input feature matrix of shape (m, n).
+        data_y: Target values of shape (m,).
+
+    Returns:
+        NDArray: Optimized weight vector (theta) of shape (1, n).
+
+    Time Complexity: O(iterations * m * n) where default iterations = 100000.
+    Space Complexity: O(m * n) for storing the dataset.
+
+    Note:
+        The learning rate (alpha) is set to 0.0001550 and may need tuning
+        for different datasets.
     """
     iterations = 100000
     alpha = 0.0001550
-
    no_features = data_x.shape[1]
     len_data = data_x.shape[0] - 1
-
     theta = np.zeros((1, no_features))
 
     for i in range(iterations):
@@ -110,25 +191,47 @@
     return theta
 
 
-def mean_absolute_error(predicted_y, original_y):
-    """Return sum of square error for error calculation
-    :param predicted_y : contains the output of prediction (result vector)
-    :param original_y : contains values of expected outcome
-    :return : mean absolute error computed from given feature's
+def mean_absolute_error(predicted_y: list, original_y: list) -> float:
+    """Calculate Mean Absolute Error (MAE) between predicted and actual values.
+
+    MAE is a common metric for regression models that measures the average
+    magnitude of errors without considering direction.
+
+    Args:
+        predicted_y: List of predicted values.
+        original_y: List of actual/expected values.
+
+    Returns:
+        float: The mean absolute error.
+
+    Time Complexity: O(n) where n is the number of samples.
+    Space Complexity: O(1) for accumulator.
 
-    >>> predicted_y = [3, -0.5, 2, 7]
-    >>> original_y = [2.5, 0.0, 2, 8]
-    >>> mean_absolute_error(predicted_y, original_y)
-    0.5
+    Example:
+        >>> predicted_y = [3, -0.5, 2, 7]
+        >>> original_y = [2.5, 0.0, 2, 8]
+        >>> mean_absolute_error(predicted_y, original_y)
+        0.5
+
+        >>> # Test with identical values (perfect prediction)
+        >>> mean_absolute_error([1, 2, 3], [1, 2, 3])
+        0.0
+
+        >>> # Test with negative values
+        >>> mean_absolute_error([-1, -2], [1, 2])
+        3.0
     """
     total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
    return total / len(original_y)
 
 
-def main():
-    """Driver function"""
-    data = collect_dataset()
+def main() -> None:
+    """Driver function to demonstrate linear regression.
+    Loads the CSGO dataset, trains a linear regression model,
+    and prints the resulting feature vector.
+    """
+    data = collect_dataset()
 
     len_data = data.shape[0]
     data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
     data_y = data[:, -1].astype(float)