From f4dc224305c81c91824f5cf2ec668fc31e0befc7 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:10:27 +0100
Subject: [PATCH 01/27] Genetic algorithm for Knapsack

---
 genetic_algorithm/knapsack.py | 168 ++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 genetic_algorithm/knapsack.py

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
new file mode 100644
index 000000000000..0be7e0ce1cfb
--- /dev/null
+++ b/genetic_algorithm/knapsack.py
@@ -0,0 +1,168 @@
+"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?"""
+
+import random
+from dataclasses import dataclass
+
+random.seed(42)
+
+# =========================== Problem setup: Knapsack ===========================
+
+KNAPSACK_N_ITEMS = 42                   # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE = (10, 100)        # Range of item values
+KNAPSACK_WEIGHT_RANGE = (5, 50)         # Range of item weights
+KNAPSACK_CAPACITY_RATIO = 0.5           # Capacity as a fraction of total weight
+
+@dataclass
+class Item:
+    value: int
+    weight: int
+
+def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weight_range: tuple[int, int], capacity_ratio=float) -> tuple[list[Item], int]:
+    """Generates a random knapsack problem instance."""
+    items = []
+    for _ in range(n_items):
+        value = random.randint(*value_range)
+        weight = random.randint(*weight_range)
+        items.append(Item(value=value, weight=weight))
+    # We set capacity as a fraction of total weight
+    capacity = int(sum(it.weight for it in items) * capacity_ratio)
+    return items, capacity
+
+items, capacity = generate_knapsack_instance(n_items=KNAPSACK_N_ITEMS, value_range=KNAPSACK_VALUE_RANGE, weight_range=KNAPSACK_WEIGHT_RANGE, capacity_ratio=KNAPSACK_CAPACITY_RATIO)
+
+
+
+# ============================== GA Representation ==============================
+
+# HYPERPARAMETERS (For tuning the GA)
+
+POPULATION_SIZE = 120
+GENERATIONS = 200
+CROSSOVER_PROBABILITY = 0.9
+MUTATION_PROBABILITY = 0.01
+TOURNAMENT_K = 3
+ELITISM = 2
+
+OVERWEIGHT_PENALTY_FACTOR = 10
+
+Genome = list[int] # An index list where 1 means item is included, 0 means excluded
+
+def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
+    """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
+    total_value = 0
+    total_weight = 0
+    for gene, item in zip(genome, items):
+        if gene:
+            total_value += item.value
+            total_weight += item.weight
+    if total_weight > capacity:
+        # Penalize overweight solutions: return small value scaled by overflow
+        overflow = (total_weight - capacity)
+        total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
+    return total_value, total_weight
+
+def random_genome(n: int) -> Genome:
+    """Generates a random genome of length n."""
+    return [random.randint(0,1) for _ in range(n)]
+
+def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
+    """Performs tournament selection to choose genomes from the population.
+    Note that other selection strategies exist such as roulette wheel, rank-based, etc.
+    """
+    contenders = random.sample(list(zip(population, fitnesses)), k)
+    get_fitness = lambda x: x[1]
+    return max(contenders, key=get_fitness)[0][:]
+
+def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
+    """Performs single-point crossover between two genomes.
+    Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
+    min_length = min(len(a), len(b))
+    if random.random() > p_crossover or min_length < 2:
+        return a[:], b[:]
+    cutoff_point = random.randint(1, min_length - 1)
+    return a[:cutoff_point]+b[cutoff_point:], b[:cutoff_point]+a[cutoff_point:]
+
+def mutation(g: Genome, p_mutation: int) -> Genome:
+    """Performs bit-flip mutation on a genome.
+    Note that other mutation strategies exist such as swap mutation, scramble mutation, etc.
+    """
+    return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
+
+def run_ga(
+    items: list[Item],
+    capacity: int,
+    pop_size=POPULATION_SIZE,
+    generations=GENERATIONS,
+    p_crossover=CROSSOVER_PROBABILITY,
+    p_mutation=MUTATION_PROBABILITY,
+    tournament_k=TOURNAMENT_K,
+    elitism=ELITISM,
+):
+    """Runs the genetic algorithm to solve the knapsack problem."""
+    n = len(items)
+    population = [random_genome(n) for _ in range(pop_size)]
+    best_history = []  # track best fitness per generation
+    avg_history = []
+    best_overall = None
+    best_fit_overall = -1
+
+    for _ in range(generations):
+        fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
+        best_fit = max(fitnesses)
+        best_idx = fitnesses.index(best_fit)
+        best_history.append(best_fit)
+        avg_fit = sum(fitnesses) / pop_size
+        avg_history.append(avg_fit)
+
+        if best_fit > best_fit_overall:
+            best_fit_overall = best_fit
+            best_overall = population[best_idx][:]
+
+        # Elitism
+        get_fitness = lambda i: fitnesses[i]
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism] # Sort the population by fitness and get the top `elitism` indices
+        elites = [population[i][:] for i in elite_indices] # Make nepo babies
+
+        # New generation
+        new_pop = elites[:]
+        while len(new_pop) < pop_size:
+            parent1 = selection(population, fitnesses, k=tournament_k)
+            parent2 = selection(population, fitnesses, k=tournament_k)
+            child1, child2 = crossover(parent1, parent2, p_crossover)
+            child1 = mutation(child1, p_mutation)
+            child2 = mutation(child2, p_mutation)
+            new_pop.extend([child1, child2])
+        population = new_pop[:pop_size]
+
+    # Final evaluation of the best
+    best_value, best_weight = evaluate(best_overall, items, capacity)
+    return {
+        "best_genome": best_overall,
+        "best_value": best_value,
+        "best_weight": best_weight,
+        "capacity": capacity,
+        "best_history": best_history,
+        "avg_history": avg_history,
+    }
+
+result = run_ga(items, capacity)
+
+best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+
+print(f"Knapsack capacity: {result["capacity"]}")
+print(f"Best solution: value = {result["best_value"]}, weight = {result["best_weight"]}")
+
+# print("Items included in the best solution:", best_items)
+
+# import matplotlib.pyplot as plt
+
+# # Plot fitness curves
+# plt.figure()
+# plt.plot(result["best_history"], label="Best fitness")
+# plt.plot(result["avg_history"], label="Average fitness")
+# plt.title("GA on Knapsack: Fitness over Generations")
+# plt.xlabel("Generation")
+# plt.ylabel("Fitness")
+# plt.legend()
+# plt.tight_layout()
+# plt.show()

From e77bd83e9bc501654de834b7cbe6125fdbef4bd0 Mon Sep 17 00:00:00 2001
From: Dang-Hoang-Tung <Dang-Hoang-Tung@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:10:41 +0000
Subject: [PATCH 02/27] updating DIRECTORY.md

---
 DIRECTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DIRECTORY.md b/DIRECTORY.md
index 0f9859577493..f759b7f19da3 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -461,6 +461,7 @@
 
 ## Genetic Algorithm
   * [Basic String](genetic_algorithm/basic_string.py)
+  * [Knapsack](genetic_algorithm/knapsack.py)
 
 ## Geodesy
   * [Haversine Distance](geodesy/haversine_distance.py)

From 0611a6c0142187e6be241eae863152d07052b5e6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:25:57 +0000
Subject: [PATCH 03/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 51 +++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 0be7e0ce1cfb..0ce0e8ba22e6 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -7,17 +7,24 @@
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS = 42                   # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE = (10, 100)        # Range of item values
-KNAPSACK_WEIGHT_RANGE = (5, 50)         # Range of item weights
-KNAPSACK_CAPACITY_RATIO = 0.5           # Capacity as a fraction of total weight
+KNAPSACK_N_ITEMS = 42  # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE = (10, 100)  # Range of item values
+KNAPSACK_WEIGHT_RANGE = (5, 50)  # Range of item weights
+KNAPSACK_CAPACITY_RATIO = 0.5  # Capacity as a fraction of total weight
+
 
 @dataclass
 class Item:
     value: int
     weight: int
 
-def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weight_range: tuple[int, int], capacity_ratio=float) -> tuple[list[Item], int]:
+
+def generate_knapsack_instance(
+    n_items: int,
+    value_range: tuple[int, int],
+    weight_range: tuple[int, int],
+    capacity_ratio=float,
+) -> tuple[list[Item], int]:
     """Generates a random knapsack problem instance."""
     items = []
     for _ in range(n_items):
@@ -28,8 +35,13 @@ def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weigh
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
-items, capacity = generate_knapsack_instance(n_items=KNAPSACK_N_ITEMS, value_range=KNAPSACK_VALUE_RANGE, weight_range=KNAPSACK_WEIGHT_RANGE, capacity_ratio=KNAPSACK_CAPACITY_RATIO)
 
+items, capacity = generate_knapsack_instance(
+    n_items=KNAPSACK_N_ITEMS,
+    value_range=KNAPSACK_VALUE_RANGE,
+    weight_range=KNAPSACK_WEIGHT_RANGE,
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
+)
 
 
 # ============================== GA Representation ==============================
@@ -45,7 +57,8 @@ def generate_knapsack_instance(n_items: int, value_range: tuple[int, int], weigh
 
 OVERWEIGHT_PENALTY_FACTOR = 10
 
-Genome = list[int] # An index list where 1 means item is included, 0 means excluded
+Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
+
 
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
     """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
@@ -57,13 +70,15 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_weight += item.weight
     if total_weight > capacity:
         # Penalize overweight solutions: return small value scaled by overflow
-        overflow = (total_weight - capacity)
+        overflow = total_weight - capacity
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
+
 def random_genome(n: int) -> Genome:
     """Generates a random genome of length n."""
-    return [random.randint(0,1) for _ in range(n)]
+    return [random.randint(0, 1) for _ in range(n)]
+
 
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     """Performs tournament selection to choose genomes from the population.
@@ -73,6 +88,7 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     get_fitness = lambda x: x[1]
     return max(contenders, key=get_fitness)[0][:]
 
+
 def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
     """Performs single-point crossover between two genomes.
     Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
@@ -80,7 +96,8 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     if random.random() > p_crossover or min_length < 2:
         return a[:], b[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return a[:cutoff_point]+b[cutoff_point:], b[:cutoff_point]+a[cutoff_point:]
+    return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
+
 
 def mutation(g: Genome, p_mutation: int) -> Genome:
     """Performs bit-flip mutation on a genome.
@@ -88,6 +105,7 @@ def mutation(g: Genome, p_mutation: int) -> Genome:
     """
     return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
 
+
 def run_ga(
     items: list[Item],
     capacity: int,
@@ -120,8 +138,10 @@ def run_ga(
 
         # Elitism
         get_fitness = lambda i: fitnesses[i]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism] # Sort the population by fitness and get the top `elitism` indices
-        elites = [population[i][:] for i in elite_indices] # Make nepo babies
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[
+            :elitism
+        ]  # Sort the population by fitness and get the top `elitism` indices
+        elites = [population[i][:] for i in elite_indices]  # Make nepo babies
 
         # New generation
         new_pop = elites[:]
@@ -145,12 +165,15 @@ def run_ga(
         "avg_history": avg_history,
     }
 
+
 result = run_ga(items, capacity)
 
 best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
 
-print(f"Knapsack capacity: {result["capacity"]}")
-print(f"Best solution: value = {result["best_value"]}, weight = {result["best_weight"]}")
+print(f"Knapsack capacity: {result['capacity']}")
+print(
+    f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+)
 
 # print("Items included in the best solution:", best_items)
 

From 45f1516d3de1c6f6fc964103dce1bd2d49757e65 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:40:26 +0100
Subject: [PATCH 04/27] Update with doctests

---
 genetic_algorithm/knapsack.py | 254 ++++++++++++++++++++++++++--------
 1 file changed, 194 insertions(+), 60 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 0ce0e8ba22e6..6f8881af06e5 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,49 +1,76 @@
-"""Did you know that Genetic Algorithms can be used to quickly approximate combinatorial optimization problems such as knapsack?"""
+"""Did you know that Genetic Algorithms can be used to quickly approximate
+combinatorial optimization problems such as knapsack?
+
+Run doctests:
+    python -m doctest -v genetic_algorithm/knapsack.py
+"""
 
 import random
 from dataclasses import dataclass
 
+# Keep module-level RNG deterministic for examples that rely on random,
+# but individual doctests re-seed locally as needed.
 random.seed(42)
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS = 42  # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE = (10, 100)  # Range of item values
-KNAPSACK_WEIGHT_RANGE = (5, 50)  # Range of item weights
-KNAPSACK_CAPACITY_RATIO = 0.5  # Capacity as a fraction of total weight
-
+KNAPSACK_N_ITEMS: int = 42                   # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)        # Range of item values
+KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)         # Range of item weights
+KNAPSACK_CAPACITY_RATIO: float = 0.5           # Capacity as a fraction of total weight
 
 @dataclass
 class Item:
     value: int
     weight: int
 
-
 def generate_knapsack_instance(
     n_items: int,
     value_range: tuple[int, int],
     weight_range: tuple[int, int],
-    capacity_ratio=float,
+    capacity_ratio: float
 ) -> tuple[list[Item], int]:
-    """Generates a random knapsack problem instance."""
+    """
+    Generates a random knapsack problem instance.
+
+    Returns a tuple: (items, capacity), where items is a list of Item(value, weight)
+    and capacity is an int computed as floor(capacity_ratio * total_weight).
+
+    Examples
+    --------
+    Use a tiny, deterministic instance to validate shape and capacity range:
+
+    >>> random.seed(0)
+    >>> items, cap = generate_knapsack_instance(
+    ...     n_items=3,
+    ...     value_range=(5, 5),
+    ...     weight_range=(10, 10),
+    ...     capacity_ratio=0.5
+    ... )
+    >>> len(items), cap
+    (3, 15)
+    >>> all(isinstance(it, Item) for it in items)
+    True
+    >>> [it.value for it in items], [it.weight for it in items]
+    ([5, 5, 5], [10, 10, 10])
+    """
     items = []
     for _ in range(n_items):
         value = random.randint(*value_range)
         weight = random.randint(*weight_range)
         items.append(Item(value=value, weight=weight))
-    # We set capacity as a fraction of total weight
+    # Capacity as a fraction of total weight
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
-
+# Example instance (guarded by __main__ below for printing)
 items, capacity = generate_knapsack_instance(
     n_items=KNAPSACK_N_ITEMS,
     value_range=KNAPSACK_VALUE_RANGE,
     weight_range=KNAPSACK_WEIGHT_RANGE,
-    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO
 )
 
-
 # ============================== GA Representation ==============================
 
 # HYPERPARAMETERS (For tuning the GA)
@@ -59,9 +86,30 @@ def generate_knapsack_instance(
 
 Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
 
-
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
-    """Evaluation function - calculates the fitness of each candidate based on total value and weight."""
+    """
+    Calculates fitness (value) and weight of a candidate solution. If overweight,
+    the returned value is penalized; weight is the actual summed weight.
+
+    Returns (value, weight).
+
+    Examples
+    --------
+    Feasible genome (no penalty):
+
+    >>> it = [Item(10, 4), Item(7, 3), Item(5, 2)]
+    >>> genome = [1, 0, 1]  # take items 0 and 2
+    >>> evaluate(genome, it, capacity=7)
+    (15, 6)
+
+    Overweight genome (penalty applies):
+    Total value = 10+7+5 = 22, total weight = 9, capacity = 7, overflow = 2
+    Penalized value = max(0, 22 - 2 * OVERWEIGHT_PENALTY_FACTOR) = 2
+
+    >>> genome = [1, 1, 1]
+    >>> evaluate(genome, it, capacity=7)
+    (2, 9)
+    """
     total_value = 0
     total_weight = 0
     for gene, item in zip(genome, items):
@@ -69,20 +117,42 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_value += item.value
             total_weight += item.weight
     if total_weight > capacity:
-        # Penalize overweight solutions: return small value scaled by overflow
-        overflow = total_weight - capacity
+        overflow = (total_weight - capacity)
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
 
 def random_genome(n: int) -> Genome:
-    """Generates a random genome of length n."""
-    return [random.randint(0, 1) for _ in range(n)]
+    """
+    Generates a random genome (list of 0/1) of length n.
 
+    Examples
+    --------
+    Check length and content are 0/1 bits:
+
+    >>> random.seed(123)
+    >>> g = random_genome(5)
+    >>> len(g), set(g).issubset({0, 1})
+    (5, True)
+    """
+    return [random.randint(0, 1) for _ in range(n)]
 
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
-    """Performs tournament selection to choose genomes from the population.
+    """
+    Performs tournament selection to choose a genome from the population.
+
     Note that other selection strategies exist such as roulette wheel, rank-based, etc.
+
+    Examples
+    --------
+    Deterministic tournament with fixed seed (k=2):
+
+    >>> random.seed(1)
+    >>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
+    >>> fits = [0, 5, 9, 7]
+    >>> parent = selection(pop, fits, k=2)
+    >>> parent in pop
+    True
     """
     contenders = random.sample(list(zip(population, fitnesses)), k)
     get_fitness = lambda x: x[1]
@@ -90,18 +160,54 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
 
 
 def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
-    """Performs single-point crossover between two genomes.
-    Note that other crossover strategies exist such as two-point crossover, uniform crossover, etc."""
+    """
+    Performs single-point crossover between two genomes.
+    If crossover does not occur (random > p_crossover) or genomes are too short,
+    returns copies of the parents.
+
+    Note: other crossover strategies exist (two-point, uniform, etc.).
+
+    Examples
+    --------
+    Force crossover with p=1.0 and fixed RNG; verify lengths and bit content:
+
+    >>> random.seed(2)
+    >>> a, b = [0,0,0,0], [1,1,1,1]
+    >>> c1, c2 = crossover(a, b, p_crossover=1.0)
+    >>> len(c1) == len(a) == len(c2) == len(b)
+    True
+    >>> set(c1).issubset({0,1}) and set(c2).issubset({0,1})
+    True
+
+    No crossover if p=0.0:
+
+    >>> c1, c2 = crossover([0,0,0], [1,1,1], p_crossover=0.0)
+    >>> c1, c2
+    ([0, 0, 0], [1, 1, 1])
+    """
     min_length = min(len(a), len(b))
     if random.random() > p_crossover or min_length < 2:
         return a[:], b[:]
     cutoff_point = random.randint(1, min_length - 1)
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
+def mutation(g: Genome, p_mutation: float) -> Genome:
+    """
+    Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
+
+    Note: other mutation strategies exist (swap, scramble, etc.).
+
+    Examples
+    --------
+    With probability 1.0, every bit flips:
 
-def mutation(g: Genome, p_mutation: int) -> Genome:
-    """Performs bit-flip mutation on a genome.
-    Note that other mutation strategies exist such as swap mutation, scramble mutation, etc.
+    >>> mutation([0, 1, 1, 0], p_mutation=1.0)
+    [1, 0, 0, 1]
+
+    With probability 0.0, nothing changes:
+
+    >>> mutation([0, 1, 1, 0], p_mutation=0.0)
+    [0, 1, 1, 0]
     """
     return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
 
@@ -109,14 +215,46 @@ def mutation(g: Genome, p_mutation: int) -> Genome:
 def run_ga(
     items: list[Item],
     capacity: int,
-    pop_size=POPULATION_SIZE,
-    generations=GENERATIONS,
-    p_crossover=CROSSOVER_PROBABILITY,
-    p_mutation=MUTATION_PROBABILITY,
-    tournament_k=TOURNAMENT_K,
-    elitism=ELITISM,
+    pop_size: int = POPULATION_SIZE,
+    generations: int = GENERATIONS,
+    p_crossover: float = CROSSOVER_PROBABILITY,
+    p_mutation: float = MUTATION_PROBABILITY,
+    tournament_k: int = TOURNAMENT_K,
+    elitism: int = ELITISM,
 ):
-    """Runs the genetic algorithm to solve the knapsack problem."""
+    """
+    Runs the genetic algorithm to (approximately) solve the knapsack problem.
+
+    Returns a dict with keys:
+      - 'best_genome' (Genome)
+      - 'best_value' (int)
+      - 'best_weight' (int)
+      - 'capacity' (int)
+      - 'best_history' (list[int])
+      - 'avg_history' (list[float])
+
+    Examples
+    --------
+    Use a tiny instance and few generations to validate structure and lengths:
+
+    >>> random.seed(1234)
+    >>> tiny_items = [Item(5,2), Item(6,3), Item(2,1), Item(7,4)]
+    >>> cap = 5
+    >>> out = run_ga(
+    ...     tiny_items, cap,
+    ...     pop_size=10, generations=5,
+    ...     p_crossover=0.9, p_mutation=0.05,
+    ...     tournament_k=2, elitism=1
+    ... )
+    >>> sorted(out.keys())
+    ['avg_history', 'best_genome', 'best_history', 'best_value', 'best_weight', 'capacity']
+    >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
+    True
+    >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
+    True
+    >>> out['capacity'] == cap
+    True
+    """
     n = len(items)
     population = [random_genome(n) for _ in range(pop_size)]
     best_history = []  # track best fitness per generation
@@ -138,10 +276,8 @@ def run_ga(
 
         # Elitism
         get_fitness = lambda i: fitnesses[i]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[
-            :elitism
-        ]  # Sort the population by fitness and get the top `elitism` indices
-        elites = [population[i][:] for i in elite_indices]  # Make nepo babies
+        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
+        elites = [population[i][:] for i in elite_indices]
 
         # New generation
         new_pop = elites[:]
@@ -165,27 +301,25 @@ def run_ga(
         "avg_history": avg_history,
     }
 
-
-result = run_ga(items, capacity)
-
-best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
-
-print(f"Knapsack capacity: {result['capacity']}")
-print(
-    f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
-)
-
-# print("Items included in the best solution:", best_items)
-
-# import matplotlib.pyplot as plt
-
-# # Plot fitness curves
-# plt.figure()
-# plt.plot(result["best_history"], label="Best fitness")
-# plt.plot(result["avg_history"], label="Average fitness")
-# plt.title("GA on Knapsack: Fitness over Generations")
-# plt.xlabel("Generation")
-# plt.ylabel("Fitness")
-# plt.legend()
-# plt.tight_layout()
-# plt.show()
+# ================================ Script entry =================================
+
+if __name__ == "__main__":
+    result = run_ga(items, capacity)
+    best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+
+    print(f"Knapsack capacity: {result['capacity']}")
+    print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
+    # Uncomment to inspect chosen items:
+    # print("Items included in the best solution:", best_items)
+
+    # Optional: plot fitness curves
+    # import matplotlib.pyplot as plt
+    # plt.figure()
+    # plt.plot(result["best_history"], label="Best fitness")
+    # plt.plot(result["avg_history"], label="Average fitness")
+    # plt.title("GA on Knapsack: Fitness over Generations")
+    # plt.xlabel("Generation")
+    # plt.ylabel("Fitness")
+    # plt.legend()
+    # plt.tight_layout()
+    # plt.show()

From 97f54f21e11961a910b58dc674ccc5673ce854d8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:40:48 +0000
Subject: [PATCH 05/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 6f8881af06e5..29b8e5eb5da3 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -14,21 +14,23 @@
 
 # =========================== Problem setup: Knapsack ===========================
 
-KNAPSACK_N_ITEMS: int = 42                   # Number of items in the knapsack problem
-KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)        # Range of item values
-KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)         # Range of item weights
-KNAPSACK_CAPACITY_RATIO: float = 0.5           # Capacity as a fraction of total weight
+KNAPSACK_N_ITEMS: int = 42  # Number of items in the knapsack problem
+KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100)  # Range of item values
+KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50)  # Range of item weights
+KNAPSACK_CAPACITY_RATIO: float = 0.5  # Capacity as a fraction of total weight
+
 
 @dataclass
 class Item:
     value: int
     weight: int
 
+
 def generate_knapsack_instance(
     n_items: int,
     value_range: tuple[int, int],
     weight_range: tuple[int, int],
-    capacity_ratio: float
+    capacity_ratio: float,
 ) -> tuple[list[Item], int]:
     """
     Generates a random knapsack problem instance.
@@ -63,12 +65,13 @@ def generate_knapsack_instance(
     capacity = int(sum(it.weight for it in items) * capacity_ratio)
     return items, capacity
 
+
 # Example instance (guarded by __main__ below for printing)
 items, capacity = generate_knapsack_instance(
     n_items=KNAPSACK_N_ITEMS,
     value_range=KNAPSACK_VALUE_RANGE,
     weight_range=KNAPSACK_WEIGHT_RANGE,
-    capacity_ratio=KNAPSACK_CAPACITY_RATIO
+    capacity_ratio=KNAPSACK_CAPACITY_RATIO,
 )
 
 # ============================== GA Representation ==============================
@@ -86,6 +89,7 @@ def generate_knapsack_instance(
 
 Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
 
+
 def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,
@@ -117,7 +121,7 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
             total_value += item.value
             total_weight += item.weight
     if total_weight > capacity:
-        overflow = (total_weight - capacity)
+        overflow = total_weight - capacity
         total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
     return total_value, total_weight
 
@@ -137,6 +141,7 @@ def random_genome(n: int) -> Genome:
     """
     return [random.randint(0, 1) for _ in range(n)]
 
+
 def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     """
     Performs tournament selection to choose a genome from the population.
@@ -191,6 +196,7 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     cutoff_point = random.randint(1, min_length - 1)
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
+
 def mutation(g: Genome, p_mutation: float) -> Genome:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
@@ -301,6 +307,7 @@ def run_ga(
         "avg_history": avg_history,
     }
 
+
 # ================================ Script entry =================================
 
 if __name__ == "__main__":
@@ -308,7 +315,9 @@ def run_ga(
     best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
 
     print(f"Knapsack capacity: {result['capacity']}")
-    print(f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}")
+    print(
+        f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+    )
     # Uncomment to inspect chosen items:
     # print("Items included in the best solution:", best_items)
 

From 38c8ac3f211a4f9a4694f47f3a388f3e2221389d Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:43:25 +0100
Subject: [PATCH 06/27] Update Genome to genome_t

---
 genetic_algorithm/knapsack.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 29b8e5eb5da3..ad42eebede53 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -87,10 +87,10 @@ def generate_knapsack_instance(
 
 OVERWEIGHT_PENALTY_FACTOR = 10
 
-Genome = list[int]  # An index list where 1 means item is included, 0 means excluded
+genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
 
-def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int]:
+def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,
     the returned value is penalized; weight is the actual summed weight.
@@ -126,7 +126,7 @@ def evaluate(genome: Genome, items: list[Item], capacity: int) -> tuple[int, int
     return total_value, total_weight
 
 
-def random_genome(n: int) -> Genome:
+def random_genome(n: int) -> genome_t:
     """
     Generates a random genome (list of 0/1) of length n.
 
@@ -142,7 +142,7 @@ def random_genome(n: int) -> Genome:
     return [random.randint(0, 1) for _ in range(n)]
 
 
-def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
+def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -164,7 +164,7 @@ def selection(population: list[Genome], fitnesses: list[int], k: int) -> Genome:
     return max(contenders, key=get_fitness)[0][:]
 
 
-def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]:
+def crossover(a: genome_t, b: genome_t, p_crossover: float) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
     If crossover does not occur (random > p_crossover) or genomes are too short,
@@ -197,7 +197,7 @@ def crossover(a: Genome, b: Genome, p_crossover: float) -> tuple[Genome, Genome]
     return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
 
 
-def mutation(g: Genome, p_mutation: float) -> Genome:
+def mutation(g: genome_t, p_mutation: float) -> genome_t:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
 
@@ -232,7 +232,7 @@ def run_ga(
     Runs the genetic algorithm to (approximately) solve the knapsack problem.
 
     Returns a dict with keys:
-      - 'best_genome' (Genome)
+      - 'best_genome' (genome_t)
       - 'best_value' (int)
       - 'best_weight' (int)
       - 'capacity' (int)

From 5c257b7019b3177ea3da6b9429bd586042d9b747 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:43:45 +0000
Subject: [PATCH 07/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index ad42eebede53..1d06acc8bad5 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -164,7 +164,9 @@ def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genom
     return max(contenders, key=get_fitness)[0][:]
 
 
-def crossover(a: genome_t, b: genome_t, p_crossover: float) -> tuple[genome_t, genome_t]:
+def crossover(
+    a: genome_t, b: genome_t, p_crossover: float
+) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
     If crossover does not occur (random > p_crossover) or genomes are too short,

From 2d293273a84d8e9bae5c05acf319e36b80e5fa7c Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:50:17 +0100
Subject: [PATCH 08/27] Fix variable names

---
 genetic_algorithm/knapsack.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 1d06acc8bad5..f99e1dff9841 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -126,7 +126,7 @@ def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, i
     return total_value, total_weight
 
 
-def random_genome(n: int) -> genome_t:
+def random_genome(length: int) -> genome_t:
     """
     Generates a random genome (list of 0/1) of length n.
 
@@ -139,10 +139,10 @@ def random_genome(n: int) -> genome_t:
     >>> len(g), set(g).issubset({0, 1})
     (5, True)
     """
-    return [random.randint(0, 1) for _ in range(n)]
+    return [random.randint(0, 1) for _ in range(length)]
 
 
-def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genome_t:
+def selection(population: list[genome_t], fitnesses: list[int], tournament_k: int) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -159,13 +159,13 @@ def selection(population: list[genome_t], fitnesses: list[int], k: int) -> genom
     >>> parent in pop
     True
     """
-    contenders = random.sample(list(zip(population, fitnesses)), k)
-    get_fitness = lambda x: x[1]
+    contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
+    get_fitness = lambda contender: contender[1]
     return max(contenders, key=get_fitness)[0][:]
 
 
 def crossover(
-    a: genome_t, b: genome_t, p_crossover: float
+    genome_1: genome_t, genome_2: genome_t, p_crossover: float
 ) -> tuple[genome_t, genome_t]:
     """
     Performs single-point crossover between two genomes.
@@ -192,14 +192,14 @@ def crossover(
     >>> c1, c2
     ([0, 0, 0], [1, 1, 1])
     """
-    min_length = min(len(a), len(b))
+    min_length = min(len(genome_1), len(genome_2))
     if random.random() > p_crossover or min_length < 2:
-        return a[:], b[:]
+        return genome_1[:], genome_2[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return a[:cutoff_point] + b[cutoff_point:], b[:cutoff_point] + a[cutoff_point:]
+    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[:cutoff_point] + genome_1[cutoff_point:]
 
 
-def mutation(g: genome_t, p_mutation: float) -> genome_t:
+def mutation(genome: genome_t, p_mutation: float) -> genome_t:
     """
     Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.
 
@@ -217,7 +217,7 @@ def mutation(g: genome_t, p_mutation: float) -> genome_t:
     >>> mutation([0, 1, 1, 0], p_mutation=0.0)
     [0, 1, 1, 0]
     """
-    return [(1 - gene) if random.random() < p_mutation else gene for gene in g]
+    return [(1 - gene) if random.random() < p_mutation else gene for gene in genome]
 
 
 def run_ga(
@@ -290,8 +290,8 @@ def run_ga(
         # New generation
         new_pop = elites[:]
         while len(new_pop) < pop_size:
-            parent1 = selection(population, fitnesses, k=tournament_k)
-            parent2 = selection(population, fitnesses, k=tournament_k)
+            parent1 = selection(population, fitnesses, tournament_k=tournament_k)
+            parent2 = selection(population, fitnesses, tournament_k=tournament_k)
             child1, child2 = crossover(parent1, parent2, p_crossover)
             child1 = mutation(child1, p_mutation)
             child2 = mutation(child2, p_mutation)

From 6a13f89004f05f0e8eb61a7b12bb390488a25c2b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:50:38 +0000
Subject: [PATCH 09/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index f99e1dff9841..ed40e4eab1ab 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -142,7 +142,9 @@ def random_genome(length: int) -> genome_t:
     return [random.randint(0, 1) for _ in range(length)]
 
 
-def selection(population: list[genome_t], fitnesses: list[int], tournament_k: int) -> genome_t:
+def selection(
+    population: list[genome_t], fitnesses: list[int], tournament_k: int
+) -> genome_t:
     """
     Performs tournament selection to choose a genome from the population.
 
@@ -196,7 +198,9 @@ def crossover(
     if random.random() > p_crossover or min_length < 2:
         return genome_1[:], genome_2[:]
     cutoff_point = random.randint(1, min_length - 1)
-    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[:cutoff_point] + genome_1[cutoff_point:]
+    return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[
+        :cutoff_point
+    ] + genome_1[cutoff_point:]
 
 
 def mutation(genome: genome_t, p_mutation: float) -> genome_t:

From c893eda4a7415dc0ad124fb786ba8f0e7550db4a Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:51:20 +0100
Subject: [PATCH 10/27] Add type annotation for run_ga

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index ed40e4eab1ab..853139efcf94 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -233,7 +233,7 @@ def run_ga(
     p_mutation: float = MUTATION_PROBABILITY,
     tournament_k: int = TOURNAMENT_K,
     elitism: int = ELITISM,
-):
+) -> dict:
     """
     Runs the genetic algorithm to (approximately) solve the knapsack problem.
 

From ddfa4790184070c303527374f5b19c8c1a017efa Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:52:53 +0100
Subject: [PATCH 11/27] Rename loop index i to idx in script entry

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 853139efcf94..a4f51a600540 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -318,7 +318,7 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [items[i] for i, bit in enumerate(result["best_genome"]) if bit == 1]
+    best_items = [items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(

From a04a1dde917d2402fe9143f4181d9cf93e184bcc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:53:15 +0000
Subject: [PATCH 12/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index a4f51a600540..358a6622c7ef 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -318,7 +318,9 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1]
+    best_items = [
+        items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
+    ]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(

From 0b72ce9ba7047baa1fb43679f81755d554147cc7 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 18:55:02 +0100
Subject: [PATCH 13/27] Inline n and rename index i to idx in run_ga

---
 genetic_algorithm/knapsack.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 358a6622c7ef..85514ee1a4b8 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -267,8 +267,7 @@ def run_ga(
     >>> out['capacity'] == cap
     True
     """
-    n = len(items)
-    population = [random_genome(n) for _ in range(pop_size)]
+    population = [random_genome(len(items)) for _ in range(pop_size)]
     best_history = []  # track best fitness per generation
     avg_history = []
     best_overall = None
@@ -287,9 +286,9 @@ def run_ga(
             best_overall = population[best_idx][:]
 
         # Elitism
-        get_fitness = lambda i: fitnesses[i]
+        get_fitness = lambda idx: fitnesses[idx]
         elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
-        elites = [population[i][:] for i in elite_indices]
+        elites = [population[idx][:] for idx in elite_indices]
 
         # New generation
         new_pop = elites[:]

From 6dff6c1f291da327a4a43099021599c7a87411b6 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:03:28 +0100
Subject: [PATCH 14/27] Inline key lambdas and comment out unused best_items

---
 genetic_algorithm/knapsack.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 85514ee1a4b8..e60db3001431 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -162,8 +162,7 @@ def selection(
     True
     """
     contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
-    get_fitness = lambda contender: contender[1]
-    return max(contenders, key=get_fitness)[0][:]
+    return max(contenders, key=lambda contender: contender[1])[0][:]
 
 
 def crossover(
@@ -258,8 +257,6 @@ def run_ga(
     ...     p_crossover=0.9, p_mutation=0.05,
     ...     tournament_k=2, elitism=1
     ... )
-    >>> sorted(out.keys())
-    ['avg_history', 'best_genome', 'best_history', 'best_value', 'best_weight', 'capacity']
     >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
     True
     >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
@@ -286,8 +283,8 @@ def run_ga(
             best_overall = population[best_idx][:]
 
         # Elitism
-        get_fitness = lambda idx: fitnesses[idx]
-        elite_indices = sorted(range(pop_size), key=get_fitness, reverse=True)[:elitism]
+        sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
+        elite_indices = sorted_indices.reverse[:elitism]
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation
@@ -317,18 +314,19 @@ def run_ga(
 
 if __name__ == "__main__":
     result = run_ga(items, capacity)
-    best_items = [
-        items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
-    ]
+    best_value, best_weight = result["best_value"], result["best_weight"]
 
     print(f"Knapsack capacity: {result['capacity']}")
     print(
-        f"Best solution: value = {result['best_value']}, weight = {result['best_weight']}"
+        f"Best solution: value = {best_value}, weight = {best_weight}"
     )
-    # Uncomment to inspect chosen items:
+    # # Uncomment to inspect chosen items:
+    # best_items = [
+    #     items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
+    # ]
     # print("Items included in the best solution:", best_items)
 
-    # Optional: plot fitness curves
+    # # Optional: plot fitness curves
     # import matplotlib.pyplot as plt
     # plt.figure()
     # plt.plot(result["best_history"], label="Best fitness")

From ff4a50bbffb3a438345032c6ab565c0e8a294a6f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:03:50 +0000
Subject: [PATCH 15/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index e60db3001431..cc55098dc96a 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -317,9 +317,7 @@ def run_ga(
     best_value, best_weight = result["best_value"], result["best_weight"]
 
     print(f"Knapsack capacity: {result['capacity']}")
-    print(
-        f"Best solution: value = {best_value}, weight = {best_weight}"
-    )
+    print(f"Best solution: value = {best_value}, weight = {best_weight}")
     # # Uncomment to inspect chosen items:
     # best_items = [
     #     items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1

From 5f8eb7963a403551158d2d4e7297c5cdba333a74 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:05:30 +0100
Subject: [PATCH 16/27] Call reverse() on sorted_indices in elitism step

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index cc55098dc96a..bcbae9ebc3f4 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices.reverse[:elitism]
+        elite_indices = sorted_indices.reverse()[:elitism]
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From 36207c672376ebcf0e3cc73e73e12df2bbce7e37 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:07:43 +0100
Subject: [PATCH 17/27] Reverse elite indices with [::-1] (reverse() returns None)

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index bcbae9ebc3f4..6d17518d3449 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices.reverse()[:elitism]
+        elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From c95007c159e59f1a844fc8e58b4f2ee6d2ccea97 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:08:05 +0000
Subject: [PATCH 18/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 6d17518d3449..66bcc5cac580 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -284,7 +284,7 @@ def run_ga(
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
-        elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
+        elite_indices = sorted_indices[::-1][:elitism]  # reverse and take top indices
         elites = [population[idx][:] for idx in elite_indices]
 
         # New generation

From 8b810d0b8c908173de10653d09098fe4c69f1f2c Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:12:03 +0100
Subject: [PATCH 19/27] Annotate run_ga locals and rename history keys

---
 genetic_algorithm/knapsack.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 66bcc5cac580..d1f42b87eeba 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -265,22 +265,22 @@ def run_ga(
     True
     """
     population = [random_genome(len(items)) for _ in range(pop_size)]
-    best_history = []  # track best fitness per generation
-    avg_history = []
-    best_overall = None
-    best_fit_overall = -1
+    best_fitness_history: list[int] = []  # track best fitness per generation
+    avg_fitness_history: list[int] = []
+    best_genome_overall: genome_t = []
+    best_fitness_overall: int = -1
 
     for _ in range(generations):
         fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
         best_fit = max(fitnesses)
         best_idx = fitnesses.index(best_fit)
-        best_history.append(best_fit)
+        best_fitness_history.append(best_fit)
         avg_fit = sum(fitnesses) / pop_size
-        avg_history.append(avg_fit)
+        avg_fitness_history.append(avg_fit)
 
-        if best_fit > best_fit_overall:
-            best_fit_overall = best_fit
-            best_overall = population[best_idx][:]
+        if best_fit > best_fitness_overall:
+            best_fitness_overall = best_fit
+            best_genome_overall = population[best_idx][:]
 
         # Elitism
         sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
@@ -299,14 +299,14 @@ def run_ga(
         population = new_pop[:pop_size]
 
     # Final evaluation of the best
-    best_value, best_weight = evaluate(best_overall, items, capacity)
+    best_value, best_weight = evaluate(best_genome_overall, items, capacity)
     return {
-        "best_genome": best_overall,
+        "best_genome": best_genome_overall,
         "best_value": best_value,
         "best_weight": best_weight,
         "capacity": capacity,
-        "best_history": best_history,
-        "avg_history": avg_history,
+        "best_fitness_history": best_fitness_history,
+        "avg_fitness_history": avg_fitness_history,
     }
 
 
@@ -327,8 +327,8 @@ def run_ga(
     # # Optional: plot fitness curves
     # import matplotlib.pyplot as plt
     # plt.figure()
-    # plt.plot(result["best_history"], label="Best fitness")
-    # plt.plot(result["avg_history"], label="Average fitness")
+    # plt.plot(result["best_fitness_history"], label="Best fitness")
+    # plt.plot(result["avg_fitness_history"], label="Average fitness")
     # plt.title("GA on Knapsack: Fitness over Generations")
     # plt.xlabel("Generation")
     # plt.ylabel("Fitness")

From 07f07deb3fbf631f4bf3cfef5ad0a751c76e0ab2 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:12:58 +0100
Subject: [PATCH 20/27] Remove blank line before evaluate

---
 genetic_algorithm/knapsack.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index d1f42b87eeba..314fb30b0e5e 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -89,7 +89,6 @@ def generate_knapsack_instance(
 
 genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
-
 def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,

From e7a417a325302224feba07bc3ad318b8adfa9b39 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:13:38 +0000
Subject: [PATCH 21/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 314fb30b0e5e..d1f42b87eeba 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -89,6 +89,7 @@ def generate_knapsack_instance(
 
 genome_t = list[int]  # An index list where 1 means item is included, 0 means excluded
 
+
 def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
     """
     Calculates fitness (value) and weight of a candidate solution. If overweight,

From 3d17060483038c7e21c7e686f4458cd4c72268f2 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:14:53 +0100
Subject: [PATCH 22/27] Annotate avg_fitness_history as list[float]

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index d1f42b87eeba..27a35e2e9902 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -266,7 +266,7 @@ def run_ga(
     """
     population = [random_genome(len(items)) for _ in range(pop_size)]
     best_fitness_history: list[int] = []  # track best fitness per generation
-    avg_fitness_history: list[int] = []
+    avg_fitness_history: list[float] = []
     best_genome_overall: genome_t = []
     best_fitness_overall: int = -1
 

From 8f4c28e1f584ecab0284fffd44500f6fd1acf61f Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:21:10 +0100
Subject: [PATCH 23/27] Update run_ga doctest for renamed history keys

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 27a35e2e9902..efac9f477e85 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -257,7 +257,7 @@ def run_ga(
     ...     p_crossover=0.9, p_mutation=0.05,
     ...     tournament_k=2, elitism=1
     ... )
-    >>> len(out['best_history']) == 5 and len(out['avg_history']) == 5
+    >>> len(out['best_fitness_history']) == 5 and len(out['avg_fitness_history']) == 5
     True
     >>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
     True

From 4f9c5081c1f3541f5d0ba0e7961093d4849e6eb8 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:21:46 +0100
Subject: [PATCH 24/27] Use tournament_k keyword in selection doctest

---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index efac9f477e85..8c30da49d5df 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -157,7 +157,7 @@ def selection(
     >>> random.seed(1)
     >>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
     >>> fits = [0, 5, 9, 7]
-    >>> parent = selection(pop, fits, k=2)
+    >>> parent = selection(pop, fits, tournament_k=2)
     >>> parent in pop
     True
     """

From 833a9af0dbbbc61376291bde604a45be9a8b442b Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:28:57 +0100
Subject: [PATCH 25/27] Add reference links and fix doctest filename

---
 genetic_algorithm/knapsack.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index 8c30da49d5df..c55e1f30add6 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,15 +1,17 @@
 """Did you know that Genetic Algorithms can be used to quickly approximate
 combinatorial optimization problems such as knapsack?
 
+Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
+Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
+Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem
+
 Run doctests:
-    python -m doctest -v ga_knapsack.py
+    python -m doctest -v knapsack.py
 """
 
 import random
 from dataclasses import dataclass
 
-# Keep module-level RNG deterministic for examples that rely on random,
-# but individual doctests re-seed locally as needed.
 random.seed(42)
 
 # =========================== Problem setup: Knapsack ===========================

From 215849d49112d22ec1876d36215feed8fa0d84d4 Mon Sep 17 00:00:00 2001
From: Tony Dang <tungtonydang@gmail.com>
Date: Sun, 5 Oct 2025 19:30:23 +0100
Subject: [PATCH 26/27] Expand module docstring with GA motivation

---
 genetic_algorithm/knapsack.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index c55e1f30add6..e581ca9f0020 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -1,6 +1,12 @@
 """Did you know that Genetic Algorithms can be used to quickly approximate
 combinatorial optimization problems such as knapsack?
 
+It is commonly known that combinatorial optimization problems can be solved using
+dynamic programming. It is lesser known that genetic algorithms (or evolutionary
+computing in general) can reach the best solution fairly quickly in a lot of cases.
+Otherwise, it can still approximate a very good solution (in life, good is good 
+enough).
+
 Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
 Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
 Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem

From bb4e35b4875d5841e65671972a8550b57143d0a3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 5 Oct 2025 18:31:01 +0000
Subject: [PATCH 27/27] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 genetic_algorithm/knapsack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genetic_algorithm/knapsack.py b/genetic_algorithm/knapsack.py
index e581ca9f0020..270dc0c9acfd 100644
--- a/genetic_algorithm/knapsack.py
+++ b/genetic_algorithm/knapsack.py
@@ -4,7 +4,7 @@
 It is commonly known that combinatorial optimization problems can be solved using
 dynamic programming. It is lesser known that genetic algorithms (or evolutionary
 computing in general) can reach the best solution fairly quickly in a lot of cases.
-Otherwise, it can still approximate a very good solution (in life, good is good 
+Otherwise, it can still approximate a very good solution (in life, good is good
 enough).
 
 Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm