Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f4dc224
Genetic algorithm for Knapsack
Dang-Hoang-Tung Oct 5, 2025
e77bd83
updating DIRECTORY.md
Dang-Hoang-Tung Oct 5, 2025
0611a6c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
45f1516
Update with doctests
Dang-Hoang-Tung Oct 5, 2025
97f54f2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
38c8ac3
Update Genome to genome_t
Dang-Hoang-Tung Oct 5, 2025
5c257b7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
2d29327
Fix variable names
Dang-Hoang-Tung Oct 5, 2025
6a13f89
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
c893eda
Add type annotation for run_ga
Dang-Hoang-Tung Oct 5, 2025
ddfa479
Update variable names
Dang-Hoang-Tung Oct 5, 2025
a04a1dd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
0b72ce9
Update variable names
Dang-Hoang-Tung Oct 5, 2025
6dff6c1
Fix issues
Dang-Hoang-Tung Oct 5, 2025
ff4a50b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
5f8eb79
Fix issues
Dang-Hoang-Tung Oct 5, 2025
36207c6
Fix issues
Dang-Hoang-Tung Oct 5, 2025
c95007c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
8b810d0
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
07f07de
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
e7a417a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
3d17060
Fix type issues
Dang-Hoang-Tung Oct 5, 2025
8f4c28e
fix issues
Dang-Hoang-Tung Oct 5, 2025
4f9c508
fix issues
Dang-Hoang-Tung Oct 5, 2025
833a9af
Polish up
Dang-Hoang-Tung Oct 5, 2025
215849d
Polish up
Dang-Hoang-Tung Oct 5, 2025
bb4e35b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@

## Genetic Algorithm
* [Basic String](genetic_algorithm/basic_string.py)
* [Knapsack](genetic_algorithm/knapsack.py)

## Geodesy
* [Haversine Distance](geodesy/haversine_distance.py)
Expand Down
345 changes: 345 additions & 0 deletions genetic_algorithm/knapsack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
"""Did you know that Genetic Algorithms can be used to quickly approximate
combinatorial optimization problems such as knapsack?

It is commonly known that combinatorial optimization problems can be solved using
dynamic programming. It is lesser known that genetic algorithms (or evolutionary
computing in general) can reach the best solution fairly quickly in a lot of cases.
Otherwise, it can still approximate a very good solution (in life, good is good
enough).

Genetic algorithms: https://en.wikipedia.org/wiki/Genetic_algorithm
Evolutionary computation: https://en.wikipedia.org/wiki/Evolutionary_computation
Knapsack problem: https://en.wikipedia.org/wiki/Knapsack_problem

Run doctests:
python -m doctest -v knapsack.py
"""

import random
from dataclasses import dataclass

random.seed(42)

# =========================== Problem setup: Knapsack ===========================

KNAPSACK_N_ITEMS: int = 42 # Number of items in the knapsack problem
KNAPSACK_VALUE_RANGE: tuple[int, int] = (10, 100) # Range of item values
KNAPSACK_WEIGHT_RANGE: tuple[int, int] = (5, 50) # Range of item weights
KNAPSACK_CAPACITY_RATIO: float = 0.5 # Capacity as a fraction of total weight


@dataclass
class Item:
value: int
weight: int


def generate_knapsack_instance(
n_items: int,
value_range: tuple[int, int],
weight_range: tuple[int, int],
capacity_ratio: float,
) -> tuple[list[Item], int]:
"""
Generates a random knapsack problem instance.

Returns a tuple: (items, capacity), where items is a list of Item(value, weight)
and capacity is an int computed as floor(capacity_ratio * total_weight).

Examples
--------
Use a tiny, deterministic instance to validate shape and capacity range:

>>> random.seed(0)
>>> items, cap = generate_knapsack_instance(
... n_items=3,
... value_range=(5, 5),
... weight_range=(10, 10),
... capacity_ratio=0.5
... )
>>> len(items), cap
(3, 15)
>>> all(isinstance(it, Item) for it in items)
True
>>> [it.value for it in items], [it.weight for it in items]
([5, 5, 5], [10, 10, 10])
"""
items = []
for _ in range(n_items):
value = random.randint(*value_range)
weight = random.randint(*weight_range)
items.append(Item(value=value, weight=weight))
# Capacity as a fraction of total weight
capacity = int(sum(it.weight for it in items) * capacity_ratio)
return items, capacity


# Example instance (guarded by __main__ below for printing)
items, capacity = generate_knapsack_instance(
n_items=KNAPSACK_N_ITEMS,
value_range=KNAPSACK_VALUE_RANGE,
weight_range=KNAPSACK_WEIGHT_RANGE,
capacity_ratio=KNAPSACK_CAPACITY_RATIO,
)

# ============================== GA Representation ==============================

# HYPERPARAMETERS (For tuning the GA)

POPULATION_SIZE = 120
GENERATIONS = 200
CROSSOVER_PROBABILITY = 0.9
MUTATION_PROBABILITY = 0.01
TOURNAMENT_K = 3
ELITISM = 2

OVERWEIGHT_PENALTY_FACTOR = 10

genome_t = list[int] # An index list where 1 means item is included, 0 means excluded


def evaluate(genome: genome_t, items: list[Item], capacity: int) -> tuple[int, int]:
"""
Calculates fitness (value) and weight of a candidate solution. If overweight,
the returned value is penalized; weight is the actual summed weight.

Returns (value, weight).

Examples
--------
Feasible genome (no penalty):

>>> it = [Item(10, 4), Item(7, 3), Item(5, 2)]
>>> genome = [1, 0, 1] # take items 0 and 2
>>> evaluate(genome, it, capacity=7)
(15, 6)

Overweight genome (penalty applies):
Total value = 10+7+5 = 22, total weight = 9, capacity = 7, overflow = 2
Penalized value = max(0, 22 - 2 * OVERWEIGHT_PENALTY_FACTOR) = 2

>>> genome = [1, 1, 1]
>>> evaluate(genome, it, capacity=7)
(2, 9)
"""
total_value = 0
total_weight = 0
for gene, item in zip(genome, items):
if gene:
total_value += item.value
total_weight += item.weight
if total_weight > capacity:
overflow = total_weight - capacity
total_value = max(0, total_value - overflow * OVERWEIGHT_PENALTY_FACTOR)
return total_value, total_weight


def random_genome(length: int) -> genome_t:
"""
Generates a random genome (list of 0/1) of length n.

Examples
--------
Check length and content are 0/1 bits:

>>> random.seed(123)
>>> g = random_genome(5)
>>> len(g), set(g).issubset({0, 1})
(5, True)
"""
return [random.randint(0, 1) for _ in range(length)]


def selection(
population: list[genome_t], fitnesses: list[int], tournament_k: int
) -> genome_t:
"""
Performs tournament selection to choose a genome from the population.

Note that other selection strategies exist such as roulette wheel, rank-based, etc.

Examples
--------
Deterministic tournament with fixed seed (k=2):

>>> random.seed(1)
>>> pop = [[0,0,0], [1,0,0], [1,1,0], [1,1,1]]
>>> fits = [0, 5, 9, 7]
>>> parent = selection(pop, fits, tournament_k=2)
>>> parent in pop
True
"""
contenders = random.sample(list(zip(population, fitnesses)), tournament_k)
return max(contenders, key=lambda contender: contender[1])[0][:]


def crossover(
genome_1: genome_t, genome_2: genome_t, p_crossover: float
) -> tuple[genome_t, genome_t]:
"""
Performs single-point crossover between two genomes.
If crossover does not occur (random > p_crossover) or genomes are too short,
returns copies of the parents.

Note: other crossover strategies exist (two-point, uniform, etc.).

Examples
--------
Force crossover with p=1.0 and fixed RNG; verify lengths and bit content:

>>> random.seed(2)
>>> a, b = [0,0,0,0], [1,1,1,1]
>>> c1, c2 = crossover(a, b, p_crossover=1.0)
>>> len(c1) == len(a) == len(c2) == len(b)
True
>>> set(c1).issubset({0,1}) and set(c2).issubset({0,1})
True

No crossover if p=0.0:

>>> c1, c2 = crossover([0,0,0], [1,1,1], p_crossover=0.0)
>>> c1, c2
([0, 0, 0], [1, 1, 1])
"""
min_length = min(len(genome_1), len(genome_2))
if random.random() > p_crossover or min_length < 2:
return genome_1[:], genome_2[:]
cutoff_point = random.randint(1, min_length - 1)
return genome_1[:cutoff_point] + genome_2[cutoff_point:], genome_2[
:cutoff_point
] + genome_1[cutoff_point:]


def mutation(genome: genome_t, p_mutation: float) -> genome_t:
"""
Performs bit-flip mutation on a genome. Each bit flips with probability p_mutation.

Note: other mutation strategies exist (swap, scramble, etc.).

Examples
--------
With probability 1.0, every bit flips:

>>> mutation([0, 1, 1, 0], p_mutation=1.0)
[1, 0, 0, 1]

With probability 0.0, nothing changes:

>>> mutation([0, 1, 1, 0], p_mutation=0.0)
[0, 1, 1, 0]
"""
return [(1 - gene) if random.random() < p_mutation else gene for gene in genome]


def run_ga(
items: list[Item],
capacity: int,
pop_size: int = POPULATION_SIZE,
generations: int = GENERATIONS,
p_crossover: float = CROSSOVER_PROBABILITY,
p_mutation: float = MUTATION_PROBABILITY,
tournament_k: int = TOURNAMENT_K,
elitism: int = ELITISM,
) -> dict:
"""
Runs the genetic algorithm to (approximately) solve the knapsack problem.

Returns a dict with keys:
- 'best_genome' (genome_t)
- 'best_value' (int)
- 'best_weight' (int)
- 'capacity' (int)
- 'best_history' (list[int])
- 'avg_history' (list[float])

Examples
--------
Use a tiny instance and few generations to validate structure and lengths:

>>> random.seed(1234)
>>> tiny_items = [Item(5,2), Item(6,3), Item(2,1), Item(7,4)]
>>> cap = 5
>>> out = run_ga(
... tiny_items, cap,
... pop_size=10, generations=5,
... p_crossover=0.9, p_mutation=0.05,
... tournament_k=2, elitism=1
... )
>>> len(out['best_fitness_history']) == 5 and len(out['avg_fitness_history']) == 5
True
>>> isinstance(out['best_genome'], list) and isinstance(out['best_value'], int)
True
>>> out['capacity'] == cap
True
"""
population = [random_genome(len(items)) for _ in range(pop_size)]
best_fitness_history: list[int] = [] # track best fitness per generation
avg_fitness_history: list[float] = []
best_genome_overall: genome_t = []
best_fitness_overall: int = -1

for _ in range(generations):
fitnesses = [evaluate(genome, items, capacity)[0] for genome in population]
best_fit = max(fitnesses)
best_idx = fitnesses.index(best_fit)
best_fitness_history.append(best_fit)
avg_fit = sum(fitnesses) / pop_size
avg_fitness_history.append(avg_fit)

if best_fit > best_fitness_overall:
best_fitness_overall = best_fit
best_genome_overall = population[best_idx][:]

# Elitism
sorted_indices = sorted(range(pop_size), key=lambda idx: fitnesses[idx])
elite_indices = sorted_indices[::-1][:elitism] # reverse and take top indices
elites = [population[idx][:] for idx in elite_indices]

# New generation
new_pop = elites[:]
while len(new_pop) < pop_size:
parent1 = selection(population, fitnesses, tournament_k=tournament_k)
parent2 = selection(population, fitnesses, tournament_k=tournament_k)
child1, child2 = crossover(parent1, parent2, p_crossover)
child1 = mutation(child1, p_mutation)
child2 = mutation(child2, p_mutation)
new_pop.extend([child1, child2])
population = new_pop[:pop_size]

# Final evaluation of the best
best_value, best_weight = evaluate(best_genome_overall, items, capacity)
return {
"best_genome": best_genome_overall,
"best_value": best_value,
"best_weight": best_weight,
"capacity": capacity,
"best_fitness_history": best_fitness_history,
"avg_fitness_history": avg_fitness_history,
}


# ================================ Script entry =================================

if __name__ == "__main__":
result = run_ga(items, capacity)
best_value, best_weight = result["best_value"], result["best_weight"]

print(f"Knapsack capacity: {result['capacity']}")
print(f"Best solution: value = {best_value}, weight = {best_weight}")
# # Uncomment to inspect chosen items:
# best_items = [
# items[idx] for idx, bit in enumerate(result["best_genome"]) if bit == 1
# ]
# print("Items included in the best solution:", best_items)

# # Optional: plot fitness curves
# import matplotlib.pyplot as plt
# plt.figure()
# plt.plot(result["best_fitness_history"], label="Best fitness")
# plt.plot(result["avg_fitness_history"], label="Average fitness")
# plt.title("GA on Knapsack: Fitness over Generations")
# plt.xlabel("Generation")
# plt.ylabel("Fitness")
# plt.legend()
# plt.tight_layout()
# plt.show()