Skip to content

Commit 5639196

Browse files
committed
Atomic iterator to share monotonically increasing value between multiple threads
1 parent 60a3bdf commit 5639196

File tree

8 files changed

+66
-81
lines changed

8 files changed

+66
-81
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Improvements to solutions are always appreciated. Please see the
5858
## Performance
5959

6060
Benchmarks are measured using the built-in `cargo bench` tool run on an [Apple M2 Max][apple-link].
61-
All 250 solutions from 2024 to 2015 complete sequentially in **518 milliseconds**.
61+
All 250 solutions from 2024 to 2015 complete sequentially in **517 milliseconds**.
6262
Interestingly 86% of the total time is spent on just 9 solutions.
6363
Performance is reasonable even on older hardware, for example a 2011 MacBook Pro with an
6464
[Intel i7-2720QM][intel-link] processor takes 3.5 seconds to run the same 250 solutions.
@@ -67,7 +67,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
6767

6868
| Year | [2015](#2015) | [2016](#2016) | [2017](#2017) | [2018](#2018) | [2019](#2019) | [2020](#2020) | [2021](#2021) | [2022](#2022) | [2023](#2023) | [2024](#2024) |
6969
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
70-
| Benchmark (ms) | 17 | 118 | 82 | 35 | 15 | 225 | 9 | 8 | 5 | 4 |
70+
| Benchmark (ms) | 17 | 117 | 82 | 35 | 15 | 225 | 9 | 8 | 5 | 4 |
7171

7272
## 2024
7373

@@ -335,7 +335,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
335335
| 2 | [Bathroom Security](https://adventofcode.com/2016/day/2) | [Source](src/year2016/day02.rs) | 29 |
336336
| 3 | [Squares With Three Sides](https://adventofcode.com/2016/day/3) | [Source](src/year2016/day03.rs) | 24 |
337337
| 4 | [Security Through Obscurity](https://adventofcode.com/2016/day/4) | [Source](src/year2016/day04.rs) | 79 |
338-
| 5 | [How About a Nice Game of Chess?](https://adventofcode.com/2016/day/5) | [Source](src/year2016/day05.rs) | 37000 |
338+
| 5 | [How About a Nice Game of Chess?](https://adventofcode.com/2016/day/5) | [Source](src/year2016/day05.rs) | 35000 |
339339
| 6 | [Signals and Noise](https://adventofcode.com/2016/day/6) | [Source](src/year2016/day06.rs) | 3 |
340340
| 7 | [Internet Protocol Version 7](https://adventofcode.com/2016/day/7) | [Source](src/year2016/day07.rs) | 364 |
341341
| 8 | [Two-Factor Authentication](https://adventofcode.com/2016/day/8) | [Source](src/year2016/day08.rs) | 9 |
@@ -366,7 +366,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
366366
| 1 | [Not Quite Lisp](https://adventofcode.com/2015/day/1) | [Source](src/year2015/day01.rs) | 2 |
367367
| 2 | [I Was Told There Would Be No Math](https://adventofcode.com/2015/day/2) | [Source](src/year2015/day02.rs) | 8 |
368368
| 3 | [Perfectly Spherical Houses in a Vacuum](https://adventofcode.com/2015/day/3) | [Source](src/year2015/day03.rs) | 95 |
369-
| 4 | [The Ideal Stocking Stuffer](https://adventofcode.com/2015/day/4) | [Source](src/year2015/day04.rs) | 14000 |
369+
| 4 | [The Ideal Stocking Stuffer](https://adventofcode.com/2015/day/4) | [Source](src/year2015/day04.rs) | 13000 |
370370
| 5 | [Doesn't He Have Intern-Elves For This?](https://adventofcode.com/2015/day/5) | [Source](src/year2015/day05.rs) | 38 |
371371
| 6 | [Probably a Fire Hazard](https://adventofcode.com/2015/day/6) | [Source](src/year2015/day06.rs) | 454 |
372372
| 7 | [Some Assembly Required](https://adventofcode.com/2015/day/7) | [Source](src/year2015/day07.rs) | 27 |

src/util/thread.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//! [scoped](https://doc.rust-lang.org/stable/std/thread/fn.scope.html)
33
//! threads equal to the number of cores on the machine. Unlike normal threads, scoped threads
44
//! can borrow data from their environment.
5-
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
5+
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering::Relaxed};
66
use std::thread::*;
77

88
/// Usually the number of physical cores.
@@ -170,3 +170,24 @@ fn pack(start: usize, end: usize) -> usize {
170170
fn unpack(both: usize) -> (usize, usize) {
171171
(both & 0xffffffff, both >> 32)
172172
}
173+
174+
/// Shares a monotonically increasing value between multiple threads.
175+
pub struct AtomicIter {
176+
running: AtomicBool,
177+
index: AtomicU32,
178+
step: u32,
179+
}
180+
181+
impl AtomicIter {
182+
pub fn new(start: u32, step: u32) -> Self {
183+
AtomicIter { running: AtomicBool::new(true), index: AtomicU32::from(start), step }
184+
}
185+
186+
pub fn next(&self) -> Option<u32> {
187+
self.running.load(Relaxed).then(|| self.index.fetch_add(self.step, Relaxed))
188+
}
189+
190+
pub fn stop(&self) {
191+
self.running.store(false, Relaxed);
192+
}
193+
}

src/year2015/day04.rs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,19 @@
1919
//! [`format!`]: std::format
2020
use crate::util::md5::*;
2121
use crate::util::thread::*;
22-
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
22+
use std::sync::atomic::{AtomicU32, Ordering};
2323

2424
pub struct Shared {
2525
prefix: String,
26-
done: AtomicBool,
27-
counter: AtomicU32,
26+
iter: AtomicIter,
2827
first: AtomicU32,
2928
second: AtomicU32,
3029
}
3130

3231
pub fn parse(input: &str) -> Shared {
3332
let shared = Shared {
3433
prefix: input.trim().to_owned(),
35-
done: AtomicBool::new(false),
36-
counter: AtomicU32::new(1000),
34+
iter: AtomicIter::new(1000, 1000),
3735
first: AtomicU32::new(u32::MAX),
3836
second: AtomicU32::new(u32::MAX),
3937
};
@@ -78,16 +76,15 @@ fn check_hash(buffer: &mut [u8], size: usize, n: u32, shared: &Shared) {
7876

7977
if result & 0xffffff00 == 0 {
8078
shared.second.fetch_min(n, Ordering::Relaxed);
81-
shared.done.store(true, Ordering::Relaxed);
79+
shared.iter.stop();
8280
} else if result & 0xfffff000 == 0 {
8381
shared.first.fetch_min(n, Ordering::Relaxed);
8482
}
8583
}
8684

8785
#[cfg(not(feature = "simd"))]
8886
fn worker(shared: &Shared) {
89-
while !shared.done.load(Ordering::Relaxed) {
90-
let offset = shared.counter.fetch_add(1000, Ordering::Relaxed);
87+
while let Some(offset) = shared.iter.next() {
9188
let (mut buffer, size) = format_string(&shared.prefix, offset);
9289

9390
for n in 0..1000 {
@@ -130,16 +127,15 @@ mod simd {
130127
for i in 0..N {
131128
if result[i] & 0xffffff00 == 0 {
132129
shared.second.fetch_min(start + offset + i as u32, Ordering::Relaxed);
133-
shared.done.store(true, Ordering::Relaxed);
130+
shared.iter.stop();
134131
} else if result[i] & 0xfffff000 == 0 {
135132
shared.first.fetch_min(start + offset + i as u32, Ordering::Relaxed);
136133
}
137134
}
138135
}
139136

140137
pub(super) fn worker(shared: &Shared) {
141-
while !shared.done.load(Ordering::Relaxed) {
142-
let start = shared.counter.fetch_add(1000, Ordering::Relaxed);
138+
while let Some(start) = shared.iter.next() {
143139
let (prefix, size) = format_string(&shared.prefix, start);
144140
let mut buffers = [prefix; 32];
145141

src/year2016/day05.rs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,10 @@
77
use crate::util::md5::*;
88
use crate::util::thread::*;
99
use std::sync::Mutex;
10-
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
1110

1211
struct Shared {
1312
prefix: String,
14-
done: AtomicBool,
15-
counter: AtomicU32,
13+
iter: AtomicIter,
1614
mutex: Mutex<Exclusive>,
1715
}
1816

@@ -24,8 +22,7 @@ struct Exclusive {
2422
pub fn parse(input: &str) -> Vec<u32> {
2523
let shared = Shared {
2624
prefix: input.trim().to_owned(),
27-
done: AtomicBool::new(false),
28-
counter: AtomicU32::new(1000),
25+
iter: AtomicIter::new(1000, 1000),
2926
mutex: Mutex::new(Exclusive { found: vec![], mask: 0 }),
3027
};
3128

@@ -90,15 +87,14 @@ fn check_hash(buffer: &mut [u8], size: usize, n: u32, shared: &Shared) {
9087
exclusive.mask |= 1 << (result >> 8);
9188

9289
if exclusive.mask & 0xff == 0xff {
93-
shared.done.store(true, Ordering::Relaxed);
90+
shared.iter.stop();
9491
}
9592
}
9693
}
9794

9895
#[cfg(not(feature = "simd"))]
9996
fn worker(shared: &Shared) {
100-
while !shared.done.load(Ordering::Relaxed) {
101-
let offset = shared.counter.fetch_add(1000, Ordering::Relaxed);
97+
while let Some(offset) = shared.iter.next() {
10298
let (mut buffer, size) = format_string(&shared.prefix, offset);
10399

104100
for n in 0..1000 {
@@ -146,15 +142,14 @@ mod simd {
146142
exclusive.mask |= 1 << (result[i] >> 8);
147143

148144
if exclusive.mask & 0xff == 0xff {
149-
shared.done.store(true, Ordering::Relaxed);
145+
shared.iter.stop();
150146
}
151147
}
152148
}
153149
}
154150

155151
pub(super) fn worker(shared: &Shared) {
156-
while !shared.done.load(Ordering::Relaxed) {
157-
let start = shared.counter.fetch_add(1000, Ordering::Relaxed);
152+
while let Some(start) = shared.iter.next() {
158153
let (prefix, size) = format_string(&shared.prefix, start);
159154
let mut buffers = [prefix; 32];
160155

src/year2016/day14.rs

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@ use crate::util::md5::*;
66
use crate::util::thread::*;
77
use std::collections::{BTreeMap, BTreeSet};
88
use std::sync::Mutex;
9-
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
109

1110
/// Atomics can be safely shared between threads.
1211
struct Shared<'a> {
1312
input: &'a str,
1413
part_two: bool,
15-
done: AtomicBool,
16-
counter: AtomicI32,
14+
iter: AtomicIter,
1715
mutex: Mutex<Exclusive>,
1816
}
1917

@@ -40,15 +38,12 @@ pub fn part2(input: &str) -> i32 {
4038

4139
/// Find the first 64 keys that satisfy the rules.
4240
fn generate_pad(input: &str, part_two: bool) -> i32 {
41+
let step = if cfg!(feature = "simd") { 32 } else { 1 };
42+
4343
let exclusive =
4444
Exclusive { threes: BTreeMap::new(), fives: BTreeMap::new(), found: BTreeSet::new() };
45-
let shared = Shared {
46-
input,
47-
part_two,
48-
done: AtomicBool::new(false),
49-
counter: AtomicI32::new(0),
50-
mutex: Mutex::new(exclusive),
51-
};
45+
let shared =
46+
Shared { input, part_two, iter: AtomicIter::new(0, step), mutex: Mutex::new(exclusive) };
5247

5348
// Use as many cores as possible to parallelize the search.
5449
spawn(|| worker(&shared));
@@ -59,9 +54,9 @@ fn generate_pad(input: &str, part_two: bool) -> i32 {
5954

6055
#[cfg(not(feature = "simd"))]
6156
fn worker(shared: &Shared<'_>) {
62-
while !shared.done.load(Ordering::Relaxed) {
57+
while let Some(n) = shared.iter.next() {
6358
// Get the next key to check.
64-
let n = shared.counter.fetch_add(1, Ordering::Relaxed);
59+
let n = n as i32;
6560

6661
// Calculate the hash.
6762
let (mut buffer, size) = format_string(shared.input, n);
@@ -88,9 +83,9 @@ fn worker(shared: &Shared<'_>) {
8883
let mut result = ([0; 32], [0; 32], [0; 32], [0; 32]);
8984
let mut buffers = [[0; 64]; 32];
9085

91-
while !shared.done.load(Ordering::Relaxed) {
86+
while let Some(start) = shared.iter.next() {
9287
// Get the next key to check.
93-
let start = shared.counter.fetch_add(32, Ordering::Relaxed);
88+
let start = start as i32;
9489

9590
// Calculate the hash.
9691
for i in 0..32 {
@@ -183,7 +178,7 @@ fn check(shared: &Shared<'_>, n: i32, hash: (u32, u32, u32, u32)) {
183178
exclusive.found.extend(candidates);
184179

185180
if exclusive.found.len() >= 64 {
186-
shared.done.store(true, Ordering::Relaxed);
181+
shared.iter.stop();
187182
}
188183
}
189184
}

src/year2017/day15.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use crate::util::iter::*;
1111
use crate::util::math::*;
1212
use crate::util::parse::*;
1313
use crate::util::thread::*;
14-
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
1514
use std::sync::mpsc::{Receiver, Sender, channel};
1615
use std::thread;
1716

@@ -26,8 +25,7 @@ type Input = (usize, usize);
2625
pub struct Shared {
2726
first: usize,
2827
second: usize,
29-
start: AtomicUsize,
30-
done: AtomicBool,
28+
iter: AtomicIter,
3129
}
3230

3331
/// Generated numbers from `start` to `start + BLOCK`.
@@ -40,7 +38,7 @@ struct Block {
4038

4139
pub fn parse(input: &str) -> Input {
4240
let [first, second] = input.iter_unsigned().chunk::<2>().next().unwrap();
43-
let shared = Shared { first, second, start: AtomicUsize::new(0), done: AtomicBool::new(false) };
41+
let shared = Shared { first, second, iter: AtomicIter::new(0, BLOCK as u32) };
4442
let (tx, rx) = channel();
4543

4644
thread::scope(|scope| {
@@ -62,9 +60,9 @@ pub fn part2(input: &Input) -> usize {
6260
}
6361

6462
fn sender(shared: &Shared, tx: &Sender<Block>) {
65-
while !shared.done.load(Ordering::Relaxed) {
63+
while let Some(start) = shared.iter.next() {
6664
// Start at any point in the sequence using modular exponentiation.
67-
let start = shared.start.fetch_add(BLOCK, Ordering::Relaxed);
65+
let start = start as usize;
6866
let mut first = shared.first * 16807.mod_pow(start, MOD);
6967
let mut second = shared.second * 48271.mod_pow(start, MOD);
7068

@@ -138,7 +136,7 @@ fn receiver(shared: &Shared, rx: &Receiver<Block>) -> Input {
138136
}
139137

140138
// Signal worker threads to finish.
141-
shared.done.store(true, Ordering::Relaxed);
139+
shared.iter.stop();
142140

143141
(part_one, part_two)
144142
}

src/year2018/day15.rs

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@
7979
use crate::util::grid::*;
8080
use crate::util::point::*;
8181
use crate::util::thread::*;
82-
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
8382

8483
const READING_ORDER: [Point; 4] = [UP, LEFT, RIGHT, DOWN];
8584

@@ -103,12 +102,6 @@ struct Unit {
103102
power: i32,
104103
}
105104

106-
/// Shared between threads for part two.
107-
struct Shared {
108-
done: AtomicBool,
109-
elf_attack_power: AtomicI32,
110-
}
111-
112105
/// Parse the input into a bitmask for the cave walls
113106
/// and a list of point coordinates for each Elf and Goblin.
114107
pub fn parse(input: &str) -> Input {
@@ -143,24 +136,21 @@ pub fn part1(input: &Input) -> i32 {
143136
/// single Elf is killed. Since each fight is independent we can parallelize the search over
144137
/// multiple threads.
145138
pub fn part2(input: &Input) -> i32 {
146-
let shared = Shared { done: AtomicBool::new(false), elf_attack_power: AtomicI32::new(4) };
139+
let iter = AtomicIter::new(4, 1);
147140

148141
// Use as many cores as possible to parallelize the search.
149-
let result = spawn(|| worker(input, &shared));
142+
let result = spawn(|| worker(input, &iter));
150143
// Find lowest possible power.
151144
result.into_iter().flatten().min_by_key(|&(eap, _)| eap).map(|(_, score)| score).unwrap()
152145
}
153146

154-
fn worker(input: &Input, shared: &Shared) -> Option<(i32, i32)> {
155-
while !shared.done.load(Ordering::Relaxed) {
156-
// Get the next attack power, incrementing it atomically for the next fight.
157-
let power = shared.elf_attack_power.fetch_add(1, Ordering::Relaxed);
158-
147+
fn worker(input: &Input, iter: &AtomicIter) -> Option<(u32, i32)> {
148+
while let Some(power) = iter.next() {
159149
// If the Elves win then set the score and signal all threads to stop.
160150
// Return all potential scores to the caller as another thread may already have found a
161151
// different value.
162-
if let Some(score) = fight(input, power, true) {
163-
shared.done.store(true, Ordering::Relaxed);
152+
if let Some(score) = fight(input, power as i32, true) {
153+
iter.stop();
164154
return Some((power, score));
165155
}
166156
}

0 commit comments

Comments
 (0)