|
12 | 12 | //! |
13 | 13 | //! The algorithm keeps track of the bounds of the expanding square and supplies a `default` value, |
14 | 14 | //! that in the example case is always zero, but in the real data toggles between zero and one. |
15 | | -pub struct Input { |
16 | | - size: usize, |
17 | | - algorithm: [u8; 512], |
18 | | - pixels: [u8; 40_000], |
19 | | -} |
| 15 | +//! |
| 16 | +//! A faster SIMD approach processes cells 16 at a time. |
| 17 | +use crate::util::grid::*; |
| 18 | +use crate::util::point::*; |
| 19 | + |
| 20 | +type Input = (Vec<u8>, Grid<u8>); |
20 | 21 |
|
21 | 22 | pub fn parse(input: &str) -> Input { |
22 | | - // `#` is odd and `.` is even so we can convert to one or zero by bitwise AND with 1. |
23 | | - let bits: Vec<Vec<_>> = |
24 | | - input.lines().map(|line| line.bytes().map(|b| b & 1).collect()).collect(); |
25 | | - let size = bits.len() - 2; |
26 | | - let algorithm = bits[0][..512].try_into().unwrap(); |
27 | | - |
28 | | - // Offset the initial square by 50 cells in both dimensions. |
29 | | - // The square expands by at most one in each step so this is enough room to stay within bounds. |
30 | | - let mut pixels = [0; 40_000]; |
31 | | - for (i, row) in bits[2..].iter().enumerate() { |
32 | | - let start = (i + 50) * 200 + 50; |
33 | | - let end = start + size; |
34 | | - pixels[start..end].copy_from_slice(&row[..size]); |
35 | | - } |
| 23 | + let (prefix, suffix) = input.split_once("\n\n").unwrap(); |
| 24 | + |
| 25 | + let algorithm = prefix.bytes().map(|b| u8::from(b == b'#')).collect(); |
| 26 | + let grid = Grid::parse(suffix); |
36 | 27 |
|
37 | | - Input { size, algorithm, pixels } |
| 28 | + (algorithm, grid) |
38 | 29 | } |
39 | 30 |
|
40 | | -pub fn part1(input: &Input) -> usize { |
41 | | - enhance(input, 2) |
| 31 | +pub fn part1(input: &Input) -> u32 { |
| 32 | + #[cfg(not(feature = "simd"))] |
| 33 | + let result = scalar::enhance(input, 2); |
| 34 | + |
| 35 | + #[cfg(feature = "simd")] |
| 36 | + let result = simd::enhance(input, 2); |
| 37 | + |
| 38 | + result |
42 | 39 | } |
43 | 40 |
|
44 | | -pub fn part2(input: &Input) -> usize { |
45 | | - enhance(input, 50) |
| 41 | +pub fn part2(input: &Input) -> u32 { |
| 42 | + #[cfg(not(feature = "simd"))] |
| 43 | + let result = scalar::enhance(input, 50); |
| 44 | + |
| 45 | + #[cfg(feature = "simd")] |
| 46 | + let result = simd::enhance(input, 50); |
| 47 | + |
| 48 | + result |
46 | 49 | } |
47 | 50 |
|
48 | | -fn enhance(input: &Input, steps: usize) -> usize { |
49 | | - let algorithm = input.algorithm; |
50 | | - let mut pixels = input.pixels; |
51 | | - let mut next = [0; 40_000]; |
52 | | - |
53 | | - let mut start = 50; |
54 | | - let mut end = 50 + input.size as i32; |
55 | | - let mut default = 0; |
56 | | - |
57 | | - for _ in 0..steps { |
58 | | - for y in (start - 1)..(end + 1) { |
59 | | - // If the pixel is within current bounds then return it, or else use the `default` |
60 | | - // edge value specified by the enhancement algorithm. |
61 | | - let helper = |sx, sy, shift| { |
62 | | - let result = if sx < end && sy >= start && sy < end { |
63 | | - pixels[(sy * 200 + sx) as usize] as usize |
64 | | - } else { |
65 | | - default as usize |
| 51 | +#[cfg(not(feature = "simd"))] |
| 52 | +mod scalar { |
| 53 | + use super::*; |
| 54 | + |
| 55 | + pub(super) fn enhance(input: &Input, steps: i32) -> u32 { |
| 56 | + let (algorithm, grid) = input; |
| 57 | + |
| 58 | + // Offset the initial square by `steps` + 1 buffer cells in both dimensions. |
| 59 | + // The square expands by at most one in each step so this is enough room to stay within bounds. |
| 60 | + let extra = steps + 1; |
| 61 | + let offset = Point::new(extra, extra); |
| 62 | + let mut pixels = Grid::new(grid.width + 2 * extra, grid.height + 2 * extra, 0); |
| 63 | + |
| 64 | + for y in 0..grid.height { |
| 65 | + for x in 0..grid.width { |
| 66 | + let point = Point::new(x, y); |
| 67 | + pixels[point + offset] = u8::from(grid[point] == b'#'); |
| 68 | + } |
| 69 | + } |
| 70 | + |
| 71 | + let mut next = pixels.clone(); |
| 72 | + let mut default = 0; |
| 73 | + let mut start = extra; |
| 74 | + let mut end = extra + grid.width; |
| 75 | + |
| 76 | + for _ in 0..steps { |
| 77 | + for y in (start - 1)..(end + 1) { |
| 78 | + // If the pixel is within current bounds then return it, or else use the `default` |
| 79 | + // edge value specified by the enhancement algorithm. |
| 80 | + let helper = |sx, sy, shift| { |
| 81 | + let result = if sx < end && start <= sy && sy < end { |
| 82 | + pixels[Point::new(sx, sy)] |
| 83 | + } else { |
| 84 | + default |
| 85 | + }; |
| 86 | + (result as usize) << shift |
66 | 87 | }; |
67 | | - result << shift |
68 | | - }; |
69 | | - |
70 | | - // If the edge pixels are 1 then the initial edge will look like |
71 | | - // [##a] |
72 | | - // [##b] |
73 | | - // [##c] |
74 | | - // or 11a11b11c when encoded as an index. |
75 | | - let mut index = if default == 1 { 0b11011011 } else { 0b00000000 }; |
76 | | - |
77 | | - for x in (start - 1)..(end + 1) { |
78 | | - // Keeps a sliding window of the index, updated as we evaluate the row from |
79 | | - // left to right. Shift the index left by one each turn, updating the values from |
80 | | - // the three new rightmost pixels entering the window. |
81 | | - index = ((index << 1) & 0b110110110) |
82 | | - + helper(x + 1, y - 1, 6) |
83 | | - + helper(x + 1, y, 3) |
84 | | - + helper(x + 1, y + 1, 0); |
85 | | - |
86 | | - next[(y * 200 + x) as usize] = algorithm[index]; |
| 88 | + |
| 89 | + // If the edge pixels are 1 then the initial edge will look like |
| 90 | + // [##a] |
| 91 | + // [##b] |
| 92 | + // [##c] |
| 93 | + // or 11a11b11c when encoded as an index. |
| 94 | + let mut index = if default == 1 { 0b11011011 } else { 0b00000000 }; |
| 95 | + |
| 96 | + for x in (start - 1)..(end + 1) { |
| 97 | + // Keeps a sliding window of the index, updated as we evaluate the row from |
| 98 | + // left to right. Shift the index left by one each turn, updating the values from |
| 99 | + // the three new rightmost pixels entering the window. |
| 100 | + index = ((index << 1) & 0b110110110) |
| 101 | + + helper(x + 1, y - 1, 6) |
| 102 | + + helper(x + 1, y, 3) |
| 103 | + + helper(x + 1, y + 1, 0); |
| 104 | + |
| 105 | + next[Point::new(x, y)] = algorithm[index]; |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + // Swap grids then calculate the next value for edge pixels beyond the boundary. |
| 110 | + (pixels, next) = (next, pixels); |
| 111 | + default = if default == 0 { algorithm[0] } else { algorithm[511] }; |
| 112 | + |
| 113 | + // Boundaries expand by one each turn |
| 114 | + start -= 1; |
| 115 | + end += 1; |
| 116 | + } |
| 117 | + |
| 118 | + pixels.bytes.iter().map(|&b| b as u32).sum() |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +#[cfg(feature = "simd")] |
| 123 | +mod simd { |
| 124 | + use super::*; |
| 125 | + use std::simd::Simd; |
| 126 | + use std::simd::num::SimdUint as _; |
| 127 | + |
| 128 | + const LANE_WIDTH: usize = 16; |
| 129 | + type Vector = Simd<u16, LANE_WIDTH>; |
| 130 | + |
| 131 | + pub(super) fn enhance(input: &Input, steps: i32) -> u32 { |
| 132 | + let (algorithm, grid) = input; |
| 133 | + |
| 134 | + // Offset the initial square by `steps` + 1 buffer cells in both dimensions. |
| 135 | + // The square expands by at most one in each step so this is enough room to stay within bounds. |
| 136 | + let extra = steps + 1; |
| 137 | + let offset = Point::new(extra, extra); |
| 138 | + let mut pixels = |
| 139 | + Grid::new(grid.width + 2 * extra + LANE_WIDTH as i32, grid.height + 2 * extra, 0); |
| 140 | + |
| 141 | + for y in 0..grid.height { |
| 142 | + for x in 0..grid.width { |
| 143 | + let point = Point::new(x, y); |
| 144 | + pixels[point + offset] = u8::from(grid[point] == b'#'); |
| 145 | + } |
| 146 | + } |
| 147 | + |
| 148 | + let mut next = pixels.clone(); |
| 149 | + let mut default = 0; |
| 150 | + let mut start = extra - 1; |
| 151 | + let mut end = extra + grid.width + 1; |
| 152 | + |
| 153 | + for _ in 0..steps { |
| 154 | + // Edge pixels on the infinite grid flip flop between on and off. |
| 155 | + for y in (start - 1)..(end + 1) { |
| 156 | + pixels[Point::new(start - 1, y)] = default; |
| 157 | + pixels[Point::new(start, y)] = default; |
| 158 | + pixels[Point::new(end - 1, y)] = default; |
| 159 | + pixels[Point::new(end, y)] = default; |
| 160 | + } |
| 161 | + |
| 162 | + for x in (start..end).step_by(LANE_WIDTH) { |
| 163 | + let edge = Simd::splat(if default == 0 { 0b000 } else { 0b111 }); |
| 164 | + let mut above = edge; |
| 165 | + let mut row = edge; |
| 166 | + |
| 167 | + for y in start..end { |
| 168 | + let below = if y < end - 2 { from_grid(&pixels, x, y + 1) } else { edge }; |
| 169 | + |
| 170 | + let indices = (above << 6) | (row << 3) | below; |
| 171 | + above = row; |
| 172 | + row = below; |
| 173 | + |
| 174 | + let base = (pixels.width * y + x) as usize; |
| 175 | + for (i, j) in indices.to_array().into_iter().enumerate() { |
| 176 | + next.bytes[base + i] = algorithm[j as usize]; |
| 177 | + } |
| 178 | + } |
87 | 179 | } |
| 180 | + |
| 181 | + // Swap grids then calculate the next value for edge pixels beyond the boundary. |
| 182 | + (pixels, next) = (next, pixels); |
| 183 | + default = if default == 0 { algorithm[0] } else { algorithm[511] }; |
| 184 | + |
| 185 | + // Boundaries expand by one each turn. |
| 186 | + start -= 1; |
| 187 | + end += 1; |
88 | 188 | } |
89 | 189 |
|
90 | | - // Boundaries expand by one each turn |
91 | | - pixels = next; |
92 | | - start -= 1; |
93 | | - end += 1; |
| 190 | + // Only count pixels inside the boundary. |
| 191 | + let mut result = 0; |
94 | 192 |
|
95 | | - // Calculate the next value for edge pixels beyond the boundary. |
96 | | - default = if default == 0 { algorithm[0] } else { algorithm[511] }; |
| 193 | + for y in 1..end - 1 { |
| 194 | + for x in 1..end - 1 { |
| 195 | + result += pixels[Point::new(x, y)] as u32; |
| 196 | + } |
| 197 | + } |
| 198 | + |
| 199 | + result |
97 | 200 | } |
98 | 201 |
|
99 | | - pixels.iter().filter(|&&p| p == 1).count() |
| 202 | + #[inline] |
| 203 | + fn from_grid(grid: &Grid<u8>, x: i32, y: i32) -> Vector { |
| 204 | + let index = (grid.width * y + x) as usize; |
| 205 | + |
| 206 | + let row = Simd::from_slice(&grid.bytes[index..]); |
| 207 | + let left = row.shift_elements_right::<1>(grid[Point::new(x - 1, y)]); |
| 208 | + let right = row.shift_elements_left::<1>(grid[Point::new(x + LANE_WIDTH as i32, y)]); |
| 209 | + |
| 210 | + let result = (left << 2) | (row << 1) | right; |
| 211 | + result.cast() |
| 212 | + } |
100 | 213 | } |
0 commit comments