|
11 | 11 | //! [`#[inline]`](https://doc.rust-lang.org/reference/attributes/codegen.html#the-inline-attribute). |
12 | 12 | //! |
13 | 13 | //! An optional SIMD variant that computes multiple hashes in parallel is also implemented. |
14 | | -
|
15 | 14 | pub fn buffer_size(n: usize) -> usize { |
16 | 15 | (n + 9).next_multiple_of(64) |
17 | 16 | } |
18 | 17 |
|
19 | | -pub fn hash(mut buffer: &mut [u8], size: usize) -> (u32, u32, u32, u32) { |
| 18 | +#[inline] |
| 19 | +pub fn hash(buffer: &mut [u8], size: usize) -> [u32; 4] { |
20 | 20 | let end = buffer.len() - 8; |
21 | 21 | let bits = size * 8; |
22 | 22 |
|
23 | 23 | buffer[size] = 0x80; |
24 | 24 | buffer[end..].copy_from_slice(&bits.to_le_bytes()); |
25 | 25 |
|
26 | 26 | let mut m = [0; 16]; |
27 | | - let mut a0: u32 = 0x67452301; |
28 | | - let mut b0: u32 = 0xefcdab89; |
29 | | - let mut c0: u32 = 0x98badcfe; |
30 | | - let mut d0: u32 = 0x10325476; |
| 27 | + let [mut a0, mut b0, mut c0, mut d0] = [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476]; |
31 | 28 |
|
32 | | - while !buffer.is_empty() { |
33 | | - let (prefix, suffix) = buffer.split_at_mut(64); |
34 | | - buffer = suffix; |
35 | | - |
36 | | - for (i, chunk) in prefix.chunks_exact(4).enumerate() { |
| 29 | + for block in buffer.chunks_exact(64) { |
| 30 | + for (i, chunk) in block.chunks_exact(4).enumerate() { |
37 | 31 | m[i] = u32::from_le_bytes(chunk.try_into().unwrap()); |
38 | 32 | } |
39 | 33 |
|
40 | | - let mut a = a0; |
41 | | - let mut b = b0; |
42 | | - let mut c = c0; |
43 | | - let mut d = d0; |
| 34 | + let [mut a, mut b, mut c, mut d] = [a0, b0, c0, d0]; |
44 | 35 |
|
45 | 36 | a = round1(a, b, c, d, m[0], 7, 0xd76aa478); |
46 | 37 | d = round1(d, a, b, c, m[1], 12, 0xe8c7b756); |
@@ -110,13 +101,11 @@ pub fn hash(mut buffer: &mut [u8], size: usize) -> (u32, u32, u32, u32) { |
110 | 101 | c = round4(c, d, a, b, m[2], 15, 0x2ad7d2bb); |
111 | 102 | b = round4(b, c, d, a, m[9], 21, 0xeb86d391); |
112 | 103 |
|
113 | | - a0 = a0.wrapping_add(a); |
114 | | - b0 = b0.wrapping_add(b); |
115 | | - c0 = c0.wrapping_add(c); |
116 | | - d0 = d0.wrapping_add(d); |
| 104 | + [a0, b0, c0, d0] = |
| 105 | + [a0.wrapping_add(a), b0.wrapping_add(b), c0.wrapping_add(c), d0.wrapping_add(d)]; |
117 | 106 | } |
118 | 107 |
|
119 | | - (a0.to_be(), b0.to_be(), c0.to_be(), d0.to_be()) |
| 108 | + [a0.to_be(), b0.to_be(), c0.to_be(), d0.to_be()] |
120 | 109 | } |
121 | 110 |
|
122 | 111 | #[inline] |
@@ -150,69 +139,59 @@ fn common(f: u32, a: u32, b: u32, m: u32, s: u32, k: u32) -> u32 { |
150 | 139 |
|
151 | 140 | #[cfg(feature = "simd")] |
152 | 141 | pub mod simd { |
153 | | - use std::array; |
| 142 | + use std::array::from_fn; |
154 | 143 | use std::simd::num::SimdUint as _; |
155 | 144 | use std::simd::{LaneCount, Simd, SupportedLaneCount}; |
156 | 145 |
|
157 | 146 | #[inline] |
158 | | - #[expect(clippy::too_many_lines)] |
159 | | - pub fn hash<const N: usize>( |
160 | | - buffers: &mut [[u8; 64]], |
161 | | - size: usize, |
162 | | - ) -> ([u32; N], [u32; N], [u32; N], [u32; N]) |
| 147 | + pub fn hash_fixed<const N: usize>(buffers: &mut [[u8; 64]; N], size: usize) -> [[u32; N]; 4] |
163 | 148 | where |
164 | 149 | LaneCount<N>: SupportedLaneCount, |
165 | 150 | { |
166 | 151 | // Assume all buffers are the same size. |
167 | | - let end = 64 - 8; |
168 | | - let bits = size * 8; |
169 | | - |
170 | 152 | for buffer in buffers.iter_mut() { |
171 | 153 | buffer[size] = 0x80; |
172 | | - buffer[end..].copy_from_slice(&bits.to_le_bytes()); |
173 | 154 | } |
174 | 155 |
|
175 | | - let mut a0: Simd<u32, N> = Simd::splat(0x67452301); |
176 | | - let mut b0: Simd<u32, N> = Simd::splat(0xefcdab89); |
177 | | - let mut c0: Simd<u32, N> = Simd::splat(0x98badcfe); |
178 | | - let mut d0: Simd<u32, N> = Simd::splat(0x10325476); |
| 156 | + let [a0, b0, c0, d0] = [ |
| 157 | + Simd::splat(0x67452301), |
| 158 | + Simd::splat(0xefcdab89), |
| 159 | + Simd::splat(0x98badcfe), |
| 160 | + Simd::splat(0x10325476), |
| 161 | + ]; |
| 162 | + let [mut a, mut b, mut c, mut d] = [a0, b0, c0, d0]; |
179 | 163 |
|
180 | | - let mut a = a0; |
181 | | - let mut b = b0; |
182 | | - let mut c = c0; |
183 | | - let mut d = d0; |
184 | | - |
185 | | - let m0 = message(buffers, 0); |
| 164 | + let m0 = message(buffers, 0, size); |
186 | 165 | a = round1(a, b, c, d, m0, 7, 0xd76aa478); |
187 | | - let m1 = message(buffers, 1); |
| 166 | + let m1 = message(buffers, 4, size); |
188 | 167 | d = round1(d, a, b, c, m1, 12, 0xe8c7b756); |
189 | | - let m2 = message(buffers, 2); |
| 168 | + let m2 = message(buffers, 8, size); |
190 | 169 | c = round1(c, d, a, b, m2, 17, 0x242070db); |
191 | | - let m3 = message(buffers, 3); |
| 170 | + let m3 = message(buffers, 12, size); |
192 | 171 | b = round1(b, c, d, a, m3, 22, 0xc1bdceee); |
193 | | - let m4 = message(buffers, 4); |
| 172 | + let m4 = message(buffers, 16, size); |
194 | 173 | a = round1(a, b, c, d, m4, 7, 0xf57c0faf); |
195 | | - let m5 = message(buffers, 5); |
| 174 | + let m5 = message(buffers, 20, size); |
196 | 175 | d = round1(d, a, b, c, m5, 12, 0x4787c62a); |
197 | | - let m6 = message(buffers, 6); |
| 176 | + let m6 = message(buffers, 24, size); |
198 | 177 | c = round1(c, d, a, b, m6, 17, 0xa8304613); |
199 | | - let m7 = message(buffers, 7); |
| 178 | + let m7 = message(buffers, 28, size); |
200 | 179 | b = round1(b, c, d, a, m7, 22, 0xfd469501); |
201 | | - let m8 = message(buffers, 8); |
| 180 | + let m8 = message(buffers, 32, size); |
202 | 181 | a = round1(a, b, c, d, m8, 7, 0x698098d8); |
203 | | - let m9 = message(buffers, 9); |
| 182 | + let m9 = message(buffers, 36, size); |
204 | 183 | d = round1(d, a, b, c, m9, 12, 0x8b44f7af); |
205 | | - let m10 = message(buffers, 10); |
| 184 | + let m10 = message(buffers, 40, size); |
206 | 185 | c = round1(c, d, a, b, m10, 17, 0xffff5bb1); |
207 | | - let m11 = message(buffers, 11); |
| 186 | + let m11 = message(buffers, 44, size); |
208 | 187 | b = round1(b, c, d, a, m11, 22, 0x895cd7be); |
209 | | - let m12 = message(buffers, 12); |
| 188 | + let m12 = message(buffers, 48, size); |
210 | 189 | a = round1(a, b, c, d, m12, 7, 0x6b901122); |
211 | | - let m13 = message(buffers, 13); |
| 190 | + let m13 = message(buffers, 52, size); |
212 | 191 | d = round1(d, a, b, c, m13, 12, 0xfd987193); |
213 | | - let m14 = message(buffers, 14); |
| 192 | + let m14 = Simd::splat(size as u32 * 8); |
214 | 193 | c = round1(c, d, a, b, m14, 17, 0xa679438e); |
215 | | - let m15 = message(buffers, 15); |
| 194 | + let m15 = Simd::splat(0); |
216 | 195 | b = round1(b, c, d, a, m15, 22, 0x49b40821); |
217 | 196 |
|
218 | 197 | a = round2(a, b, c, d, m1, 5, 0xf61e2562); |
@@ -266,30 +245,27 @@ pub mod simd { |
266 | 245 | c = round4(c, d, a, b, m2, 15, 0x2ad7d2bb); |
267 | 246 | b = round4(b, c, d, a, m9, 21, 0xeb86d391); |
268 | 247 |
|
269 | | - a0 += a; |
270 | | - b0 += b; |
271 | | - c0 += c; |
272 | | - d0 += d; |
273 | | - |
274 | | - ( |
275 | | - a0.swap_bytes().to_array(), |
276 | | - b0.swap_bytes().to_array(), |
277 | | - c0.swap_bytes().to_array(), |
278 | | - d0.swap_bytes().to_array(), |
279 | | - ) |
| 248 | + [ |
| 249 | + (a0 + a).swap_bytes().to_array(), |
| 250 | + (b0 + b).swap_bytes().to_array(), |
| 251 | + (c0 + c).swap_bytes().to_array(), |
| 252 | + (d0 + d).swap_bytes().to_array(), |
| 253 | + ] |
280 | 254 | } |
281 | 255 |
|
282 | 256 | #[inline] |
283 | | - fn message<const N: usize>(buffers: &mut [[u8; 64]], i: usize) -> Simd<u32, N> |
| 257 | + fn message<const N: usize>(buffers: &[[u8; 64]; N], i: usize, size: usize) -> Simd<u32, N> |
284 | 258 | where |
285 | 259 | LaneCount<N>: SupportedLaneCount, |
286 | 260 | { |
287 | | - let start = 4 * i; |
288 | | - let end = start + 4; |
289 | | - Simd::from_array(array::from_fn(|lane| { |
290 | | - let slice = &buffers[lane][start..end]; |
291 | | - u32::from_le_bytes(slice.try_into().unwrap()) |
292 | | - })) |
| 261 | + if i > size { |
| 262 | + Simd::splat(0) |
| 263 | + } else { |
| 264 | + Simd::from_array(from_fn(|lane| { |
| 265 | + let slice = &buffers[lane][i..i + 4]; |
| 266 | + u32::from_le_bytes(slice.try_into().unwrap()) |
| 267 | + })) |
| 268 | + } |
293 | 269 | } |
294 | 270 |
|
295 | 271 | #[inline] |
|
0 commit comments