From 0098878d9c13a7f720d29edaf0976ea12ada0102 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 7 Nov 2025 13:57:14 +1100 Subject: [PATCH 1/4] Improve and rename `inbounds!` macro. It has two versions, one with an upper bound, and one with a lower and upper bound. This commit removes the first one and changes the second one to take a range, because that is more concise and flexible and clearer. Also, rename it as `in_range`, which makes sense given that the bounds are specified via a Rust `Range`. Note: some of the ranges are incorrect, and will be fixed in the next commit. --- crates/cuda_std/src/thread.rs | 49 ++++++++++++++--------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 449cc972..e2426001 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -89,26 +89,15 @@ extern "C" { } #[cfg(target_os = "cuda")] -macro_rules! inbounds { - // the bounds were taken mostly from the cuda C++ programming guide, i also - // double-checked with what cuda clang does by checking its emitted llvm ir's scalar metadata - ($func_name:ident, $bound:expr) => {{ +macro_rules! in_range { + // The bounds were taken mostly from the cuda C++ programming guide. I also + // double-checked with what cuda clang does by checking its emitted llvm ir's scalar metadata. + ($func_name:ident, $range:expr) => {{ let val = unsafe { $func_name() }; - if val > $bound { - // SAFETY: this condition is declared unreachable by compute capability max bound + if !$range.contains(&val) { + // SAFETY: this condition is declared unreachable by compute capability max bound. 
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities - // we do this to potentially allow for better optimizations by LLVM - unsafe { core::hint::unreachable_unchecked() } - } else { - val - } - }}; - ($func_name:ident, $lower_bound:expr, $upper_bound:expr) => {{ - let val = unsafe { $func_name() }; - if !($lower_bound..=$upper_bound).contains(&val) { - // SAFETY: this condition is declared unreachable by compute capability max bound - // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities - // we do this to potentially allow for better optimizations by LLVM + // We do this to potentially allow for better optimizations by LLVM. unsafe { core::hint::unreachable_unchecked() } } else { val @@ -119,73 +108,73 @@ macro_rules! inbounds { #[gpu_only] #[inline(always)] pub fn thread_idx_x() -> u32 { - inbounds!(__nvvm_thread_idx_x, 1024) + in_range!(__nvvm_thread_idx_x, 0..=1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_y() -> u32 { - inbounds!(__nvvm_thread_idx_y, 1024) + in_range!(__nvvm_thread_idx_y, 0..=1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_z() -> u32 { - inbounds!(__nvvm_thread_idx_z, 64) + in_range!(__nvvm_thread_idx_z, 0..=64) } #[gpu_only] #[inline(always)] pub fn block_idx_x() -> u32 { - inbounds!(__nvvm_block_idx_x, 2147483647) + in_range!(__nvvm_block_idx_x, 0..=2147483647) } #[gpu_only] #[inline(always)] pub fn block_idx_y() -> u32 { - inbounds!(__nvvm_block_idx_y, 65535) + in_range!(__nvvm_block_idx_y, 0..=65535) } #[gpu_only] #[inline(always)] pub fn block_idx_z() -> u32 { - inbounds!(__nvvm_block_idx_z, 65535) + in_range!(__nvvm_block_idx_z, 0..=65535) } #[gpu_only] #[inline(always)] pub fn block_dim_x() -> u32 { - inbounds!(__nvvm_block_dim_x, 1, 1025) + in_range!(__nvvm_block_dim_x, 1..=1025) } #[gpu_only] #[inline(always)] pub fn block_dim_y() -> u32 { - inbounds!(__nvvm_block_dim_y, 1, 1025) + in_range!(__nvvm_block_dim_y, 1..=1025) } #[gpu_only] 
#[inline(always)] pub fn block_dim_z() -> u32 { - inbounds!(__nvvm_block_dim_z, 1, 65) + in_range!(__nvvm_block_dim_z, 1..=65) } #[gpu_only] #[inline(always)] pub fn grid_dim_x() -> u32 { - inbounds!(__nvvm_grid_dim_x, 1, 2147483648) + in_range!(__nvvm_grid_dim_x, 1..=2147483648) } #[gpu_only] #[inline(always)] pub fn grid_dim_y() -> u32 { - inbounds!(__nvvm_grid_dim_y, 1, 65536) + in_range!(__nvvm_grid_dim_y, 1..=65536) } #[gpu_only] #[inline(always)] pub fn grid_dim_z() -> u32 { - inbounds!(__nvvm_grid_dim_z, 1, 65536) + in_range!(__nvvm_grid_dim_z, 1..=65536) } /// Gets the 3d index of the thread currently executing the kernel. From 162e738db5f52c20e686d6892019c045cf00578b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 7 Nov 2025 15:19:31 +1100 Subject: [PATCH 2/4] Fix and document `in_range!` usage points. Every single one has an upper bound that is one higher than it should be. - For `thread_idx_[xyz]`: indices are 0-indexed, so the maximum index is the `block_dim_[xyz]` maximum minus one. Changing `..=` to `..` fixes it. - For `block_idx_[xyz]`: likewise, but relative to `grid_dim_[xyz]`. - For `block_dim_[xyz]`: these were all one too big. Not sure why, perhaps a `..`/`..=` mix-up? - For `grid_dim_[xyz]`: likewise. (Yes, these grid maximum dimensions are all of the form 2^N-1 even though the block maximum dimensions are all of the form 2^N. I don't know why, but it's what the CUDA docs say.) --- crates/cuda_std/src/thread.rs | 36 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index e2426001..85f9df26 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -108,73 +108,85 @@ macro_rules! in_range { #[gpu_only] #[inline(always)] pub fn thread_idx_x() -> u32 { - in_range!(__nvvm_thread_idx_x, 0..=1024) + // The range is derived from the `block_idx_x` range. 
+ in_range!(__nvvm_thread_idx_x, 0..1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_y() -> u32 { - in_range!(__nvvm_thread_idx_y, 0..=1024) + // The range is derived from the `block_idx_y` range. + in_range!(__nvvm_thread_idx_y, 0..1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_z() -> u32 { - in_range!(__nvvm_thread_idx_z, 0..=64) + // The range is derived from the `block_idx_z` range. + in_range!(__nvvm_thread_idx_z, 0..64) } #[gpu_only] #[inline(always)] pub fn block_idx_x() -> u32 { - in_range!(__nvvm_block_idx_x, 0..=2147483647) + // The range is derived from the `grid_idx_x` range. + in_range!(__nvvm_block_idx_x, 0..2147483647) } #[gpu_only] #[inline(always)] pub fn block_idx_y() -> u32 { - in_range!(__nvvm_block_idx_y, 0..=65535) + // The range is derived from the `grid_idx_y` range. + in_range!(__nvvm_block_idx_y, 0..65535) } #[gpu_only] #[inline(always)] pub fn block_idx_z() -> u32 { - in_range!(__nvvm_block_idx_z, 0..=65535) + // The range is derived from the `grid_idx_z` range. + in_range!(__nvvm_block_idx_z, 0..65535) } #[gpu_only] #[inline(always)] pub fn block_dim_x() -> u32 { - in_range!(__nvvm_block_dim_x, 1..=1025) + // CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024. + in_range!(__nvvm_block_dim_x, 1..=1024) } #[gpu_only] #[inline(always)] pub fn block_dim_y() -> u32 { - in_range!(__nvvm_block_dim_y, 1..=1025) + // CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024. + in_range!(__nvvm_block_dim_y, 1..=1024) } #[gpu_only] #[inline(always)] pub fn block_dim_z() -> u32 { - in_range!(__nvvm_block_dim_z, 1..=65) + // CUDA Compute Capabilities: "Maximum z-dimension of a block" is 64. + in_range!(__nvvm_block_dim_z, 1..=64) } #[gpu_only] #[inline(always)] pub fn grid_dim_x() -> u32 { - in_range!(__nvvm_grid_dim_x, 1..=2147483648) + // CUDA Compute Capabilities: "Maximum x-dimension of a grid of thread blocks" is 2^32 - 1. 
+ in_range!(__nvvm_grid_dim_x, 1..=2147483647) } #[gpu_only] #[inline(always)] pub fn grid_dim_y() -> u32 { - in_range!(__nvvm_grid_dim_y, 1..=65536) + // CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535. + in_range!(__nvvm_grid_dim_y, 1..=65535) } #[gpu_only] #[inline(always)] pub fn grid_dim_z() -> u32 { - in_range!(__nvvm_grid_dim_z, 1..=65536) + // CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535. + in_range!(__nvvm_grid_dim_z, 1..=65535) } /// Gets the 3d index of the thread currently executing the kernel. From 9b1eff72c8926a5e7a08540c9243a2effdc83495 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 7 Nov 2025 15:50:54 +1100 Subject: [PATCH 3/4] Don't call intrinsics in 3d `dim`/`idx` functions. Instead call the Rust functions that have the range constraints. That way the 3d versions get the same range constraints as the 1d versions. It also avoids the need for some `unsafe` blocks. --- crates/cuda_std/src/thread.rs | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 85f9df26..44bcc5c5 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -193,26 +193,14 @@ pub fn grid_dim_z() -> u32 { #[gpu_only] #[inline(always)] pub fn thread_idx() -> UVec3 { - unsafe { - UVec3::new( - __nvvm_thread_idx_x(), - __nvvm_thread_idx_y(), - __nvvm_thread_idx_z(), - ) - } + UVec3::new(thread_idx_x(), thread_idx_y(), thread_idx_z()) } /// Gets the 3d index of the block that the thread currently executing the kernel is located in. #[gpu_only] #[inline(always)] pub fn block_idx() -> UVec3 { - unsafe { - UVec3::new( - __nvvm_block_idx_x(), - __nvvm_block_idx_y(), - __nvvm_block_idx_z(), - ) - } + UVec3::new(block_idx_x(), block_idx_y(), block_idx_z()) } /// Gets the 3d layout of the thread blocks executing this kernel. 
In other words, @@ -220,13 +208,7 @@ pub fn block_idx() -> UVec3 { #[gpu_only] #[inline(always)] pub fn block_dim() -> UVec3 { - unsafe { - UVec3::new( - __nvvm_block_dim_x(), - __nvvm_block_dim_y(), - __nvvm_block_dim_z(), - ) - } + UVec3::new(block_dim_x(), block_dim_y(), block_dim_z()) } /// Gets the 3d layout of the block grids executing this kernel. In other words, @@ -234,13 +216,7 @@ pub fn block_dim() -> UVec3 { #[gpu_only] #[inline(always)] pub fn grid_dim() -> UVec3 { - unsafe { - UVec3::new( - __nvvm_grid_dim_x(), - __nvvm_grid_dim_y(), - __nvvm_grid_dim_z(), - ) - } + UVec3::new(grid_dim_x(), grid_dim_y(), grid_dim_z()) } /// Gets the overall thread index, accounting for 1d/2d/3d block/grid dimensions. This From bc1ad2c0e93bbbb90c7d5a2b255695cc9b8514de Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 7 Nov 2025 16:04:52 +1100 Subject: [PATCH 4/4] Remove `__nvvm_{thread,block,grid}_{idx,dim}_[xyz]` intrinsics. `core` has equivalents, might as well use them instead. 
--- crates/cuda_std/src/lib.rs | 7 +- crates/cuda_std/src/thread.rs | 44 ++++------ crates/rustc_codegen_nvvm/libintrinsics.bc | Bin 8388 -> 7768 bytes crates/rustc_codegen_nvvm/libintrinsics.ll | 92 --------------------- 4 files changed, 20 insertions(+), 123 deletions(-) diff --git a/crates/cuda_std/src/lib.rs b/crates/cuda_std/src/lib.rs index 752c07f1..8aef4d74 100644 --- a/crates/cuda_std/src/lib.rs +++ b/crates/cuda_std/src/lib.rs @@ -24,7 +24,12 @@ #![allow(internal_features)] #![cfg_attr( target_os = "cuda", - feature(alloc_error_handler, asm_experimental_arch, link_llvm_intrinsics) + feature( + alloc_error_handler, + asm_experimental_arch, + link_llvm_intrinsics, + stdarch_nvptx + ) )] extern crate alloc; diff --git a/crates/cuda_std/src/thread.rs b/crates/cuda_std/src/thread.rs index 44bcc5c5..42edbecc 100644 --- a/crates/cuda_std/src/thread.rs +++ b/crates/cuda_std/src/thread.rs @@ -63,22 +63,6 @@ use glam::{UVec2, UVec3}; // different calling conventions dont exist in nvptx, so we just use C as a placeholder. extern "C" { // defined in libintrinsics.ll - fn __nvvm_thread_idx_x() -> u32; - fn __nvvm_thread_idx_y() -> u32; - fn __nvvm_thread_idx_z() -> u32; - - fn __nvvm_block_dim_x() -> u32; - fn __nvvm_block_dim_y() -> u32; - fn __nvvm_block_dim_z() -> u32; - - fn __nvvm_block_idx_x() -> u32; - fn __nvvm_block_idx_y() -> u32; - fn __nvvm_block_idx_z() -> u32; - - fn __nvvm_grid_dim_x() -> u32; - fn __nvvm_grid_dim_y() -> u32; - fn __nvvm_grid_dim_z() -> u32; - fn __nvvm_warp_size() -> u32; fn __nvvm_block_barrier(); @@ -92,8 +76,8 @@ extern "C" { macro_rules! in_range { // The bounds were taken mostly from the cuda C++ programming guide. I also // double-checked with what cuda clang does by checking its emitted llvm ir's scalar metadata. 
- ($func_name:ident, $range:expr) => {{ - let val = unsafe { $func_name() }; + ($func_name:path, $range:expr) => {{ + let val = unsafe { $func_name() as u32 }; if !$range.contains(&val) { // SAFETY: this condition is declared unreachable by compute capability max bound. // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities @@ -109,84 +93,84 @@ macro_rules! in_range { #[inline(always)] pub fn thread_idx_x() -> u32 { // The range is derived from the `block_idx_x` range. - in_range!(__nvvm_thread_idx_x, 0..1024) + in_range!(core::arch::nvptx::_thread_idx_x, 0..1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_y() -> u32 { // The range is derived from the `block_idx_y` range. - in_range!(__nvvm_thread_idx_y, 0..1024) + in_range!(core::arch::nvptx::_thread_idx_y, 0..1024) } #[gpu_only] #[inline(always)] pub fn thread_idx_z() -> u32 { // The range is derived from the `block_idx_z` range. - in_range!(__nvvm_thread_idx_z, 0..64) + in_range!(core::arch::nvptx::_thread_idx_z, 0..64) } #[gpu_only] #[inline(always)] pub fn block_idx_x() -> u32 { // The range is derived from the `grid_idx_x` range. - in_range!(__nvvm_block_idx_x, 0..2147483647) + in_range!(core::arch::nvptx::_block_idx_x, 0..2147483647) } #[gpu_only] #[inline(always)] pub fn block_idx_y() -> u32 { // The range is derived from the `grid_idx_y` range. - in_range!(__nvvm_block_idx_y, 0..65535) + in_range!(core::arch::nvptx::_block_idx_y, 0..65535) } #[gpu_only] #[inline(always)] pub fn block_idx_z() -> u32 { // The range is derived from the `grid_idx_z` range. - in_range!(__nvvm_block_idx_z, 0..65535) + in_range!(core::arch::nvptx::_block_idx_z, 0..65535) } #[gpu_only] #[inline(always)] pub fn block_dim_x() -> u32 { // CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024. 
- in_range!(__nvvm_block_dim_x, 1..=1024) + in_range!(core::arch::nvptx::_block_dim_x, 1..=1024) } #[gpu_only] #[inline(always)] pub fn block_dim_y() -> u32 { // CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024. - in_range!(__nvvm_block_dim_y, 1..=1024) + in_range!(core::arch::nvptx::_block_dim_y, 1..=1024) } #[gpu_only] #[inline(always)] pub fn block_dim_z() -> u32 { // CUDA Compute Capabilities: "Maximum z-dimension of a block" is 64. - in_range!(__nvvm_block_dim_z, 1..=64) + in_range!(core::arch::nvptx::_block_dim_z, 1..=64) } #[gpu_only] #[inline(always)] pub fn grid_dim_x() -> u32 { // CUDA Compute Capabilities: "Maximum x-dimension of a grid of thread blocks" is 2^32 - 1. - in_range!(__nvvm_grid_dim_x, 1..=2147483647) + in_range!(core::arch::nvptx::_grid_dim_x, 1..=2147483647) } #[gpu_only] #[inline(always)] pub fn grid_dim_y() -> u32 { // CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535. - in_range!(__nvvm_grid_dim_y, 1..=65535) + in_range!(core::arch::nvptx::_grid_dim_y, 1..=65535) } #[gpu_only] #[inline(always)] pub fn grid_dim_z() -> u32 { // CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535. - in_range!(__nvvm_grid_dim_z, 1..=65535) + in_range!(core::arch::nvptx::_grid_dim_z, 1..=65535) } /// Gets the 3d index of the thread currently executing the kernel. 
diff --git a/crates/rustc_codegen_nvvm/libintrinsics.bc b/crates/rustc_codegen_nvvm/libintrinsics.bc index c22e92db132d0ac424c4695c5f309a0d7c924b5e..cbce93e225e3c7456f466fded314a7d26a479cda 100644 GIT binary patch literal 7768 zcmb`L4^Wfm9mk)SyyOK5;pNY*p$3`&bF`;v083HC3j|$g-40#1;o2?<1WeX|F$v+1 zw!Zw4M5S%8wqvV1v7@)zT{~Lst?qig@TZkp?4jFPd-a@5bk(ySv)$HhZMWa^=KVoJ z1+zQfGw=6#zR&0Ry}$SQ7fFRNbpE~!LgEP_wF-S+)#kS^U;l9Jib|%CT12ZLq<|$P zDNak`pq>sNt|Pave4MS4^M$MSG2M!9$0uZFvwLd=)w;NfeMuE#d|^gOg6hE~g|z`w zFhgTpA9#^u7FMuT%3x~lSatcvj^3%ROMBRY_417>Lg4XaY43&q!o?ZLRZExlOIju- z@~=-|fCY$ZicGInY*^6G+t9cCqQXgWYhu*nKFNk+z|?H>rDU1Akyx&8YU1 z0Si08%2GY-xVqfJUd)%(s`gK$8rIi5Rj7Zz;-s3%SL7xaPrEcJpBdnM2Cm)0dBs9wP`Pb@_Gz7zUlF)=@gFrUTszPC z1~@1{)uZ9s^_+Wvg922eB3PnZE%RDaqC#0xqC#0xqC=%@)6it*?vwKCVj8p;%{g@> zc(Ri<6|FPnYCBg}#tjl8&{wCIpxu`TQw57Gd@1_I4NC|~z`82uqjFdxJl6G>0wiuw7s2F8U5bG+4I;#$}qRM6@8}*?66(v;VoW0rhlk38Ift^?Nq3jMCO_Hpgb`?@*|Sm z)4b=Him0L3^nU12)L2c`2dksfs3@A3#;ck0()d`RPKrvSO};xijWXHss5FiqEU%49 zBj;4;wWu`Svj=OU((tY9FN|9DO?N(`<_Z1I5HI84reu8z*68Nl^gFp)MHoGL`pOd@ zlJwaQVE32`mbz?&4Iv_o$va+9v4p%v_m3AoMG1m>Y)ajb!ITAzCEcUF6$01R%6a;? zDK3+gPkH6%g0e_akv%e;^}!)^gFarwx|UAJ%Epaly|S{2)?TlR+n&I+nfxvn*Y1+} zTKz7KKXW2gc`c}%(x)`(Q!dUZXR4J&{8-j`t@^AsYs8w}px<5kR#)kyan(`Tnzy>v zbjwO7W-@#hZo8ek-|lzAUGTetewT&w<#WC-;vsB{;5R4ux2gQ@et%nk$MWkl%Ig=D ze-A0I>1DO!ht$)D)Dz@5A0taNBpaopi|)%e}6F0Y+ywfMa%zjwf&`N0oTu9K7-)ynCh@}ePSN~636Z=uQ0 zIyam>Ql5QUt3KC}eeQyKdMfKGlii@&STbZ>eW0s!qB>7{5sSa2m22(axZ(3)3LSy$ zpJku0sz1?YogdD6g2|={YqMso@`rYHl}wD54jIc}K)xJVGi@vlKy|eDoh9(5{uan5 zfosp_-2EMoT;!G4Ldr|k%Fpz7x$r>E{XJC${w$)>Y>7$!CeG--)DAI6JouW|Jo z%iy(#jH@O`J1?u>J(T=OIi1eS+N^LM{8wt-FzjZ*RWW^(Qetu~%f9Y)ba{4Vo=e&D@ko%XrM{{!J za?)v;9Pi+Jm$4yxCx5^aa*n2wBgw*jdRD#&DPSwaSZaY z7~^cy5CZ>5p_0$(CKp-Nqvay)WgD^$K%IS z9~H7cj!(GpQz55IZgM+>WxtgNZagO3^GEskUm6A76?sC@R-y1KxyjQi6!&p~BW=R! 
z7rF7z+J&{pxP+BEgo=}#$@{pleu@iJ?i3#Ap}T^RdiX8l&#)QOWcz36mpI**ufSiP z^ml?vsa_3AFRB~hHR(XWwwRP-I-(>X*%zYV-B_@GihVL2hsNj@t2M+pd!Bse7eR^k*}A0RODMF9~Jq%QhZe84}wqs z&x(rt%iz=RB2?s$fUg7}75QQCmx7Oq{5gq7MgFS9qaq(K3&%%Ap8jn`hjsFvsNye?{_9 zk^h(EqavTkhVwy1K2!2hk-uB=QITII`KZWONj@s_n3Ty&z88GDK2ec>7JM2X75Nvyr}0sde_4u;iu{{Wd{pFzrTD1Ge;~z2MgEKw z9~Jp&DLyLlpGrI`^8b-|RODIs&Y|OjihLUQbbL^eUkW}wPf(H9fqw`1sMM$D|2pvL z?-i)XKMFqmSq>HXHi<_?zE9#&k%tiEQ3WBbaK6HK@auGso6Su=U!%F!xwY;wv%}VE zZvBSS_6?_9a<;e~HnYvqINNE~z&^7Ec4@J?U1pD?z252cHR|ZIb?$nrP3Q8q>OAiH zEjq8mrfXfSv2C%&_Jtao7L6;mE*MyBT`+Q;*Sc8B;pPQnhnp7+-gJxTQu~78rS=8m z)9IkI5jDlNR=3+x@17e<$tiZ53?~lyof)uU)s*SMP3cZf(&y3W_DLIRb2641|cD z=2|HtlopFP3pOtXLd3>qrxX!Ni$$CTn->Ee1%=_)F`M1Z9H(v0db8 z@{#?+^|}5L?PC9kcCkNAHL`!WKG#2@UF;vxZgcp&jv_Oh8et80!df#HM7wdTO=MBS zEY_N_5N4a5VOCPnw0i7ecea4GTZF+57ew2Mwb_Dbhfgefb3=nOV!eCp4Nje>t*K7u zC@Pc|nn*TUy>)i8)#)@B70&G-(ba)_i$29G7w%KM@_t=`PT%B%v$A;QiYA}K=CH1) XYqnXPj#@{P*X?NXIO;s~lqckWHkoW= literal 8388 zcmb`L3ve678OK-FS*OIZEIS^MiR?5DiXDuD<04y5s+2I}1cos*L6TpQsO%_` z{0xtiZ6d|EAO|xw;7nyOj594A7gCyGC?h`zY3eX(nnEaPCMJ&>CZypx4205ePy5+A z6B~6VJF)(||NZTE-`(!*U9Q3sJ$*|iA<2Z08kH%3wS>Uo$@1Pa-$CKcx2 zEBRwU`(LO%Lb_8GpHNRest^tYj4^XKb$86v$c`({eMS3aVX|~Y4)HcZTzUe_I_(wu zP0HO%WUCFv72KijGlcBX6Os#kg}I?6`npHCWqHHh@N>3%D4dzGskb+!dvcRza7-uD z_LGR6i*WLEA9q++Zs*P#cZ3w%vd|s1oHIiQS+=5ely8gh501)j zbIMysgMNL`uM8GzgB|*y=cFd%{4vGl2;XAnJM6q)tTcwzJ0o;V+cCw50^cG2qo$qj z(C{r09xBlE>G=*5?~U+Kfu1 zzKIY$w(X%4DO(6JV>_z(Fnd@Me~Z-$Ge9RnESBM?!?2d=au&;OYkj&jfs47)1omx^ zCNQ3AlqArdvyE_q7z}=*n*EipLH>9^!LWD0FVoB&&@z1cCrSFbGv00e^9RZ1xiePv zefegJFn7jq1*WbiGCjl=i>Y$_*_%m{S!}?MJ>)O75l(=rSfZN0JRH@NB(vCpAA6|I zV%HFv9_nH-Rlb~OQc4c-UWS&FrWD6mhCh1-=2(FNc3Zz*pKca61JP3TZFL(Y37oI4 zATm4hrpk<>8)Za~ZF`8@{tG!5#REn)u73=2@yl$>?;BG!gvSAv{0?7+WEtfC=@v85 zVZW-cHCU znLH@D8vYl{D;SN} zxdBhK52jv@OvZAq=hbw0veXdb2eU{i=q9Ii4VkjCkfo%1qPIfe+uL~Gz)sZ#lJ>Di z{kfnnQdQ)Pk7d6-q-!uGi&)q4DS6ppOIfeHY^tr-@8NeT`F3m2^VDtEWwAO{TQ7m(`c6)J2-f?9&F_NkjIyBd5W%r}Xu%(qopj2juHs?^@R_FP*xa 
z*<$B+Ir&?iK`;CYK~Fg7vGXlPzU8ZA2-_+IZOY(IZO}UqY##`$ymVQ8>5Te|sQRKw zUUPUzcWy{GCH|gr=#Ch4lY#6b4#hiR%i6=1wFBbs!HJT7OG!vx+COnK9P~xRDNpRa98OdTpW2>q-z~qsPF{6;~o0gUAl}L z3&~`HUdz1@E-#5aoA@g0GR>U;L2>BkbHfR8&nL(|mmoKkAQwrHdn7^b;RLyd6679C zklU3Y*O4ICo*1WT6{gLxjd9U+plME!^Cigb`~hw}woWxWB3d;kK0oH?2M=+wj4NMB zpNe#icTW*=>q_rLZk|FxI_<{}2QE!Z^R{W zwU|zRIsKQy{7K|qqBjZiNN`arL zXctT)ys>DPu;vZk+4Z1Ma-1KTct}`xfuE5-BACDCjg=vx@^PiJdyim!PB}8!BW!wA zIn&=ORJ~7c6|vOAtE(I?K!&_uZv=Z78c%Y0PyZwH^g zq@tp~jrpkP?_xeG`cH%ZBZz~F{z2xWqW>=QQPKYrd_BZLMgKZjgVaYwznJ-`=-WOP>~-4pRQR{w`KZVrWIihLhnbIx{7LZX-i?aylM@8Pm;-ey8#^R$Qzn;ZM zMgA@p9~Jqnj7LTO0mh>u?_)eF@&U%9BHsf(-A_=F-v|En;GeQ!St;THx`w75KdM+Y9`zx`MXFI@=fP>{zI?Y0xd|bacKCZvc z)#7&**1U-nz16v=Zv|t6|}t~40fy{+HP#k zRzy3tV>z1}8r*T~-REp@7x>znY71ONg>0dTWTV4h>$ExCZd*~|+#Mvk1>pX`nBp}H vk11YrYr*OQQ&S7&2