@@ -12,77 +12,77 @@ namespace cp_algo::math {
1212
1313 struct interval {
1414 int lo, hi;
15+ auto operator <=>(const interval&) const = default ;
1516 };
1617
17- void exec_on_blocks (int64_t n, auto &&callback) {
18+ // callback(k, prefix) such that:
19+ // (F * G)[k] = prefix + (F[k] - F[k-1]) * G[1] + (G[k] - G[k-1]) * F[1]
20+ // Uses H as a buffer for (F * G)[k], then overrides with callback results
21+ void exec_on_blocks (int64_t n, auto &H, auto const & F, auto const & G, auto &&callback) {
1822 auto [rt_n, num_floors] = floor_stats (n);
1923
2024 auto to_ord = [&](int64_t k) {
2125 return k <= rt_n ? int (k) : num_floors - int (n / k) + 1 ;
2226 };
2327
24- callback ({1 , 1 }, {1 , 1 }, {1 , num_floors});
28+ auto call = [&](interval x, interval y, interval z) {
29+ auto sum_x = F[x.hi ] - F[x.lo - 1 ];
30+ auto sum_y = G[y.hi ] - G[y.lo - 1 ];
31+ auto t = sum_x * sum_y;
32+ H[z.lo ] += t;
33+ if (z.hi < num_floors) {
34+ H[z.hi + 1 ] -= t;
35+ }
36+ };
2537
38+ auto prefix = F[1 ] * G[1 ];
2639 for (int k = 2 ; k <= num_floors; ++k) {
2740 if (k > rt_n) {
2841 int z = num_floors - k + 1 ;
2942 for (int x = 2 ; ; x++) {
3043 int y_lo_ord = std::max (x, z) + 1 ;
3144 int y_hi_ord = to_ord (n / (x * z));
3245 if (y_hi_ord < y_lo_ord) break ;
33- callback ({x, x}, {y_lo_ord, y_hi_ord}, {k, k});
34- callback ({y_lo_ord, y_hi_ord}, {x, x}, {k, k});
46+ call ({x, x}, {y_lo_ord, y_hi_ord}, {k, k});
47+ call ({y_lo_ord, y_hi_ord}, {x, x}, {k, k});
3548 }
3649 }
3750
38- callback ({ 1 , 1 }, { k, k}, {k, num_floors} );
39- callback ({k, k}, { 1 , 1 }, {k, num_floors}) ;
51+ H[k] = callback ( k, prefix += H[k] );
52+ prefix += (F[k] - F[k- 1 ]) * G[ 1 ] + (G[k] - G[k- 1 ]) * F[ 1 ] ;
4053
4154 if (k <= rt_n) {
4255 int x = k;
4356 for (int y = 2 ; y < k; ++y) {
4457 int z_lo_ord = to_ord (1LL * x * y);
4558 int z_hi_ord = to_ord (n / x);
4659 if (z_hi_ord < z_lo_ord) break ;
47- callback ({x, x}, {y, y}, {z_lo_ord, z_hi_ord});
48- callback ({y, y}, {x, x}, {z_lo_ord, z_hi_ord});
60+ call ({x, x}, {y, y}, {z_lo_ord, z_hi_ord});
61+ call ({y, y}, {x, x}, {z_lo_ord, z_hi_ord});
4962 }
5063 int z_lo_ord = to_ord (1LL * x * x);
51- callback ({x, x}, {x, x}, {z_lo_ord, num_floors});
64+ call ({x, x}, {x, x}, {z_lo_ord, num_floors});
5265 }
5366 }
5467 }
5568
5669 auto Dirichlet_mul (auto &&F, auto &&G, int64_t n) {
5770 auto m = size (F);
58- std::decay_t <decltype (F)> H (m+1 );
59- exec_on_blocks (n, [&](interval x, interval y, interval z) {
60- auto sum_x = F[x.hi ] - F[x.lo - 1 ];
61- auto sum_y = G[y.hi ] - G[y.lo - 1 ];
62- auto t = sum_x * sum_y;
63- H[z.lo ] += t;
64- H[z.hi + 1 ] -= t;
71+ std::decay_t <decltype (F)> H (m);
72+ H[1 ] = F[1 ] * G[1 ];
73+ exec_on_blocks (n, H, F, G, [&](auto k, auto prefix) {
74+ return prefix + (F[k] - F[k - 1 ]) * G[1 ] + (G[k] - G[k - 1 ]) * F[1 ];
6575 });
66- std::partial_sum (begin (H), end (H), begin (H));
67- H.pop_back ();
6876 return H;
6977 }
7078
7179 auto Dirichlet_div (auto &&H, auto &&G, int64_t n) {
72- H.push_back (0 );
73- std::adjacent_difference (begin (H), end (H), begin (H));
7480 auto m = size (G);
7581 std::decay_t <decltype (G)> F (m);
7682 auto Gi = G[1 ].inv ();
77- exec_on_blocks (n, [&](interval x, interval y, interval z) {
78- if (y.lo == 1 ) [[unlikely]] {
79- F[x.hi ] = F[x.lo - 1 ] + H[z.lo ] * Gi;
80- }
81- auto sum_x = F[x.hi ] - F[x.lo - 1 ];
82- auto sum_y = G[y.hi ] - G[y.lo - 1 ];
83- auto t = sum_y * sum_x;
84- H[z.lo ] -= t;
85- H[z.hi + 1 ] += t;
83+ F[1 ] = Gi * H[1 ];
84+ exec_on_blocks (n, F, F, G, [&](auto k, auto prefix) {
85+ return F[k-1 ] + (Gi * (H[k] - prefix - (G[k] - G[k-1 ]) * F[1 ]));
8686 });
8787 return F;
8888 }
0 commit comments