@@ -45,7 +45,7 @@ TEST(workspaces, tile_vecElemMul_NoTail) {
4545 expected.compile ();
4646 expected.assemble ();
4747 expected.compute ();
48- ASSERT_TENSOR_EQ (A, expected );
48+ ASSERT_TENSOR_EQ (expected, A );
4949}
5050
5151TEST (workspaces, tile_vecElemMul_Tail1) {
@@ -83,7 +83,7 @@ TEST(workspaces, tile_vecElemMul_Tail1) {
8383 expected.compile ();
8484 expected.assemble ();
8585 expected.compute ();
86- ASSERT_TENSOR_EQ (A, expected );
86+ ASSERT_TENSOR_EQ (expected, A );
8787}
8888
8989TEST (workspaces, tile_vecElemMul_Tail2) {
@@ -121,7 +121,7 @@ TEST(workspaces, tile_vecElemMul_Tail2) {
121121 expected.compile ();
122122 expected.assemble ();
123123 expected.compute ();
124- ASSERT_TENSOR_EQ (A, expected );
124+ ASSERT_TENSOR_EQ (expected, A );
125125
126126// ir::IRPrinter irp = ir::IRPrinter(cout);
127127//
@@ -171,7 +171,7 @@ TEST(workspaces, tile_denseMatMul) {
171171 expected.compile ();
172172 expected.assemble ();
173173 expected.compute ();
174- ASSERT_TENSOR_EQ (A, expected );
174+ ASSERT_TENSOR_EQ (expected, A );
175175
176176// ir::IRPrinter irp = ir::IRPrinter(cout);
177177//
@@ -218,7 +218,7 @@ TEST(workspaces, precompute2D_add) {
218218 expected.compile ();
219219 expected.assemble ();
220220 expected.compute ();
221- ASSERT_TENSOR_EQ (A, expected );
221+ ASSERT_TENSOR_EQ (expected, A );
222222
223223}
224224
@@ -263,7 +263,7 @@ TEST(workspaces, precompute4D_add) {
263263 expected.compile ();
264264 expected.assemble ();
265265 expected.compute ();
266- ASSERT_TENSOR_EQ (A, expected );
266+ ASSERT_TENSOR_EQ (expected, A );
267267}
268268
269269TEST (workspaces, precompute4D_multireduce) {
@@ -305,7 +305,7 @@ TEST(workspaces, precompute4D_multireduce) {
305305 expected.compile ();
306306 expected.assemble ();
307307 expected.compute ();
308- ASSERT_TENSOR_EQ (A, expected );
308+ ASSERT_TENSOR_EQ (expected, A );
309309}
310310
311311TEST (workspaces, precompute3D_TspV) {
@@ -344,7 +344,7 @@ TEST(workspaces, precompute3D_TspV) {
344344 expected.compile ();
345345 expected.assemble ();
346346 expected.compute ();
347- ASSERT_TENSOR_EQ (A, expected );
347+ ASSERT_TENSOR_EQ (expected, A );
348348
349349}
350350
@@ -388,7 +388,7 @@ TEST(workspaces, precompute3D_multipleWS) {
388388 expected.compile ();
389389 expected.assemble ();
390390 expected.compute ();
391- ASSERT_TENSOR_EQ (A, expected );
391+ ASSERT_TENSOR_EQ (expected, A );
392392
393393}
394394
@@ -431,6 +431,123 @@ TEST(workspaces, precompute3D_renamedIVars_TspV) {
431431 expected.compile ();
432432 expected.assemble ();
433433 expected.compute ();
434- ASSERT_TENSOR_EQ (A, expected );
434+ ASSERT_TENSOR_EQ (expected, A );
435435
436436}
437+
438+ TEST (workspaces, DISABLED_tile_dotProduct_1) { // Computes A() = B(i) * C(i) with a bound -> split -> precompute schedule and compares it with the unscheduled kernel.
439+ // FIXME: Disabled because the precompute algorithm currently does not appropriately
440+ // optimize = from += when rewriting a statement for BOTH the producer and consumer
441+ // sides of a where-statement insertion.
442+ // Although always using += is functionally CORRECT, it fails the GPU tests since it
443+ // would result in scattering.
444+ int N = 1024 ; // length of the dense input vectors B and C
445+ Tensor<double > A (" A" ); // result tensor; declared without dimensions and accessed as A() below
446+ Tensor<double > B (" B" , {N}, {Dense});
447+ Tensor<double > C (" C" , {N}, {Dense});
448+
449+ for (int i = 0 ; i < N; i++) { // fill both inputs so that B[i] == C[i] == i
450+ B.insert ({i}, (double ) i);
451+ C.insert ({i}, (double ) i);
452+ }
453+
454+ B.pack ();
455+ C.pack ();
456+
457+ IndexVar i (" i" );
458+ IndexVar i_bounded (" i_bounded" ); // target variable of the bound(...) call below
459+ IndexVar i0 (" i0" ), i1 (" i1" ); // the two variables produced by split(...) below
460+ IndexExpr BExpr = B (i); // kept as named expressions so they can be handed to precompute(...) later
461+ IndexExpr CExpr = C (i);
462+ IndexExpr precomputedExpr = (BExpr) * (CExpr);
463+ A () = precomputedExpr;
464+
465+ IndexStmt stmt = A.getAssignment ().concretize ();
466+ TensorVar B_new (" B_new" , Type (Float64, {(size_t )N}), taco::dense); // passed to the BExpr precompute below
467+ TensorVar C_new (" C_new" , Type (Float64, {(size_t )N}), taco::dense); // passed to the CExpr precompute below
468+ TensorVar precomputed (" precomputed" , Type (Float64, {(size_t )N}), taco::dense); // passed to the product precompute below
469+
470+ stmt = stmt.bound (i, i_bounded, (size_t )N, BoundType::MaxExact) // bound and split happen BEFORE any precompute in this variant
471+ .split (i_bounded, i0, i1, 32 ); // 32 is presumably the split factor (inner tile size) - TODO confirm
472+ stmt = stmt.precompute (precomputedExpr, i1, i1, precomputed); // first precompute: the product B(i) * C(i), over i1
473+
474+ cout << stmt << endl; // debug output: statement after the first precompute
475+ cout << endl;
476+
477+ stmt = stmt.precompute (BExpr, i1, i1, B_new) // then precompute each operand separately, also over i1
478+ .precompute (CExpr, i1, i1, C_new);
479+
480+
481+ stmt = stmt.concretize ();
482+ cout << stmt << endl; // debug output: final scheduled statement
483+
484+ A.compile (stmt); // compile the scheduled statement rather than A's default assignment
485+ A.assemble ();
486+ A.compute ();
487+
488+ Tensor<double > expected (" expected" ); // reference result: same expression with no scheduling applied
489+ expected () = B (i) * C (i);
490+ expected.compile ();
491+ expected.assemble ();
492+ expected.compute ();
493+ ASSERT_TENSOR_EQ (expected, A); // reference first, scheduled result second, as in the other asserts in this file
494+ }
495+
496+ TEST (workspaces, DISABLED_tile_dotProduct_2) { // Computes A() = B(i) * C(i) with a precompute -> bound -> split schedule and compares it with the unscheduled kernel.
497+ // FIXME: This is also currently disabled since split(...) scheduling commands
498+ // only split on the FIRST INSTANCE of an indexVar (they assume there is only one).
499+ // This is wrong if the indexVar is not renamed across iw_vars, since an indexVar can
500+ // then occur on BOTH the consumer and producer side and should be split across both.
501+
502+ int N = 1024 ; // length of the dense input vectors B and C
503+ Tensor<double > A (" A" ); // result tensor; declared without dimensions and accessed as A() below
504+ Tensor<double > B (" B" , {N}, {Dense});
505+ Tensor<double > C (" C" , {N}, {Dense});
506+
507+ for (int i = 0 ; i < N; i++) { // fill both inputs so that B[i] == C[i] == i
508+ B.insert ({i}, (double ) i);
509+ C.insert ({i}, (double ) i);
510+ }
511+
512+ B.pack ();
513+ C.pack ();
514+
515+ IndexVar i (" i" );
516+ IndexVar i_bounded (" i_bounded" ); // target variable of the bound(...) call below
517+ IndexVar i0 (" i0" ), i1 (" i1" ); // the two variables produced by split(...) below
518+ IndexExpr BExpr = B (i); // kept as named expressions so they can be handed to precompute(...) later
519+ IndexExpr CExpr = C (i);
520+ IndexExpr precomputedExpr = (BExpr) * (CExpr);
521+ A () = precomputedExpr;
522+
523+ IndexStmt stmt = A.getAssignment ().concretize ();
524+ TensorVar B_new (" B_new" , Type (Float64, {(size_t )N}), taco::dense); // passed to the BExpr precompute below
525+ TensorVar C_new (" C_new" , Type (Float64, {(size_t )N}), taco::dense); // passed to the CExpr precompute below
526+ TensorVar precomputed (" precomputed" , Type (Float64, {(size_t )N}), taco::dense); // passed to the product precompute below
527+
528+ stmt = stmt.precompute (precomputedExpr, i, i, precomputed); // precompute on the original i, which is reused (not renamed) on both sides
529+
530+ cout << stmt << endl; // debug output: statement after the first precompute
531+ cout << endl;
532+
533+ stmt = stmt.precompute (BExpr, i, i, B_new) // then precompute each operand separately, again over i
534+ .precompute (CExpr, i, i, C_new);
535+
536+ stmt = stmt.bound (i, i_bounded, (size_t )N, BoundType::MaxExact) // bound and split are applied AFTER the precomputes; this is the case the FIXME describes
537+ .split (i_bounded, i0, i1, 32 ); // 32 is presumably the split factor (inner tile size) - TODO confirm
538+
539+ stmt = stmt.concretize ();
540+ cout << stmt << endl; // debug output: final scheduled statement
541+
542+ A.compile (stmt); // compile the scheduled statement rather than A's default assignment
543+ A.assemble ();
544+ A.compute ();
545+
546+ Tensor<double > expected (" expected" ); // reference result: same expression with no scheduling applied
547+ expected () = B (i) * C (i);
548+ expected.compile ();
549+ expected.assemble ();
550+ expected.compute ();
551+ ASSERT_TENSOR_EQ (expected, A); // reference first, scheduled result second, as in the other asserts in this file
552+ }
553+
0 commit comments