-diff --git tensorflow/stream_executor/cuda/cuda_gpu_executor.cc tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
+index 2e04827..9d81923 100644
+--- a/tensorflow/core/kernels/BUILD
++++ b/tensorflow/core/kernels/BUILD
+@@ -1184,7 +1184,7 @@ tf_kernel_libraries(
+ "segment_reduction_ops",
+ "scan_ops",
+ "sequence_ops",
+- "sparse_matmul_op",
++ #DC "sparse_matmul_op",
+ ],
+ deps = [
+ ":bounds_check",
+diff --git a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+index 02058a8..880a0c3 100644
+--- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
++++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+@@ -43,8 +43,14 @@ struct BatchSelectFunctor<GPUDevice, T> {
+ const int all_but_batch = then_flat_outer_dims.dimension(1);
+
+ #if !defined(EIGEN_HAS_INDEX_LIST)
+- Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
+- Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
++ // Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
++ Eigen::array<int, 2> broadcast_dims;
++ broadcast_dims[0] = 1;
++ broadcast_dims[1] = all_but_batch;
++ // Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
++ Eigen::Tensor<int, 2>::Dimensions reshape_dims;
++ reshape_dims[0] = batch;
++ reshape_dims[1] = 1;
+ #else
+ Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
+ broadcast_dims.set(1, all_but_batch);
+diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+index a177696..28d2f59 100644
+--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
++++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
+@@ -104,9 +104,17 @@ struct SparseTensorDenseMatMulFunctor<GPUDevice, T, ADJ_A, ADJ_B> {
+ int n = (ADJ_B) ? b.dimension(0) : b.dimension(1);
+
+ #if !defined(EIGEN_HAS_INDEX_LIST)
+- Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
+- Eigen::array<int, 2> n_by_1{{ n, 1 }};
+- Eigen::array<int, 1> reduce_on_rows{{ 0 }};
++ // Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
++ Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz;
++ matrix_1_by_nnz[0] = 1;
++ matrix_1_by_nnz[1] = nnz;
++ // Eigen::array<int, 2> n_by_1{{ n, 1 }};
++ Eigen::array<int, 2> n_by_1;
++ n_by_1[0] = n;
++ n_by_1[1] = 1;
++ // Eigen::array<int, 1> reduce_on_rows{{ 0 }};
++ Eigen::array<int, 1> reduce_on_rows;
++ reduce_on_rows[0] = 0;
+ #else
+ Eigen::IndexList<Eigen::type2index<1>, int> matrix_1_by_nnz;
+ matrix_1_by_nnz.set(1, nnz);
+diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
 index b2da109..8ee1f3a 100644
---- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
-+++ tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
++++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
 @@ -870,7 +870,10 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
 // For anything more complicated/prod-focused than this, you'll likely want to
 // turn to gsys' topology modeling.
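
For context on the pattern the two kernel hunks apply, here is a minimal, self-contained sketch (not part of the patch): when EIGEN_HAS_INDEX_LIST is unavailable, Eigen index arrays are filled element by element instead of with the brace initializers that some CUDA toolchains reject. The main() wrapper, the sample sizes, and the reuse of the names batch/all_but_batch are illustrative assumptions, not code from the tree.

// Sketch of the EIGEN_HAS_INDEX_LIST fallback used in the hunks above.
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  const int batch = 4;
  const int all_but_batch = 8;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // Element-wise assignment replaces `{{ 1, all_but_batch }}`-style braces.
  Eigen::array<int, 2> broadcast_dims;
  broadcast_dims[0] = 1;
  broadcast_dims[1] = all_but_batch;
  Eigen::Tensor<int, 2>::Dimensions reshape_dims;
  reshape_dims[0] = batch;
  reshape_dims[1] = 1;
#else
  // Newer Eigen keeps the constant dimension as a compile-time value.
  Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
  broadcast_dims.set(1, all_but_batch);
  Eigen::IndexList<int, Eigen::type2index<1> > reshape_dims;
  reshape_dims.set(0, batch);
#endif

  // Reshape a length-`batch` vector to (batch, 1) and broadcast it to
  // (batch, all_but_batch), the same pattern BatchSelectFunctor uses.
  Eigen::Tensor<float, 1> cond(batch);
  cond.setConstant(1.0f);
  Eigen::Tensor<float, 2> tiled =
      cond.reshape(reshape_dims).broadcast(broadcast_dims);
  return tiled.dimension(1) == all_but_batch ? 0 : 1;
}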