Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions paddle/phi/kernels/cpu/add_position_encoding_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ void AddPositionEncodingKernel(const Context& dev_ctx,

const int half_size = enc_size / 2;
for (int i = 0; i < batch_size; ++i) {
const int max_length =
x_lod.empty() ? max_seq_len : x_lod[0][i + 1] - x_lod[0][i];
const auto max_length(x_lod.empty() ? max_seq_len
: x_lod[0][i + 1] - x_lod[0][i]);

for (int j = 0; j < max_length; ++j) {
for (int k = 0; k < half_size; ++k) {
const double val =
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void BatchNormGradFunctor(const Context& dev_ctx,
bias_arr.setZero();
}

int scale_coeff = use_global_stats ? 1 : N * sample_size;
auto scale_coeff = use_global_stats ? 1 : N * sample_size;
const auto scale_inv_var_nhw = scale_arr * inv_var_arr / scale_coeff;

DenseTensor dy_sum;
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/box_coder_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void DecodeCenterSize(const DenseTensor *target_box,
std::array<T, 4> var_data{1., 1., 1., 1.};
T *var_ptr = var_data.data();
size_t offset = i * col * len + j * len;
int prior_box_offset = axis == 0 ? j * len : i * len;
auto prior_box_offset = axis == 0 ? j * len : i * len;

T prior_box_width = prior_box_data[prior_box_offset + 2] -
prior_box_data[prior_box_offset] +
Expand All @@ -135,7 +135,7 @@ void DecodeCenterSize(const DenseTensor *target_box,

T target_box_center_x = 0, target_box_center_y = 0;
T target_box_width = 0, target_box_height = 0;
int prior_var_offset = axis == 0 ? j * len : i * len;
auto prior_var_offset = axis == 0 ? j * len : i * len;
if (var_size == 2) {
std::memcpy(var_ptr,
prior_box_var->data<T>() + prior_var_offset,
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ void BroadcastTensorsGradKernel(const Context& dev_ctx,
std::vector<int> reduce_dims_vec;
std::vector<int> reshape_dims_vec;
for (int j = 0; j < in_rank; j++) {
int out_axis = out_rank - j - 1;
int in_axis = in_rank - j - 1;
auto out_axis = out_rank - j - 1;
auto in_axis = in_rank - j - 1;

reshape_dims_vec.push_back(static_cast<int>(input_dims[j]));
if (out_axis < 0 || output_dims[out_axis] != input_dims[in_axis]) {
Expand Down
5 changes: 3 additions & 2 deletions paddle/phi/kernels/cpu/conv_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ inline int ConvOutSize(int input_size,
int pad_left,
int pad_right,
int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size =
const auto dkernel(dilation * (filter_size - 1) + 1);

auto output_size =
(input_size + (pad_left + pad_right) - dkernel) / stride + 1;

PADDLE_ENFORCE_GT(
Expand Down
8 changes: 4 additions & 4 deletions paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const CPUContext& dev_ctx,
const int remain = d / axis_dim;
for (int i = 0; i < n; ++i) { // for each sample_1_dim
for (int j = 0; j < remain; j++) { // for each sample_other_dims
int idx = i * remain + j; // this sample's label_idx. for 1d case,
// remain=1 and j=0, so, idx = i
auto idx = i * remain + j; // this sample's label_idx. for 1d case,
// remain=1 and j=0, so, idx = i
auto lbl = static_cast<int64_t>(label_data[idx]); // NOLINT
if (lbl == ignore_index) {
for (int k = 0; k < axis_dim; ++k) { // for each class id's label
Expand Down Expand Up @@ -147,8 +147,8 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const CPUContext& dev_ctx,
const int remain = d / axis_dim;
for (int i = 0; i < n; ++i) { // for each sample_1_dim
for (int j = 0; j < remain; j++) { // for each sample_other_dims
int idx = i * remain + j; // this sample's label_idx. for 1d case,
// remain=1 and j=0, so, idx = i
auto idx = i * remain + j; // this sample's label_idx. for 1d case,
// remain=1 and j=0, so, idx = i
auto lbl = static_cast<int64_t>(label_data[idx]); // NOLINT
if (lbl == ignore_index) {
for (int k = 0; k < axis_dim; ++k) { // for each class id's label
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/distribute_fpn_proposals_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void DistributeFpnProposalsKernel(
std::vector<DenseTensor*> multi_fpn_rois,
std::vector<DenseTensor*> multi_level_rois_num,
DenseTensor* restore_index) {
const int num_level = max_level - min_level + 1;
const auto num_level(max_level - min_level + 1);

// check that the fpn_rois is not empty
if (!rois_num.get_ptr()) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/lookup_table_dequant_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void LookupTableDequantKernel(const Context &dev_ctx,
ids[i]));
float min = *(table + ids[i] * quant_number);
float max = *(table + ids[i] * quant_number + 1);
int offset = ids[i] * quant_number + 2;
auto offset = ids[i] * quant_number + 2;
const unsigned char *tensor_buf =
reinterpret_cast<const unsigned char *>(table + offset);
dequant(
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/lrn_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ struct LRNFunctor<phi::CPUContext, T> {
}
for (int c = 1; c < C; ++c) {
// copy previous scale
int mid_offset = i * fea_size + c * img_size;
auto mid_offset = i * fea_size + c * img_size;
std::memcpy(mdata + mid_offset,
mdata + mid_offset - img_size,
img_size * sizeof(T));
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void LapackSVD(const T* x_data,
int mn = std::min(rows, cols);
T* a = const_cast<T*>(x_data); // NOLINT
int lda = rows;
int lwork = 3 * mn + std::max(mx, 7 * mn);
auto lwork = 3 * mn + std::max(mx, 7 * mn);
std::vector<phi::dtype::Real<T>> rwork(
std::max(5 * mn * mn + 5 * mn, 2 * mx * mn + 2 * mn * mn + mn));
std::vector<T> work(lwork);
Expand Down
8 changes: 4 additions & 4 deletions paddle/phi/kernels/cpu/psroi_pool_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,12 @@ void PsroiPoolGradKernel(const Context& dev_ctx,
int pw = i % pooled_width;
int ph = (i / pooled_width) % pooled_height;
int c = (i / pooled_width / pooled_height) % output_channels;
int n = i / pooled_width / pooled_height / output_channels;
auto n = i / pooled_width / pooled_height / output_channels;

// set roi_batch_id
int roi_batch_id = rois_batch_id_data[n];
int input_channel = (c * pooled_height + ph) * pooled_width + pw;
int input_offset =
auto input_channel = (c * pooled_height + ph) * pooled_width + pw;
auto input_offset =
(roi_batch_id * input_channels + input_channel) * height * width;
T* offset_dx_data = dx_data + input_offset;

Expand Down Expand Up @@ -124,7 +124,7 @@ void PsroiPoolGradKernel(const Context& dev_ctx,
T diff_val = is_empty ? 0. : dout_data[i] / bin_area;
for (int ih = hstart; ih < hend; ++ih) {
for (int iw = wstart; iw < wend; ++iw) {
int input_index = ih * width + iw;
auto input_index = ih * width + iw;
offset_dx_data[input_index] += diff_val;
}
}
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/psroi_pool_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void PsroiPoolKernel(const Context& dev_ctx,
wend = std::min(std::max(wend, 0), width);

int output_index = out_row_offset + pw;
int input_channel = (c * pooled_height + ph) * pooled_width + pw;
auto input_channel = (c * pooled_height + ph) * pooled_width + pw;
int input_plane_offset = static_cast<int>(
roi_batch_id * in_stride[0] + input_channel * in_stride[1]);
const T* offset_input_data = input_data + input_plane_offset;
Expand Down
6 changes: 3 additions & 3 deletions paddle/phi/kernels/cpu/rnn_functor.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void ResetParameterVector(const std::vector<TensorType>& raw_params_vec,
for (int j = 0; j < layer_weight_size; j++) {
int k = j % 4;
const int& section = j / 4;
int tensor_idx = i * 2 * direction_num + section * 2 + k % 2;
auto tensor_idx = i * 2 * direction_num + section * 2 + k % 2;
if (k >= 2) {
tensor_idx += bias_start_idx;
}
Expand Down Expand Up @@ -217,8 +217,8 @@ void AllocateReserveData(const Context& dev_ctx,
int direction_num = is_bidirec ? 2 : 1;
int time_step = input->dims()[0];
int batch_size = input->dims()[1];
int block_size = direction_num * time_step * batch_size * hidden_size;
int hidden_data_idx = (num_layers - 1);
auto block_size = direction_num * time_step * batch_size * hidden_size;
auto hidden_data_idx = (num_layers - 1);
if (is_lstm(mode)) {
hidden_data_idx += (gate_num + 2) * num_layers;
} else if (is_gru(mode)) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/rnn_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ struct GradLayer {
const std::string& mode) {
int direction_num = is_bidirec ? 2 : 1;
int current_reverse_idx = is_reverse ? 1 : 0;
int current_layer_idx = direction_num * layer_idx + current_reverse_idx;
auto current_layer_idx = direction_num * layer_idx + current_reverse_idx;
int begin_idx = 0;
if (is_reverse) {
begin_idx = time_step;
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/roi_align_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
out_grad_data + n * out_stride[0] + c * out_stride[1];
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
int pool_index = ph * pooled_width + pw;
auto pool_index = ph * pooled_width + pw;
T out_grad_this_bin = batch_out_grad_data[pool_index];
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/roi_pool_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void RoiPoolGradKernel(const Context& dev_ctx,
for (int c = 0; c < channels; ++c) {
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
int pool_index = ph * pooled_width + pw;
auto pool_index = ph * pooled_width + pw;
if (arg_max_data[pool_index] >= 0) {
auto index = arg_max_data[pool_index];
batch_grad_data[index] += out_grad_data[pool_index];
Expand Down
5 changes: 3 additions & 2 deletions paddle/phi/kernels/cpu/roi_pool_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void RoiPoolKernel(const Context& dev_ctx,
wstart = std::min(std::max(wstart + box_start_w, 0), width);
wend = std::min(std::max(wend + box_start_w, 0), width);

const int pool_index = ph * pooled_width + pw;
const auto pool_index(ph * pooled_width + pw);

// Define an empty pooling region to be zero
bool is_empty = (hend <= hstart) || (wend <= wstart);
Expand All @@ -145,7 +145,8 @@ void RoiPoolKernel(const Context& dev_ctx,

for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
const int index = h * width + w;
const auto index(h * width + w);

if (batch_data[index] > output_data[pool_index]) {
output_data[pool_index] = batch_data[index];
arg_max_data[pool_index] = index;
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/sequence_expand_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct SequenceExpandGradFunctor<phi::CPUContext, T> {
if (x_seq_len == 0) continue;
auto dx_sub = dx->Slice(x_start, x_end);
dx_sub.Resize(common::flatten_to_1d(dx_sub.dims()));
int dout_end = dout_offset + repeat_num * x_seq_len;
auto dout_end = dout_offset + repeat_num * x_seq_len;
auto dout_sub = dout.Slice(dout_offset, dout_end);
dout_sub.Resize({repeat_num, dx_sub.dims()[0]});
phi::funcs::ColwiseSum<phi::CPUContext, T> col_sum;
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/svd_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ void BatchSvd(const T* X,
// NOTE: this function is row major, because this function called the lapack.
int stride = rows * cols;
int k = std::min(rows, cols);
int stride_u = full ? rows * rows : k * rows;
int stride_v = full ? cols * cols : k * cols;
auto stride_u = full ? rows * rows : k * rows;
auto stride_v = full ? cols * cols : k * cols;
for (int i = 0; i < batches; ++i) {
LapackSvd<T>(X + i * stride,
U + i * stride_u,
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/unpool_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ void Unpool3dGrad(const Context& dev_ctx,
const int output_depth = static_cast<int>(out.dims()[2]);
const int output_height = static_cast<int>(out.dims()[3]);
const int output_width = static_cast<int>(out.dims()[4]);
int input_feasize = input_depth * input_height * input_width;
int output_feasize = output_depth * output_height * output_width;
auto input_feasize = input_depth * input_height * input_width;
auto output_feasize = output_depth * output_height * output_width;
const IndT* indices_data = indices.data<IndT>();

for (int b = 0; b < batch_size; ++b) {
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/unpool_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ void Unpool3d(const Context& dev_ctx,
const int output_depth = static_cast<int>(out->dims()[2]);
const int output_height = static_cast<int>(out->dims()[3]);
const int output_width = static_cast<int>(out->dims()[4]);
int input_feasize = input_depth * input_height * input_width;
int output_feasize = output_depth * output_height * output_width;
auto input_feasize = input_depth * input_height * input_width;
auto output_feasize = output_depth * output_height * output_width;
const T* input_data = x.data<T>();
const IndT* indices_data = indices.data<IndT>();
for (int b = 0; b < batch_size; ++b) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/viterbi_decode_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ void ViterbiDecodeKernel(const Context& dev_ctx,
std::vector<DenseTensor> historys;
// We create tensor buffer in order to avoid allocating memory frequently
// 10 means allocate 10*batch_size bytes memory, such as int_mask, zero...
int buffer_size = batch_size * (n_labels + 1) * seq_len + 10 * batch_size;
auto buffer_size = batch_size * (n_labels + 1) * seq_len + 10 * batch_size;
DenseTensor int_buffer = Empty<int64_t>(dev_ctx, {buffer_size});
funcs::TensorBuffer int_tensor_buffer(int_buffer);
// create float tensor buffer
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/yolo_loss_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ void YoloLossKernel(const Context& dev_ctx,
// If best IoU is bigger then ignore_thresh,
// ignore the objectness loss.
if (best_iou > ignore_thresh) {
int obj_idx = (i * mask_num + j) * stride + k * w + l;
auto obj_idx = (i * mask_num + j) * stride + k * w + l;
obj_mask_data[obj_idx] = static_cast<T>(-1);
}
// all losses should be calculated if best IoU
Expand Down Expand Up @@ -339,7 +339,7 @@ void YoloLossKernel(const Context& dev_ctx,
stride,
score);

int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
auto obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
obj_mask_data[obj_idx] = score;

int label = gt_label_data[i * b + t];
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/funcs/aligned_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static int GetVectorizedSize(const DenseTensor* tensor) {
return 1;
}
constexpr int max_load_bits = 128;
int valid_vec_size = max_load_bits / CHAR_BIT / element_size;
auto valid_vec_size = max_load_bits / CHAR_BIT / element_size;
uint64_t address = reinterpret_cast<uint64_t>(tensor->data());

// Currently, decide to deal with no more than 4 data once while adopting
Expand Down
26 changes: 13 additions & 13 deletions paddle/phi/kernels/funcs/blas/blas_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1620,13 +1620,13 @@ void Blas<phi::CPUContext>::BatchedGEMMWithHead(CBLAS_TRANSPOSE transA,
int sub_width = W2 / head_number;

for (int i = 0; i < head_number; i++) {
int sub_matA_offset = (transA == CblasNoTrans)
? i * (W1 / head_number)
: i * (W1 / head_number) * H1;
int sub_matB_offset = (transB == CblasNoTrans)
? i * (W2 / head_number)
: i * (W2 / head_number) * H2;
int sub_matC_offset = i * W2 / head_number;
auto sub_matA_offset = (transA == CblasNoTrans)
? i * (W1 / head_number)
: i * (W1 / head_number) * H1;
auto sub_matB_offset = (transB == CblasNoTrans)
? i * (W2 / head_number)
: i * (W2 / head_number) * H2;
auto sub_matC_offset = i * W2 / head_number;
for (int k = 0; k < batchCount; ++k) {
a_array[k] = &A[k * strideA] + sub_matA_offset;
b_array[k] = &B[k * strideB] + sub_matB_offset;
Expand Down Expand Up @@ -1665,12 +1665,12 @@ void Blas<phi::CPUContext>::BatchedGEMMWithHead(CBLAS_TRANSPOSE transA,
int sub_width = W1 / head_number;

for (int i = 0; i < head_number; i++) {
int sub_matA_offset = (transA == CblasNoTrans)
? i * (W1 / head_number)
: i * (W1 / head_number) * H1;
int sub_matB_offset = (transB == CblasNoTrans)
? i * (W1 / head_number) * W2
: i * (W1 / head_number);
auto sub_matA_offset = (transA == CblasNoTrans)
? i * (W1 / head_number)
: i * (W1 / head_number) * H1;
auto sub_matB_offset = (transB == CblasNoTrans)
? i * (W1 / head_number) * W2
: i * (W1 / head_number);
int sub_matC_offset = i * W2;
for (int k = 0; k < batchCount; ++k) {
a_array[k] = &A[k * strideA] + sub_matA_offset;
Expand Down
6 changes: 3 additions & 3 deletions paddle/phi/kernels/funcs/block_radix_topk.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class BlockRadixTopKGlobalMemory {
assert(k < size && k > 0);
int target_k = k;
UnsignedBits key_pattern = 0;
int digit_pos = sizeof(KeyT) * 8 - RADIX_BITS;
auto digit_pos = sizeof(KeyT) * 8 - RADIX_BITS;
for (; digit_pos >= 0; digit_pos -= RADIX_BITS) {
UpdateSharedBins(data, size, digit_pos, key_pattern);
InclusiveScanBins();
Expand Down Expand Up @@ -239,7 +239,7 @@ class BlockRadixTopKRegister {

#pragma unroll
for (unsigned int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) {
int idx = KEY * BLOCK_SIZE + tid_;
auto idx = KEY * BLOCK_SIZE + tid_;
unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]);
if (GREATER) unsigned_keys[KEY] = ~unsigned_keys[KEY];
if (idx < valid_count) search_mask_ |= (1U << KEY);
Expand All @@ -248,7 +248,7 @@ class BlockRadixTopKRegister {
int target_k = k;
int prefix_k = 0;

for (int digit_pos = sizeof(KeyT) * 8 - RADIX_BITS; digit_pos >= 0;
for (auto digit_pos = sizeof(KeyT) * 8 - RADIX_BITS; digit_pos >= 0;
digit_pos -= RADIX_BITS) {
UpdateSharedBins(unsigned_keys, digit_pos, prefix_k);
InclusiveScanBins();
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/funcs/broadcast_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ void LaunchBroadcastKernel(
const int blocks = 8;
int read_lens = configs[0].buf_len;
auto stream = dev_ctx.x_context()->xpu_stream;
int main_offset = (numel / (read_lens * threads)) * read_lens * threads;
auto main_offset = (numel / (read_lens * threads)) * read_lens * threads;
int tail_tid = numel % (read_lens * threads);

VectorizedBroadcastKernel<Functor, OutT, Arity, NumOuts, VecSize, false>
Expand All @@ -465,7 +465,7 @@ void LaunchBroadcastKernel(
auto stream = dev_ctx.stream();
auto threads = gpu_config.GetBlockSize();
auto blocks = gpu_config.block_per_grid;
int main_offset = (numel / (VecSize * threads)) * VecSize * threads;
auto main_offset = (numel / (VecSize * threads)) * VecSize * threads;
int tail_tid = numel % (VecSize * threads);

if (classifier.all_elementwise) {
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/funcs/correlation_funcs.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ __global__ void channel_first(const T *input,
int64_t global_idx = static_cast<int64_t>(blockIdx.x);
int64_t stride = static_cast<int64_t>(gridDim.x);

int p_H = H + 2 * pad_size;
int p_W = W + 2 * pad_size;
auto p_H = H + 2 * pad_size;
auto p_W = W + 2 * pad_size;
int64_t p_dimcw = channel * p_W;
int64_t p_dimchw = channel * p_H * p_W;

Expand Down
Loading
Loading