-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Description
// Vertical (y-axis) pass of an 11-tap separable convolution over a tile.
//
// Each thread sums eleven vertically adjacent samples of `pixels`, weighted by
// the coefficients G_00 .. G_10. The samples are rows
// (thread_index().y + 0) .. (thread_index().y + 10), all read from column
// (thread_index().x + 5).
//
// Parameters:
//   pixels - CY x CCX tile of floats to filter. The original author's note says
//            the tile is 42x32 at this stage (down from 42x42) - TODO confirm
//            against the CY / CCX definitions.
//   H, W   - not read by this function; kept so the signature is unchanged.
//   sq     - not read by this function; kept so the signature is unchanged.
//
// Returns the weighted sum for the calling thread's output pixel.
//
// NOTE(review): the column index carries the same +5 offset as the row index.
// The original author questioned this. If CCX equals the block width, the
// column index can exceed CCX - 1 and spill into the following row (or past the
// end of the tile on the last rows). Confirm against the definition of CCX and
// the code that fills `pixels` before changing the offset.
__device__ float do_separable_conv_y(float pixels[CY][CCX], int H, int W, bool sq = false) {
  auto block = cg::this_thread_block();

  // +5 moves from the thread's own index to the centre of its 11-sample
  // window; the taps below reach 5 rows above and 5 rows below that centre.
  const int center_row = block.thread_index().y + 5;
  const int col = block.thread_index().x + 5;  // see NOTE(review) above

  // Accumulate top-to-bottom in a fixed order so the floating-point result
  // stays the same as the original sequence of additions.
  float val = 0.0f;
  val += G_00 * pixels[center_row - 5][col];
  val += G_01 * pixels[center_row - 4][col];
  val += G_02 * pixels[center_row - 3][col];
  val += G_03 * pixels[center_row - 2][col];
  val += G_04 * pixels[center_row - 1][col];
  val += G_05 * pixels[center_row    ][col];
  val += G_06 * pixels[center_row + 1][col];
  val += G_07 * pixels[center_row + 2][col];
  val += G_08 * pixels[center_row + 3][col];
  val += G_09 * pixels[center_row + 4][col];
  val += G_10 * pixels[center_row + 5][col];
  return val;
}
In the do_separable_conv_y function, the input shared memory array pixels has dimensions [CY][CCX], which is