From 9a9b173cd178e7c07a98896a009c2a2021a6b247 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Thu, 7 Aug 2025 21:55:10 +0300 Subject: [PATCH 1/3] cuda: update videostab for cuda 13.0 --- modules/videostab/src/cuda/global_motion.cu | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/videostab/src/cuda/global_motion.cu b/modules/videostab/src/cuda/global_motion.cu index 7eca6ff76b7..c20ccfc2ed4 100644 --- a/modules/videostab/src/cuda/global_motion.cu +++ b/modules/videostab/src/cuda/global_motion.cu @@ -52,6 +52,11 @@ namespace cv { namespace cuda { namespace device { namespace globmotion { __constant__ float cml[9]; __constant__ float cmr[9]; +struct is_zero +{ + __host__ __device__ bool operator()(uchar x) const { return x == 0; } +}; + int compactPoints(int N, float *points0, float *points1, const uchar *mask) { thrust::device_ptr dpoints0((float2*)points0); @@ -60,7 +65,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask) return (int)(thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)), thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)), - dmask, thrust::not1(thrust::identity())) + dmask, is_zero()) - thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1))); } From 700a1725ddaab843afd8be7033551d46867aff18 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Sat, 9 Aug 2025 08:03:36 +0300 Subject: [PATCH 2/3] cudafilters: Add comment for use with CUDA streams --- .../include/opencv2/cudafilters.hpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/modules/cudafilters/include/opencv2/cudafilters.hpp b/modules/cudafilters/include/opencv2/cudafilters.hpp index 2aa9c846462..d92bdde2caa 100644 --- a/modules/cudafilters/include/opencv2/cudafilters.hpp +++ b/modules/cudafilters/include/opencv2/cudafilters.hpp @@ -97,6 +97,9 @@ center. 
@param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa boxFilter */ CV_EXPORTS_W Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1, -1), @@ -115,6 +118,9 @@ center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa filter2D */ CV_EXPORTS_W Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1, -1), @@ -134,6 +140,9 @@ applied (see getDerivKernels ). @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa Laplacian */ CV_EXPORTS_W Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, @@ -156,6 +165,9 @@ the aperture center. borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. 
+ @sa sepFilter2D */ CV_EXPORTS_W Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, @@ -178,6 +190,9 @@ applied. For details, see getDerivKernels . @param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize = false, double scale = 1, @@ -196,6 +211,9 @@ applied. For details, see getDerivKernels . borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa Sobel */ CV_EXPORTS_W Ptr createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, @@ -213,6 +231,9 @@ applied. See getDerivKernels for details. borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa Scharr */ CV_EXPORTS_W Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, @@ -233,6 +254,9 @@ CV_EXPORTS_W Ptr createScharrFilter(int srcType, int dstType, int dx, in borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. 
+@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa GaussianBlur */ CV_EXPORTS_W Ptr createGaussianFilter(int srcType, int dstType, Size ksize, @@ -258,6 +282,9 @@ CV_EXPORTS_W Ptr createGaussianFilter(int srcType, int dstType, Size ksi is at the center. @param iterations Number of times erosion and dilation to be applied. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa morphologyEx */ CV_EXPORTS_W Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); @@ -272,6 +299,9 @@ CV_EXPORTS_W Ptr createMorphologyFilter(int op, int srcType, InputArray @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createBoxMaxFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), @@ -284,6 +314,9 @@ CV_EXPORTS_W Ptr createBoxMaxFilter(int srcType, Size ksize, @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. 
Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), @@ -300,6 +333,9 @@ CV_EXPORTS_W Ptr createBoxMinFilter(int srcType, Size ksize, @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -311,6 +347,9 @@ CV_EXPORTS_W Ptr createRowSumFilter(int srcType, int dstType, int ksize, @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -329,6 +368,9 @@ Outputs an image that has been filtered using a median-filtering formulation. Details on this algorithm can be found in: Green, O., 2017. "Efficient scalable median filtering using histogram-based operations", IEEE Transactions on Image Processing, 27(5), pp.2217-2228. 
+ +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createMedianFilter(int srcType, int windowSize, int partition = 128); From 7d168ffe029140c0537c391f80060f173be8ba90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Haug?= Date: Mon, 11 Aug 2025 12:07:46 +0200 Subject: [PATCH 3/3] Merge pull request #3983 from mitresthen:warn-warpaffine-overlap Add assert to ensure using non-overlapping memory regions #3983 This pr addresses this issue: https://github.com/opencv/opencv/issues/27429 where the user did not realize that the warpaffine function requires non-overlapping src and dst memory regions. The code now compares the input memory regions and asserts that they do not overlap. There is also a test for this functionality. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- .../include/opencv2/cudawarping.hpp | 2 ++ modules/cudawarping/src/warp.cpp | 2 ++ modules/cudawarping/test/test_warp_affine.cpp | 30 +++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/modules/cudawarping/include/opencv2/cudawarping.hpp b/modules/cudawarping/include/opencv2/cudawarping.hpp index b9ca957358e..6c920fa8c24 100644 --- a/modules/cudawarping/include/opencv2/cudawarping.hpp +++ b/modules/cudawarping/include/opencv2/cudawarping.hpp @@ -118,6 +118,7 @@ CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, double fx= @param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are supported. @param dst Destination image with the same type as src . The size is dsize . + **In-place operation (src == dst) is not supported and will result in an error.** @param M *2x3* Mat or UMat transformation matrix. @param dsize Size of the destination image. @param flags Combination of interpolation methods (see resize) and the optional flag @@ -127,6 +128,7 @@ INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are support @param borderValue @param stream Stream for the asynchronous version. +@note In-place operation is not supported: if src and dst start at the same data pointer, an exception is thrown. Only the start pointers are compared, so other partially overlapping src/dst regions are not detected and must be avoided by the caller. 
@sa warpAffine */ CV_EXPORTS void warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, diff --git a/modules/cudawarping/src/warp.cpp b/modules/cudawarping/src/warp.cpp index 8690f54085d..839b786ce45 100644 --- a/modules/cudawarping/src/warp.cpp +++ b/modules/cudawarping/src/warp.cpp @@ -208,6 +208,8 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size _dst.create(dsize, src.type()); GpuMat dst = _dst.getGpuMat(); + CV_Assert( src.data != dst.data && "In-place operation not supported for cv::cuda::warpAffine" ); + Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); diff --git a/modules/cudawarping/test/test_warp_affine.cpp b/modules/cudawarping/test/test_warp_affine.cpp index d26a5fdeb7c..ebbba914ced 100644 --- a/modules/cudawarping/test/test_warp_affine.cpp +++ b/modules/cudawarping/test/test_warp_affine.cpp @@ -222,6 +222,36 @@ CUDA_TEST_P(WarpAffine, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 1e-1 : 1.0); } +CUDA_TEST_P(WarpAffine, OverlapDetection) +{ + cv::Mat src = randomMat(size, type); + ASSERT_FALSE(src.empty()); + cv::cuda::GpuMat gpuSrc; + gpuSrc.upload(src); + + cv::Mat M = cv::Mat::eye(2, 3, CV_64FC1); + int flags = interpolation; + if (inverse) + flags |= cv::WARP_INVERSE_MAP; + + { + cv::cuda::GpuMat gpuDst(gpuSrc, cv::Rect(0, 0, size.width, size.height)); + + EXPECT_THROW( + cv::cuda::warpAffine(gpuSrc, gpuDst, M, size, flags, borderType, cv::Scalar::all(0)), + cv::Exception); + } + + { + cv::cuda::GpuMat gpuDst(size, gpuSrc.type()); + ASSERT_NE(gpuSrc.data, gpuDst.data); // Confirm they are distinct + + EXPECT_NO_THROW({ + cv::cuda::warpAffine(gpuSrc, gpuDst, M, size, flags, borderType, cv::Scalar::all(0)); + }); + } +} + INSTANTIATE_TEST_CASE_P(CUDA_Warping, WarpAffine, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES,