From 2234f250df701b7520d09380bd525998a4ff35bf Mon Sep 17 00:00:00 2001 From: Sidney Rubidge Date: Mon, 24 Jan 2022 12:51:30 -0400 Subject: [PATCH] fix: Add NHWC tensor layout option fix: Enable outputting NCHW or NHWC tensor layout docs: Add tensor layout options to README docs: fix readme formatting --- README.md | 2 +- .../dnn_image_encoder_node.hpp | 3 ++ .../src/dnn_image_encoder_node.cpp | 36 +++++++++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bfa28dd..2de0176 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,7 @@ performed by this node. This node is capable of image color space conversion, im #### Available Components | Component | Topics Subscribed | Topics Published | Parameters | | ----------------- | -------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default. | +| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`tensor_layout`: The [tensor layout](https://docs.nvidia.com/deeplearning/performance/dl-performance-convolutional/index.html#tensor-layout) of the published tensor. The options are `nchw` (default) and `nhwc`. | **Note:** For best results, crop/resize input images to the same dimensions your DNN model is expecting. `DnnImageEncoderNode` will skew the aspect ratio of input images to the target dimensions. diff --git a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp index 4f6d3b3..90d3f8c 100644 --- a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp +++ b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp @@ -53,6 +53,9 @@ class DnnImageEncoderNode : public rclcpp::Node // Name of the published Tensor message const std::string tensor_name_; + // The layout of the tensor (i.e. NCHW or NHWC) + const std::string tensor_layout_; + // Method to normalize image. 
Supported types are "unit_scaling" (range is [0, 1]), // and "positive_negative" (range is [-1, 1]) and "none" for no normalization const std::string network_normalization_type_; diff --git a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp index 590c30b..d9ccadc 100644 --- a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp +++ b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp @@ -29,6 +29,12 @@ enum NormalizationTypes kImageNormalization }; +enum TensorLayouts +{ + NCHW, + NHWC +}; + const std::unordered_map<std::string, int32_t> g_str_to_normalization_type({ {"none", NormalizationTypes::kNone}, {"unit_scaling", NormalizationTypes::kUnitScaling}, @@ -36,6 +42,11 @@ const std::unordered_map<std::string, int32_t> g_str_to_normalization_type({ {"image_normalization", NormalizationTypes::kImageNormalization}} ); +const std::unordered_map<std::string, int32_t> g_str_to_tensor_layout({ + {"nchw", TensorLayouts::NCHW}, + {"nhwc", TensorLayouts::NHWC}} ); + const std::unordered_map<std::string, std::string> g_str_to_image_encoding({ {"rgb8", sensor_msgs::image_encodings::RGB8}, {"bgr8", sensor_msgs::image_encodings::BGR8}, @@ -57,6 +68,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl std::string image_encoding_; std::string normalization_type_; std::string tensor_name_; + std::string tensor_layout_; bool maintain_aspect_ratio_; bool center_crop_; std::vector<double> image_mean_; @@ -70,6 +82,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl image_encoding_ = node->network_image_encoding_; normalization_type_ = node->network_normalization_type_; tensor_name_ = node->tensor_name_; + tensor_layout_ = node->tensor_layout_; maintain_aspect_ratio_ = node->maintain_aspect_ratio_; center_crop_ = node->center_crop_; image_mean_ = node->image_mean_; @@ -142,8 +155,21 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl image_resized.convertTo(image_resized, CV_32F); } - // Convert to NCHW - cv::Mat cv_tensor = cv::dnn::blobFromImage(image_resized); + // Convert tensor layout to the tensor_layout
specified by the parameters + cv::Mat cv_tensor; + switch (g_str_to_tensor_layout.at(tensor_layout_)) { + case TensorLayouts::NHWC: + { + int sz[] = {1, image_resized.rows, image_resized.cols, image_resized.channels()}; + cv_tensor = cv::Mat(4, sz, CV_32F, image_resized.data); + break; + } + // Default to NCHW + default: + cv_tensor = cv::dnn::blobFromImage(image_resized); + } + + // Convert CV matrix to ROS2 tensor message auto tensor_list_msg = isaac_ros_nvengine_interfaces::msg::TensorList(); @@ -186,6 +212,7 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options) image_mean_(declare_parameter<std::vector<double>>("image_mean", {0.5, 0.5, 0.5})), image_stddev_(declare_parameter<std::vector<double>>("image_stddev", {0.5, 0.5, 0.5})), tensor_name_(declare_parameter<std::string>("tensor_name", "input")), + tensor_layout_(declare_parameter<std::string>("tensor_layout", "nchw")), network_normalization_type_(declare_parameter<std::string>( "network_normalization_type", "unit_scaling")), // Subscriber @@ -201,6 +228,11 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options) // Impl initialization impl_(std::make_unique<DnnImageEncoderImpl>()) { + if (g_str_to_tensor_layout.find(tensor_layout_) == g_str_to_tensor_layout.end()) { + throw std::runtime_error( + "Error: received unsupported tensor layout: " + tensor_layout_); + } + if (g_str_to_image_encoding.find(network_image_encoding_) == g_str_to_image_encoding.end()) { throw std::runtime_error( "Error: received unsupported network image encoding: " + network_image_encoding_);