From 2234f250df701b7520d09380bd525998a4ff35bf Mon Sep 17 00:00:00 2001 From: Sidney Rubidge Date: Mon, 24 Jan 2022 12:51:30 -0400 Subject: [PATCH] fix: Add NHWC tensor layout option fix: Enable outputting NCHW or NHWC tensor layout docs: Add tensor layout options to README docs: fix readme formatting --- README.md | 2 +- .../dnn_image_encoder_node.hpp | 3 ++ .../src/dnn_image_encoder_node.cpp | 36 +++++++++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bfa28dd..2de0176 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,7 @@ performed by this node. This node is capable of image color space conversion, im #### Available Components | Component | Topics Subscribed | Topics Published | Parameters | | ----------------- | -------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default. | +| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`tensor_layout`: The [tensor layout](https://docs.nvidia.com/deeplearning/performance/dl-performance-convolutional/index.html#tensor-layout) of the published tensor. The options are `nchw` (default) and `nhwc`. | **Note:** For best results, crop/resize input images to the same dimensions your DNN model is expecting. `DnnImageEncoderNode` will skew the aspect ratio of input images to the target dimensions. diff --git a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp index 4f6d3b3..90d3f8c 100644 --- a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp +++ b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp @@ -53,6 +53,9 @@ class DnnImageEncoderNode : public rclcpp::Node // Name of the published Tensor message const std::string tensor_name_; + // The layout of the tensor (i.e. NCHW or NHWC) + const std::string tensor_layout_; + // Method to normalize image. 
Supported types are "unit_scaling" (range is [0, 1]), // and "positive_negative" (range is [-1, 1]) and "none" for no normalization const std::string network_normalization_type_; diff --git a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp index 590c30b..d9ccadc 100644 --- a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp +++ b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp @@ -29,6 +29,12 @@ enum NormalizationTypes kImageNormalization }; +enum TensorLayouts +{ + NCHW, + NHWC +}; + const std::unordered_map<std::string, int32_t> g_str_to_normalization_type({ {"none", NormalizationTypes::kNone}, {"unit_scaling", NormalizationTypes::kUnitScaling}, @@ -36,6 +42,11 @@ const std::unordered_map<std::string, int32_t> g_str_to_normalization_type({ {"image_normalization", NormalizationTypes::kImageNormalization}} ); +const std::unordered_map<std::string, int32_t> g_str_to_tensor_layout({ + {"nchw", TensorLayouts::NCHW}, + {"nhwc", TensorLayouts::NHWC}} ); + const std::unordered_map<std::string, std::string> g_str_to_image_encoding({ {"rgb8", sensor_msgs::image_encodings::RGB8}, {"bgr8", sensor_msgs::image_encodings::BGR8}, @@ -57,6 +68,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl std::string image_encoding_; std::string normalization_type_; std::string tensor_name_; + std::string tensor_layout_; bool maintain_aspect_ratio_; bool center_crop_; std::vector<double> image_mean_; @@ -70,6 +82,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl image_encoding_ = node->network_image_encoding_; normalization_type_ = node->network_normalization_type_; tensor_name_ = node->tensor_name_; + tensor_layout_ = node->tensor_layout_; maintain_aspect_ratio_ = node->maintain_aspect_ratio_; center_crop_ = node->center_crop_; image_mean_ = node->image_mean_; @@ -142,8 +155,21 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl image_resized.convertTo(image_resized, CV_32F); } - // Convert to NCHW - cv::Mat cv_tensor = cv::dnn::blobFromImage(image_resized); + // Convert tensor layout to the tensor_layout
specified by the parameters + cv::Mat cv_tensor; + switch (g_str_to_tensor_layout.at(tensor_layout_)) { + case TensorLayouts::NHWC: + { + int sz[] = {1, image_resized.rows, image_resized.cols, image_resized.channels()}; + cv_tensor = cv::Mat(4, sz, CV_32F, image_resized.data); + break; + } + // Default to NCHW + default: + cv_tensor = cv::dnn::blobFromImage(image_resized); + } + + // Convert CV matrix to ROS2 tensor message auto tensor_list_msg = isaac_ros_nvengine_interfaces::msg::TensorList(); @@ -186,6 +212,7 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options) image_mean_(declare_parameter<std::vector<double>>("image_mean", {0.5, 0.5, 0.5})), image_stddev_(declare_parameter<std::vector<double>>("image_stddev", {0.5, 0.5, 0.5})), tensor_name_(declare_parameter<std::string>("tensor_name", "input")), + tensor_layout_(declare_parameter<std::string>("tensor_layout", "nchw")), network_normalization_type_(declare_parameter<std::string>( "network_normalization_type", "unit_scaling")), // Subscriber @@ -201,6 +228,11 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options) // Impl initialization impl_(std::make_unique<DnnImageEncoderImpl>()) { + if (g_str_to_tensor_layout.find(tensor_layout_) == g_str_to_tensor_layout.end()) { + throw std::runtime_error( + "Error: received unsupported tensor layout: " + tensor_layout_); + } + if (g_str_to_image_encoding.find(network_image_encoding_) == g_str_to_image_encoding.end()) { throw std::runtime_error( "Error: received unsupported network image encoding: " + network_image_encoding_);