diff --git a/README.md b/README.md
index bfa28dd..2de0176 100644
--- a/README.md
+++ b/README.md
@@ -329,7 +329,7 @@ performed by this node. This node is capable of image color space conversion, im
#### Available Components
| Component | Topics Subscribed | Topics Published | Parameters |
| ----------------- | -------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default. |
+| `DnnImageEncoderNode` | `image`: The image that should be encoded into a tensor | `encoded_tensor`: The resultant tensor after converting the `image`
| `network_image_width`: The image width that the network expects. This will be used to resize the input `image` width. The default value is `224`.
`network_image_height`: The image height that the network expects. This will be used to resize the input `image` height. The default value is `224`.
`network_image_encoding`: The image encoding that the network expects. This will be used to convert the color space of the `image`. This should be either `rgb8` (default), `bgr8`, or `mono8`.
`maintain_aspect_ratio`: A flag for the encoder to crop the input image to get the aspect ratio of `network_image_width` and `network_image_height` before resizing. The default value is set to `False`.
`center_crop`: A flag for the encoder to crop the center of the image if `maintain_aspect_ratio` is set to `True`. The default value is set to `False`.
`tensor_name`: The name of the input tensor, which is `input` by default.
`network_normalization_type`: The type of network normalization that should be performed on the network. This can be either `none` for no normalization, `unit_scaling` for normalization between 0 to 1, `positive_negative` for normalization between -1 to 1, and `image_normalization` for performing this normalization:
`(image / 255 - mean) / standard_deviation`
The default value is `unit_scaling`.
`image_mean`: If `network_normalization_type` is set to `image_normalization`, the mean of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`image_stddev`: If `network_normalization_type` is set to `image_normalization`, the standard deviation of the images per channel will be used for this normalization process, which is `[0.5, 0.5, 0.5]` by default.
`tensor_layout`: The [tensor layout](https://docs.nvidia.com/deeplearning/performance/dl-performance-convolutional/index.html#tensor-layout) of the published tensor. The options are `nchw` (default) and `nhwc`. |
**Note:** For best results, crop/resize input images to the same dimensions your DNN model is expecting. `DnnImageEncoderNode` will skew the aspect ratio of input images to the target dimensions.
diff --git a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp
index 4f6d3b3..90d3f8c 100644
--- a/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp
+++ b/isaac_ros_dnn_encoders/include/isaac_ros_dnn_encoders/dnn_image_encoder_node.hpp
@@ -53,6 +53,9 @@ class DnnImageEncoderNode : public rclcpp::Node
// Name of the published Tensor message
const std::string tensor_name_;
+ // The layout of the tensor (i.e. NCHW or NHWC)
+ const std::string tensor_layout_;
+
// Method to normalize image. Supported types are "unit_scaling" (range is [0, 1]),
// and "positive_negative" (range is [-1, 1]) and "none" for no normalization
const std::string network_normalization_type_;
diff --git a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp
index 590c30b..d9ccadc 100644
--- a/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp
+++ b/isaac_ros_dnn_encoders/src/dnn_image_encoder_node.cpp
@@ -29,6 +29,12 @@ enum NormalizationTypes
kImageNormalization
};
+enum TensorLayouts
+{
+ NCHW,
+ NHWC
+};
+
const std::unordered_map g_str_to_normalization_type({
{"none", NormalizationTypes::kNone},
{"unit_scaling", NormalizationTypes::kUnitScaling},
@@ -36,6 +42,11 @@ const std::unordered_map g_str_to_normalization_type({
{"image_normalization", NormalizationTypes::kImageNormalization}}
);
+const std::unordered_map g_str_to_tensor_layout({
+ {"nchw", TensorLayouts::NCHW},
+ {"nhwc", TensorLayouts::NHWC}}
+);
+
const std::unordered_map g_str_to_image_encoding({
{"rgb8", sensor_msgs::image_encodings::RGB8},
{"bgr8", sensor_msgs::image_encodings::BGR8},
@@ -57,6 +68,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl
std::string image_encoding_;
std::string normalization_type_;
std::string tensor_name_;
+ std::string tensor_layout_;
bool maintain_aspect_ratio_;
bool center_crop_;
std::vector image_mean_;
@@ -70,6 +82,7 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl
image_encoding_ = node->network_image_encoding_;
normalization_type_ = node->network_normalization_type_;
tensor_name_ = node->tensor_name_;
+ tensor_layout_ = node->tensor_layout_;
maintain_aspect_ratio_ = node->maintain_aspect_ratio_;
center_crop_ = node->center_crop_;
image_mean_ = node->image_mean_;
@@ -142,8 +155,21 @@ struct DnnImageEncoderNode::DnnImageEncoderImpl
image_resized.convertTo(image_resized, CV_32F);
}
- // Convert to NCHW
- cv::Mat cv_tensor = cv::dnn::blobFromImage(image_resized);
+ // Convert tensor layout to the tensor_layout specified by the parameters
+ cv::Mat cv_tensor;
+ switch (g_str_to_tensor_layout.at(tensor_layout_)) {
+ case TensorLayouts::NHWC:
+ {
+ int sz[] = {1, image_resized.rows, image_resized.cols, image_resized.channels()};
+ cv_tensor = cv::Mat(4, sz, CV_32F, image_resized.data);
+ break;
+ }
+ // Default to NCHW
+ default:
+ cv_tensor = cv::dnn::blobFromImage(image_resized);
+ }
+
+
// Convert CV matrix to ROS2 tensor message
auto tensor_list_msg = isaac_ros_nvengine_interfaces::msg::TensorList();
@@ -186,6 +212,7 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options)
image_mean_(declare_parameter>("image_mean", {0.5, 0.5, 0.5})),
image_stddev_(declare_parameter>("image_stddev", {0.5, 0.5, 0.5})),
tensor_name_(declare_parameter("tensor_name", "input")),
+ tensor_layout_(declare_parameter("tensor_layout", "nchw")),
network_normalization_type_(declare_parameter(
"network_normalization_type", "unit_scaling")),
// Subscriber
@@ -201,6 +228,11 @@ DnnImageEncoderNode::DnnImageEncoderNode(const rclcpp::NodeOptions options)
// Impl initialization
impl_(std::make_unique())
{
+ if (g_str_to_tensor_layout.find(tensor_layout_) == g_str_to_tensor_layout.end()) {
+ throw std::runtime_error(
+ "Error: received unsupported tensor layout: " + tensor_layout_);
+ }
+
if (g_str_to_image_encoding.find(network_image_encoding_) == g_str_to_image_encoding.end()) {
throw std::runtime_error(
"Error: received unsupported network image encoding: " + network_image_encoding_);