Hi all,
I've been running some rough performance tests on transferring images
using nodelets, and have found the overhead to be higher than I
expected. For reference, I'm running ROS cturtle on Ubuntu 10.04 64 bit
(stock kernel 2.6.32-24-generic) on a 2 GHz Intel Core 2 Duo laptop. CPU
use monitored approximately using htop.
Exchanging one 1920x1280 bgr8 image at 10 fps incurs ~30% CPU use for
the nodelet manager. As expected, this is better than when the nodelets
are run standalone (then ~25% CPU each), but I wanted to check if I'm
doing something wrong that might affect nodelet performance. I've been
using raw Image publishing rather than image_transport, to keep the test
as simple as possible.
The image 'source' is just an uninitialised unsigned char array of the
correct size. As a comparison, I created a node that merely copies the
source array a number of times, and found that 5 memcpy operations
produce about the same CPU load as the best nodelet case, around 30%.
Of course CPU caching may improve the speed of those 5 consecutive
memcpy operations, so I'm not saying "ROS is copying the data 5 times".
I've attached the relevant source files. The question is: Is this the
best we can do at present for image transfer, and if not, are there
modifications on the horizon that will improve performance?
Joel
--
Research Assistant
Ocean Systems Laboratory
Heriot-Watt University, UK
--
Heriot-Watt University is a Scottish charity
registered under charity number SC000278.
#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>
#include <sensor_msgs/Image.h>
#include <sensor_msgs/fill_image.h>
#include "image_const.h"
namespace image_test
{
class RawImagePubTest : public nodelet::Nodelet
{
public:
RawImagePubTest() : frameCount_(0) {}
private:
virtual void onInit()
{
ros::NodeHandle& private_nh = getMTNodeHandle();
pub_ = private_nh.advertise<sensor_msgs::Image>("camera/image", 1);
timer_ = private_nh.createTimer(ros::Duration(0.1), &RawImagePubTest::timerCallback, this);
}
void timerCallback(const ros::TimerEvent& e)
{
++frameCount_;
// data is intentionally left uninitialised, so we're only seeing transfer CPU usage.
NODELET_INFO_STREAM(frameCount_ << " Publishing raw image, size " << width << " x " << height);
sensor_msgs::fillImage(image_, "bgr8", height, width, 3 * width, data);
pub_.publish(image_);
}
ros::Timer timer_;
ros::Publisher pub_;
unsigned frameCount_;
sensor_msgs::Image image_;
unsigned char data[dataSize];
};
PLUGINLIB_DECLARE_CLASS(image_test, RawImagePubTest, image_test::RawImagePubTest, nodelet::Nodelet);
}
#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>
#include <sensor_msgs/Image.h>
namespace image_test
{
class RawImageSubTest : public nodelet::Nodelet
{
public:
RawImageSubTest() : frameCount_(0) {}
private:
virtual void onInit()
{
ros::NodeHandle& private_nh = getMTNodeHandle();
sub_ = private_nh.subscribe<sensor_msgs::Image>("camera/image", 1, &RawImageSubTest::imageCallback, this);
}
void imageCallback(const sensor_msgs::ImageConstPtr& image)
{
++frameCount_;
NODELET_INFO_STREAM(frameCount_ << " Received image size " << image->width << " x " << image->height);
}
unsigned frameCount_;
ros::Subscriber sub_;
};
PLUGINLIB_DECLARE_CLASS(image_test, RawImageSubTest, image_test::RawImageSubTest, nodelet::Nodelet);
}
#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>
#include "image_const.h"
namespace image_test
{
const int numCopies = 5;
class CopyImageTest : public nodelet::Nodelet
{
public:
CopyImageTest() : frameCount_(0) {}
private:
virtual void onInit()
{
ros::NodeHandle& private_nh = getMTNodeHandle();
timer_ = private_nh.createTimer(ros::Duration(0.1), &CopyImageTest::timerCallback, this);
}
void timerCallback(const ros::TimerEvent& e)
{
++frameCount_;
// data[0] is intentionally left uninitialised, so we're only seeing copy CPU usage.
NODELET_INFO_STREAM(frameCount_ << " Copying raw image " << numCopies << " times, size " << width << " x " << height);
for(int i=0; i<numCopies; ++i)
{
memcpy(data[i+1], data[i], dataSize);
}
}
ros::Timer timer_;
unsigned frameCount_;
unsigned char data[numCopies+1][dataSize];
};
PLUGINLIB_DECLARE_CLASS(image_test, CopyImageTest, image_test::CopyImageTest, nodelet::Nodelet);
}
#ifndef IMAGE_CONST_H_
#define IMAGE_CONST_H_
namespace image_test
{
const size_t width = 1920;
const size_t height = 1280;
const size_t dataSize = width * height * 3;
}
#endif /* IMAGE_CONST_H_ */