[ros-users] Nodelet image transfer performance

Top Page
Attachments:
Message as email
+ (text/plain)
+ (text/html)
+ raw_image_pub_test.cpp (text/x-c++src)
+ raw_image_sub_test.cpp (text/x-c++src)
+ copy_image_test.cpp (text/x-c++src)
+ image_const.h (text/x-chdr)
Delete this message
Reply to this message
Author: Cartwright, Joel J
Date:  
To: ros-users
Subject: [ros-users] Nodelet image transfer performance
Hi all,

I've been running some rough performance tests on transferring images
using nodelets, and have found the overhead to be higher than I
expected. For reference, I'm running ROS cturtle on Ubuntu 10.04 64 bit
(stock kernel 2.6.32-24-generic) on a 2 GHz Intel Core 2 Duo laptop. CPU
use monitored approximately using htop.

Exchanging one 1920x1280 bgr8 image at 10 fps incurs ~30% CPU use for
the nodelet manager. As expected, this is better than when the nodelets
are run standalone (then ~25% CPU each), but I wanted to check if I'm
doing something wrong that might affect nodelet performance. I've been
using raw Image publishing rather than image_transport, to keep the test
as simple as possible.

The image 'source' is just an uninitialised unsigned char array of the
correct size. As a comparison, I created a node that merely copies the
source array a number of times, and found that 5 memcpy operations
produce about the same CPU load as the best nodelet case, around 30%.
Of course CPU caching may improve the speed of those 5 consecutive
memcpy operations, so I'm not saying "ROS is copying the data 5 times".

I've attached the relevant source files. The question is: Is this the
best we can do at present for image transfer, and if not, are there
modifications on the horizon that will improve performance?

Joel
--
Research Assistant
Ocean Systems Laboratory
Heriot-Watt University, UK


--
Heriot-Watt University is a Scottish charity
registered under charity number SC000278.

#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>
#include <sensor_msgs/Image.h>
#include <sensor_msgs/fill_image.h>

#include "image_const.h"

namespace image_test
{

class RawImagePubTest : public nodelet::Nodelet
{
public:
    RawImagePubTest() : frameCount_(0) {}


private:
    virtual void onInit()
    {
        ros::NodeHandle& private_nh = getMTNodeHandle();
        pub_ = private_nh.advertise<sensor_msgs::Image>("camera/image", 1);
        timer_ = private_nh.createTimer(ros::Duration(0.1), &RawImagePubTest::timerCallback, this);
    }


    void timerCallback(const ros::TimerEvent& e)
    {
        ++frameCount_;
        // data is intentionally left uninitialised, so we're only seeing transfer CPU usage.
        NODELET_INFO_STREAM(frameCount_ << " Publishing raw image, size " << width << " x " << height);
        sensor_msgs::fillImage(image_, "bgr8", height, width, 3 * width, data);
        pub_.publish(image_);
    }


    ros::Timer timer_;
    ros::Publisher pub_;
    unsigned frameCount_;
    sensor_msgs::Image image_;
    unsigned char data[dataSize];
};


PLUGINLIB_DECLARE_CLASS(image_test, RawImagePubTest, image_test::RawImagePubTest, nodelet::Nodelet);
}
#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>
#include <sensor_msgs/Image.h>

namespace image_test
{

class RawImageSubTest : public nodelet::Nodelet
{
public:
    RawImageSubTest() : frameCount_(0) {}


private:
    virtual void onInit()
    {
        ros::NodeHandle& private_nh = getMTNodeHandle();
        sub_ = private_nh.subscribe<sensor_msgs::Image>("camera/image", 1, &RawImageSubTest::imageCallback, this);
    }


    void imageCallback(const sensor_msgs::ImageConstPtr& image)
    {
        ++frameCount_;
        NODELET_INFO_STREAM(frameCount_ << " Received image size " << image->width << " x " << image->height);
    }


    unsigned frameCount_;
    ros::Subscriber sub_;
};


PLUGINLIB_DECLARE_CLASS(image_test, RawImageSubTest, image_test::RawImageSubTest, nodelet::Nodelet);
}
#include <ros/ros.h>
#include <nodelet/nodelet.h>
#include <pluginlib/class_list_macros.h>

#include "image_const.h"

namespace image_test
{
const int numCopies = 5;

class CopyImageTest : public nodelet::Nodelet
{
public:
    CopyImageTest() : frameCount_(0) {}


private:
    virtual void onInit()
    {
        ros::NodeHandle& private_nh = getMTNodeHandle();
        timer_ = private_nh.createTimer(ros::Duration(0.1), &CopyImageTest::timerCallback, this);
    }


    void timerCallback(const ros::TimerEvent& e)
    {
        ++frameCount_;
        // data[0] is intentionally left uninitialised, so we're only seeing copy CPU usage.
        NODELET_INFO_STREAM(frameCount_ << " Copying raw image " << numCopies << " times, size " << width << " x " << height);


        for(int i=0; i<numCopies; ++i)
        {
            memcpy(data[i+1], data[i], dataSize);
        }
    }
    ros::Timer timer_;
    unsigned frameCount_;
    unsigned char data[numCopies+1][dataSize];
};


PLUGINLIB_DECLARE_CLASS(image_test, CopyImageTest, image_test::CopyImageTest, nodelet::Nodelet);
}
#ifndef IMAGE_CONST_H_
#define IMAGE_CONST_H_

namespace image_test
{
const size_t width = 1920;
const size_t height = 1280;
const size_t dataSize = width * height * 3;
}

#endif /* IMAGE_CONST_H_ */