[ros-users] Question about cmake and SSE optimizations

Jose Gonzalez de0a100 at gmail.com
Tue Mar 23 18:47:02 UTC 2010


Here is the version I came up with, extended with an SSE3 check (I'm not
familiar with MSVC, so I didn't add SSE3 detection to the MSVC branch).

# Check for SSE extensions.
#
# Each probe is compiled AND executed via check_cxx_source_runs(), so a level
# only counts as available when the compiler accepts it and the build machine
# can run the resulting instructions. The highest level that passes wins
# (SSE3 > SSE2 > SSE with GCC; SSE2 only with MSVC) and is enabled for this
# directory through add_definitions().
#
# Result variables (stored in the cache by check_cxx_source_runs):
#   HAS_SSE3_EXTENSIONS, HAS_SSE2_EXTENSIONS, HAS_SSE_EXTENSIONS
# Output variable (GCC branch only):
#   SSE_FLAGS - space-separated flags that were enabled; empty if no probe passed
#
# NOTE: because the probes have to run, cross-compiling builds need the
# results supplied up front (see the try_run() documentation).
include(CheckCXXSourceRuns)

if( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
  set(SSE_FLAGS)

  # The probes need their own CMAKE_REQUIRED_FLAGS. Remember what the
  # including project had set so it can be put back once we are done, instead
  # of silently wiping it for every later check_* call.
  set(_sse_saved_required_flags "${CMAKE_REQUIRED_FLAGS}")

  # SSE3 probe: _mm_hadd_pd comes from <pmmintrin.h>.
  set(CMAKE_REQUIRED_FLAGS "-msse3")
  check_cxx_source_runs("
    #include <pmmintrin.h>

    int main()
    {
       __m128d a, b;
       double vals[2] = {0};
       a = _mm_loadu_pd(vals);
       b = _mm_hadd_pd(a,a);
       _mm_storeu_pd(vals, b);
       return 0;
    }"
    HAS_SSE3_EXTENSIONS)

  # SSE2 probe: double-precision packed add from <emmintrin.h>.
  set(CMAKE_REQUIRED_FLAGS "-msse2")
  check_cxx_source_runs("
    #include <emmintrin.h>

    int main()
    {
        __m128d a, b;
        double vals[2] = {0};
        a = _mm_loadu_pd(vals);
        b = _mm_add_pd(a,a);
        _mm_storeu_pd(vals,b);
        return 0;
     }"
     HAS_SSE2_EXTENSIONS)

  # SSE probe: single-precision packed add from <xmmintrin.h>.
  set(CMAKE_REQUIRED_FLAGS "-msse")
  check_cxx_source_runs("
    #include <xmmintrin.h>
    int main()
    {
        __m128 a, b;
        float vals[4] = {0};
        a = _mm_loadu_ps(vals);
        b = a;
        b = _mm_add_ps(a,b);
        _mm_storeu_ps(vals,b);
        return 0;
    }"
    HAS_SSE_EXTENSIONS)

  # Hand CMAKE_REQUIRED_FLAGS back exactly as we found it.
  set(CMAKE_REQUIRED_FLAGS "${_sse_saved_required_flags}")
  set(_sse_saved_required_flags)

  # Pick the most capable level that actually ran.
  if(HAS_SSE3_EXTENSIONS)
    message(STATUS "Using SSE3 extensions")
    set(SSE_FLAGS "-msse3 -mfpmath=sse")
  elseif(HAS_SSE2_EXTENSIONS)
    message(STATUS "Using SSE2 extensions")
    set(SSE_FLAGS "-msse2 -mfpmath=sse")
  elseif(HAS_SSE_EXTENSIONS)
    message(STATUS "Using SSE extensions")
    set(SSE_FLAGS "-msse -mfpmath=sse")
  endif()

  # SSE_FLAGS stays a plain space-separated string (callers may append it to
  # CMAKE_CXX_FLAGS); when it is empty this call adds nothing.
  add_definitions(${SSE_FLAGS})
elseif(MSVC)
  # No extra compiler flag is set for the probe here; only SSE2 is tested.
  check_cxx_source_runs("
    #include <emmintrin.h>

    int main()
    {
        __m128d a, b;
        double vals[2] = {0};
        a = _mm_loadu_pd(vals);
        b = _mm_add_pd(a,a);
        _mm_storeu_pd(vals,b);
        return 0;
     }"
     HAS_SSE2_EXTENSIONS)
  if( HAS_SSE2_EXTENSIONS )
    message(STATUS "Using SSE2 extensions")
    # One argument per option, so CMake can recognise the -D entries as
    # preprocessor definitions instead of treating everything as one opaque
    # flag string. __SSE__/__SSE2__ are defined by hand, presumably because
    # MSVC does not predefine these GCC-style macros (confirm for your toolset).
    add_definitions(/arch:SSE2 /fp:fast -D__SSE__ -D__SSE2__)
  endif()
endif()

On Tue, Mar 23, 2010 at 2:24 PM, Radu Bogdan Rusu <rusu at willowgarage.com> wrote:
> In terms of SSE only, we can safely go to the highest SSE version available.
>
> Brian Gerkey wrote:
>> It would be great to see this functionality wrapped up in a single
>> CMake macro, something like:
>>   rosbuild_add_sse_flags(target)
>> It would do checks, compute flags, then call
>> rosbuild_add_compile_flags(target ...).  Of course, this assumes that
>> you want the highest / best level of SSE available; I'm not familiar
>> enough with these optimizations to know if that's appropriate, or if
>> you'd rather have finer-grained control.
>>
>> I'd happily add such a macro to rosbuild, for general reuse.  Come to
>> think of it, this would be a nice module to contribute to CMake
>> itself.
>>
>>       brian.
>>
>> On Tue, Mar 23, 2010 at 11:14 AM, Radu Bogdan Rusu
>> <rusu at willowgarage.com> wrote:
>>> Great job Rosen! That's exactly what I had in mind.
>>>
>>> We should add SSE3 and SSE4. Gotta love that dot product :)
>>>
>>> Cheers,
>>> Radu.
>>>
>>> Rosen Diankov wrote:
>>>> I'm pasting the CMake code I use for SSE checks. It checks for both SSE1
>>>> and SSE2; similar checks can be used for SSE3-5. There's also
>>>> a check for MSVC.
>>>>
>>>> # check for SSE extensions
>>>> include(CheckCXXSourceRuns)
>>>> if( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
>>>>   set(SSE_FLAGS)
>>>>
>>>>   set(CMAKE_REQUIRED_FLAGS "-msse2")
>>>>   check_cxx_source_runs("
>>>>     #include <emmintrin.h>
>>>>
>>>>     int main()
>>>>     {
>>>>         __m128d a, b;
>>>>         double vals[2] = {0};
>>>>         a = _mm_loadu_pd(vals);
>>>>         b = _mm_add_pd(a,a);
>>>>         _mm_storeu_pd(vals,b);
>>>>         return 0;
>>>>      }"
>>>>      HAS_SSE2_EXTENSIONS)
>>>>
>>>>   set(CMAKE_REQUIRED_FLAGS "-msse")
>>>>   check_cxx_source_runs("
>>>>     #include <xmmintrin.h>
>>>>     int main()
>>>>     {
>>>>         __m128 a, b;
>>>>         float vals[4] = {0};
>>>>         a = _mm_loadu_ps(vals);
>>>>         b = a;
>>>>         b = _mm_add_ps(a,b);
>>>>         _mm_storeu_ps(vals,b);
>>>>         return 0;
>>>>     }"
>>>>     HAS_SSE_EXTENSIONS)
>>>>
>>>>   set(CMAKE_REQUIRED_FLAGS)
>>>>
>>>>   if(HAS_SSE2_EXTENSIONS)
>>>>     message(STATUS "Using SSE2 extensions")
>>>>     set(SSE_FLAGS "-msse2 -mfpmath=sse")
>>>>   elseif(HAS_SSE_EXTENSIONS)
>>>>     message(STATUS "Using SSE extensions")
>>>>     set(SSE_FLAGS "-msse -mfpmath=sse")
>>>>   endif()
>>>>
>>>>   add_definitions(${SSE_FLAGS})
>>>> elseif(MSVC)
>>>>   check_cxx_source_runs("
>>>>     #include <emmintrin.h>
>>>>
>>>>     int main()
>>>>     {
>>>>         __m128d a, b;
>>>>         double vals[2] = {0};
>>>>         a = _mm_loadu_pd(vals);
>>>>         b = _mm_add_pd(a,a);
>>>>         _mm_storeu_pd(vals,b);
>>>>         return 0;
>>>>      }"
>>>>      HAS_SSE2_EXTENSIONS)
>>>>   if( HAS_SSE2_EXTENSIONS )
>>>>     message(STATUS "Using SSE2 extensions")
>>>>     add_definitions( "/arch:SSE2 /fp:fast -D__SSE__ -D__SSE2__" )
>>>>   endif()
>>>> endif()
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> 2010/3/24 Jose Gonzalez <de0a100 at gmail.com>:
>>>>> Thanks for the comments!
>>>>>
>>>>> What about  check_cxx_compiler_flag("-msse2" COMPILER_SUPPORT_SSE2)?
>>>>> Have you ever tried it?
>>>>>
>>>>> On Tue, Mar 23, 2010 at 12:16 PM, Radu Bogdan Rusu
>>>>> <rusu at willowgarage.com> wrote:
>>>>>> Depending on what compiler and cpu you use, some of these flags might be already turned on by default. You can check
>>>>>> that using:
>>>>>>
>>>>>> $ gcc -Q -O3 --help=target
>>>>>>
>>>>>> for example.
>>>>>>
>>>>>> Other than that, we don't have a proper way to enable/disable different SSE optimizations for now. Most ROS packages
>>>>>> cram an add_definitions ("-msse... etc") into their CMakeLists.txt.
>>>>>>
>>>>>> Patches to make this more standard across multiple platforms and enable/disable SSE1-5 automatically would be great!
>>>>>>
>>>>>> Cheers,
>>>>>> Radu.
>>>>>>
>>>>>> Jose Gonzalez wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> Doing some experiments in the past with OpenCV (linux/g++) I realized
>>>>>>> that there is a significant performance difference between the
>>>>>>> versions compiled with autoconf and cmake. Digging in the compilation
>>>>>>> scripts I came to the conclusion that the only extra flags that were
>>>>>>> added by the autoconf scripts were the SSE optimizations (and openMP).
>>>>>>>
>>>>>>> Talking about the SSE optimizations, I can see how ROS sets the flags
>>>>>>> in a similar way. When the detected platform is i686, all the SSE
>>>>>>> optimizations are disabled by default (there is a comment "SSE/SSE2
>>>>>>> might probably be not available for some old AMD or VIA processors").
>>>>>>> Before finding out a solution by myself I'd like to ask the list if
>>>>>>> somebody knows a standard way of testing for SSE optimizations in
>>>>>>> cmake.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Jose
>>>>>>>
>>>>>>> ------------------------------------------------------------------------------
>>>>>>> Download Intel® Parallel Studio Eval
>>>>>>> Try the new software tools for yourself. Speed compiling, find bugs
>>>>>>> proactively, and fine-tune applications for parallel performance.
>>>>>>> See why Intel Parallel Studio got high marks during beta.
>>>>>>> http://p.sf.net/sfu/intel-sw-dev
>>>>>>> _______________________________________________
>>>>>>> ros-users mailing list
>>>>>>> ros-users at lists.sourceforge.net
>>>>>>> https://lists.sourceforge.net/lists/listinfo/ros-users
>>>>>>> _______________________________________________
>>>>>>> ros-users mailing list
>>>>>>> ros-users at code.ros.org
>>>>>>> https://code.ros.org/mailman/listinfo/ros-users
>>>>>> --
>>>>>> | Radu Bogdan Rusu | http://rbrusu.com/
>>>>>> _______________________________________________
>>>>>> ros-users mailing list
>>>>>> ros-users at code.ros.org
>>>>>> https://code.ros.org/mailman/listinfo/ros-users
>>>>>>
>>>>> _______________________________________________
>>>>> ros-users mailing list
>>>>> ros-users at code.ros.org
>>>>> https://code.ros.org/mailman/listinfo/ros-users
>>>>>
>>> --
>>> | Radu Bogdan Rusu | http://rbrusu.com/
>>> _______________________________________________
>>> ros-users mailing list
>>> ros-users at code.ros.org
>>> https://code.ros.org/mailman/listinfo/ros-users
>>>
>
> --
> | Radu Bogdan Rusu | http://rbrusu.com/
> _______________________________________________
> ros-users mailing list
> ros-users at code.ros.org
> https://code.ros.org/mailman/listinfo/ros-users
>



More information about the ros-users mailing list