FIAS . Impressum . Privacy

performance issue in Vc::Memory

Sandro Wenzel [please enable javascript to see the address]
Wed Mar 12 20:36:24 CET 2014


Dear All,

By inspecting some assembly, I have recently made a surprising observation
on the behaviour of Vc::Memory.
The problem appears when I try to copy between Vc::Memory objects ( see
function bar2 ): I realized that this copy is not using vector
instructions as I would have expected. In contrast, when I implement the
copy directly on vectors ( as in function bar4 ), I get the expected
assembly.

I compile the examples below with
g++-[4.7|4.8] -O3 -mavx ...
with Vc 0.8


#include <Vc/Vc>

// result ok
// Assigns one Vc::Vector<double> to another through references.
// In the disassembly listed below, this function is a single vmovapd load
// from a followed by a single vmovapd store to b.
void bar1( Vc::Vector<double> const & a,  Vc::Vector<double> & b)
{
  b=a;
}

// result surprising
// Assigns one Vc::Memory<Vc::Vector<double>, 5> to another with the
// Memory assignment operator.
// In the disassembly listed below, this function consists of eight 8-byte
// mov load/store pairs through %rax (offsets 0x0 to 0x38) and contains no
// vmovapd instruction.
void bar2( Vc::Memory<Vc::Vector<double>, 5> const & a,
Vc::Memory<Vc::Vector<double>, 5>  & b)
{
  b=a;
}

// result ok
// Adds a into b with the compound assignment operator of Vc::Memory.
// In the disassembly listed below, this function is two
// vmovapd / vaddpd / vmovapd groups on ymm registers, at offsets 0x0 and
// 0x20.
void bar3( Vc::Memory<Vc::Vector<double>, 5> const & a,
Vc::Memory<Vc::Vector<double>, 5>  & b)
{
  b+=a;
}

// result ok
// Copies a into b one vector at a time through Memory::vector(i) instead of
// using the Memory assignment operator.
// In the disassembly listed below, this function is two vmovapd load/store
// pairs, at offsets 0x0 and 0x20.
void bar4( Vc::Memory<Vc::Vector<double>, 5> const & a,
Vc::Memory<Vc::Vector<double>, 5>  & b)
{
  // Number of vectors needed to cover 5 entries, rounded up. The previous
  // bound 1+5/Size iterated once too often whenever Size divides 5 (e.g.
  // Size == 1); for Size == 4 both expressions evaluate to 2.
  const int vectorCount =
    ( 5 + Vc::Vector<double>::Size - 1 ) / Vc::Vector<double>::Size;
  for( int i=0;i<vectorCount;++i )
    {
      // this intermediate is forced by some compilers because a direct
      // assignment b.vector(i)=a.vector(i) is not understood
      Vc::Vector<double> tmp=a.vector(i);
      b.vector(i) = tmp;
    }
}

/// assembly follows:

0000000000000000 <_Z4bar1RKN2Vc3AVX6VectorIdEERS2_>:
   0:   c5 fd 28 07             vmovapd (%rdi),%ymm0
   4:   c5 fd 29 06             vmovapd %ymm0,(%rsi)
   8:   c5 f8 77                vzeroupper
   b:   c3                      retq
   c:   0f 1f 40 00             nopl   0x0(%rax)

0000000000000010 <_Z4bar2RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:
  10:   48 8b 07                mov    (%rdi),%rax
  13:   48 89 06                mov    %rax,(%rsi)
  16:   48 8b 47 08             mov    0x8(%rdi),%rax
  1a:   48 89 46 08             mov    %rax,0x8(%rsi)
  1e:   48 8b 47 10             mov    0x10(%rdi),%rax
  22:   48 89 46 10             mov    %rax,0x10(%rsi)
  26:   48 8b 47 18             mov    0x18(%rdi),%rax
  2a:   48 89 46 18             mov    %rax,0x18(%rsi)
  2e:   48 8b 47 20             mov    0x20(%rdi),%rax
  32:   48 89 46 20             mov    %rax,0x20(%rsi)
  36:   48 8b 47 28             mov    0x28(%rdi),%rax
  3a:   48 89 46 28             mov    %rax,0x28(%rsi)
  3e:   48 8b 47 30             mov    0x30(%rdi),%rax
  42:   48 89 46 30             mov    %rax,0x30(%rsi)
  46:   48 8b 47 38             mov    0x38(%rdi),%rax
  4a:   48 89 46 38             mov    %rax,0x38(%rsi)
  4e:   c3                      retq
  4f:   90                      nop

0000000000000050 <_Z4bar3RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:
  50:   c5 fd 28 06             vmovapd (%rsi),%ymm0
  54:   c5 fd 58 07             vaddpd (%rdi),%ymm0,%ymm0
  58:   c5 fd 29 06             vmovapd %ymm0,(%rsi)
  5c:   c5 fd 28 46 20          vmovapd 0x20(%rsi),%ymm0
  61:   c5 fd 58 47 20          vaddpd 0x20(%rdi),%ymm0,%ymm0
  66:   c5 fd 29 46 20          vmovapd %ymm0,0x20(%rsi)
  6b:   c5 f8 77                vzeroupper
  6e:   c3                      retq
  6f:   90                      nop

0000000000000070 <_Z4bar4RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:
  70:   c5 fd 28 07             vmovapd (%rdi),%ymm0
  74:   c5 fd 29 06             vmovapd %ymm0,(%rsi)
  78:   c5 fd 28 47 20          vmovapd 0x20(%rdi),%ymm0
  7d:   c5 fd 29 46 20          vmovapd %ymm0,0x20(%rsi)
  82:   c5 f8 77                vzeroupper
  85:   c3                      retq


-------------

Best

Sandro

-- 
Dr. Sandro Wenzel
PH / SFT
CERN
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://compeng.uni-frankfurt.de/pipermail/vc/attachments/20140312/8f05b022/attachment.html>


More information about the Vc mailing list
FIAS . Impressum . Privacy