<div dir="ltr"><div>Dear All,</div><div><br></div><div>By inspecting some assembly, I have recently made a surprising observation about the behaviour of Vc::Memory.</div><div>The problem appears when I copy between Vc::Memory objects (see function bar2): I realized that this copy does not use vector instructions, as I would have expected. In contrast, when I implement the copy directly on vectors (as in function bar4), I get the expected assembly.</div>
<div><br></div><div>I compile the examples below with</div><div>g++-[4.7|4.8] -O3 -mavx ...</div><div>using Vc 0.8.</div><div><br></div><div><br></div><div>#include <Vc/Vc></div><div><br></div><div>// result ok</div><div>
void bar1( Vc::Vector<double> const & a, Vc::Vector<double> & b)</div><div>{</div><div> b=a;</div><div>}</div><div><br></div><div>// result surprising</div><div>void bar2( Vc::Memory<Vc::Vector<double>, 5> const & a, Vc::Memory<Vc::Vector<double>, 5> & b)</div>
<div>{</div><div> b=a;</div><div>}</div><div><br></div><div>// result ok</div><div>void bar3( Vc::Memory<Vc::Vector<double>, 5> const & a, Vc::Memory<Vc::Vector<double>, 5> & b)</div><div>
{</div><div> b+=a;</div><div>}</div><div><br></div><div>// result ok</div><div>void bar4( Vc::Memory<Vc::Vector<double>, 5> const & a, Vc::Memory<Vc::Vector<double>, 5> & b)</div><div>{</div>
<div> for( int i=0;i<1+5/Vc::Vector<double>::Size;++i )</div><div> {</div><div> Vc::Vector<double> tmp=a.vector(i); // this temporary is needed because some compilers do not accept the direct assignment b.vector(i)=a.vector(i)</div>
<div> b.vector(i) = tmp;</div><div> }</div><div>}</div><div><br></div><div>/// assembly follows:</div><br clear="all"><div><div>0000000000000000 <_Z4bar1RKN2Vc3AVX6VectorIdEERS2_>:</div><div> 0: c5 fd 28 07 vmovapd (%rdi),%ymm0</div>
<div> 4: c5 fd 29 06 vmovapd %ymm0,(%rsi)</div><div> 8: c5 f8 77 vzeroupper </div><div> b: c3 retq </div><div> c: 0f 1f 40 00 nopl 0x0(%rax)</div>
<div><br></div><div>0000000000000010 <_Z4bar2RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:</div><div> 10: 48 8b 07 mov (%rdi),%rax</div><div> 13: 48 89 06 mov %rax,(%rsi)</div>
<div> 16: 48 8b 47 08 mov 0x8(%rdi),%rax</div><div> 1a: 48 89 46 08 mov %rax,0x8(%rsi)</div><div> 1e: 48 8b 47 10 mov 0x10(%rdi),%rax</div><div> 22: 48 89 46 10 mov %rax,0x10(%rsi)</div>
<div> 26: 48 8b 47 18 mov 0x18(%rdi),%rax</div><div> 2a: 48 89 46 18 mov %rax,0x18(%rsi)</div><div> 2e: 48 8b 47 20 mov 0x20(%rdi),%rax</div><div> 32: 48 89 46 20 mov %rax,0x20(%rsi)</div>
<div> 36: 48 8b 47 28 mov 0x28(%rdi),%rax</div><div> 3a: 48 89 46 28 mov %rax,0x28(%rsi)</div><div> 3e: 48 8b 47 30 mov 0x30(%rdi),%rax</div><div> 42: 48 89 46 30 mov %rax,0x30(%rsi)</div>
<div> 46: 48 8b 47 38 mov 0x38(%rdi),%rax</div><div> 4a: 48 89 46 38 mov %rax,0x38(%rsi)</div><div> 4e: c3 retq </div><div> 4f: 90 nop</div>
<div><br></div><div>0000000000000050 <_Z4bar3RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:</div><div> 50: c5 fd 28 06 vmovapd (%rsi),%ymm0</div><div> 54: c5 fd 58 07 vaddpd (%rdi),%ymm0,%ymm0</div>
<div> 58: c5 fd 29 06 vmovapd %ymm0,(%rsi)</div><div> 5c: c5 fd 28 46 20 vmovapd 0x20(%rsi),%ymm0</div><div> 61: c5 fd 58 47 20 vaddpd 0x20(%rdi),%ymm0,%ymm0</div><div> 66: c5 fd 29 46 20 vmovapd %ymm0,0x20(%rsi)</div>
<div> 6b: c5 f8 77 vzeroupper </div><div> 6e: c3 retq </div><div> 6f: 90 nop</div><div><br></div><div>0000000000000070 <_Z4bar4RKN2Vc6MemoryINS_3AVX6VectorIdEELm5ELm0EEERS4_>:</div>
<div> 70: c5 fd 28 07 vmovapd (%rdi),%ymm0</div><div> 74: c5 fd 29 06 vmovapd %ymm0,(%rsi)</div><div> 78: c5 fd 28 47 20 vmovapd 0x20(%rdi),%ymm0</div><div> 7d: c5 fd 29 46 20 vmovapd %ymm0,0x20(%rsi)</div>
<div> 82: c5 f8 77 vzeroupper </div><div> 85: c3 retq </div><div><br></div></div><div><br></div><div>-------------</div><div><br></div><div>Best</div><div><br></div><div>Sandro</div>
<div><br></div>-- <br><div dir="ltr">Dr. Sandro Wenzel<br><div>PH / SFT</div><div>CERN <br><br></div></div></div>