; Saturating unsigned byte-wise add of two buffers (MMX, Intel syntax, 32-bit).
;
;   dest[i] = saturate_u8(src1[i] + src2[i])
;
; In:    esi = first source pointer
;        edx = second source pointer
;        edi = destination pointer
; Out:   esi, edx, edi each advanced by 32 * 8 = 256 bytes
; Clobb: ecx (0 on exit), mm0, flags
;
; NOTE(review): no EMMS is visible in this fragment; it must be executed
; before any following x87 floating-point code -- confirm the surrounding
; code issues it.

        mov     ecx, 32                 ; ecx = number of 8-byte groups to process
l1:
        movq    mm0, [esi]              ; mm0 = next 8 bytes of first source
        add     esi, 8                  ; advance first source pointer
        paddusb mm0, [edx]              ; packed unsigned saturating add of 8 bytes
        add     edx, 8                  ; advance second source pointer
        movq    [edi], mm0              ; store the 8-byte result
        add     edi, 8                  ; advance destination pointer
        dec     ecx                     ; one group done
        jnz     l1                      ; repeat while groups remain