singe/video/blend_mmx-gas.s
2019-11-11 14:53:02 -06:00

89 lines
1.8 KiB
ArmAsm

# MMX BLEND function
# by Matt Ownby - Feb 10th, 2004
# For GNU Assembler (AT&T syntax, 32-bit x86 with MMX)
.data
# Parameter slots read by _blend_mmx. All four symbols are exported and
# start out zero-filled. _blend_mmx fetches each one with a 32-bit load,
# so only the first 4 bytes of the 8-byte slots are actually consumed.
        .globl  _asm_line1
        .globl  _asm_line2
        .globl  _asm_dest
        .globl  _asm_iterations

_asm_line1:                             # address of the first source buffer
        .skip   8
_asm_line2:                             # address of the second source buffer
        .skip   8
_asm_dest:                              # address of the output buffer
        .skip   8
_asm_iterations:                        # loop bound, compared against a byte index
        .skip   4
####################################
.text
#-----------------------------------------------------------------------
# _blend_mmx: average two byte buffers into a third, 8 bytes per pass.
#
# Takes no stack arguments; all inputs come from globals, each read
# with a 32-bit load:
#     _asm_line1       address of the first source buffer
#     _asm_line2       address of the second source buffer
#     _asm_dest        address of the output buffer
#     _asm_iterations  number of bytes to process (treated as signed)
#
# Result:
#     dest[i] = (line1[i] + line2[i]) >> 1   for every processed byte
#
# Registers:
#     eax   holds the _asm_dest value on return (not restored)
#     ebx, ecx, edx, esi are saved on entry and restored on exit
#     mm0-mm3 and mm7 are overwritten; emms runs after the loop
#
# Notes:
#     - Bytes are handled in whole groups of 8, so a count that is not
#       a multiple of 8 is effectively rounded up; all three buffers
#       must be large enough for that.
#     - A count of zero or less returns without reading or writing any
#       buffer (previously one 8-byte group was always processed).
#-----------------------------------------------------------------------
        .globl  _blend_mmx
_blend_mmx:
        push    %esi                    # will hold the line2 address
        push    %ebx                    # will hold the byte count
        push    %ecx                    # will hold the running byte index
        push    %edx                    # will hold the line1 address

        movl    _asm_dest, %eax         # eax = output address
        movl    _asm_iterations, %ebx   # ebx = byte count
        testl   %ebx, %ebx
        jle     .Lblend_done            # nothing to do for count <= 0

        movl    _asm_line1, %edx        # edx = first source address
        movl    _asm_line2, %esi        # esi = second source address
        xorl    %ecx, %ecx              # byte index starts at 0
        pxor    %mm7, %mm7              # mm7 = 0, supplies the high byte of each word

.Lblend_loop:
        # Invariant: 0 <= ecx < ebx, and ecx is a multiple of 8.
        movq    (%edx,%ecx), %mm0       # 8 bytes from line1
        movq    (%esi,%ecx), %mm1       # 8 bytes from line2
        movq    %mm0, %mm2              # second copy for the upper 4 bytes
        movq    %mm1, %mm3

        punpcklbw %mm7, %mm0            # line1 bytes 0-3 widened to words
        punpckhbw %mm7, %mm2            # line1 bytes 4-7 widened to words
        punpcklbw %mm7, %mm1            # line2 bytes 0-3 widened to words
        punpckhbw %mm7, %mm3            # line2 bytes 4-7 widened to words

        paddw   %mm1, %mm0              # word sums, at most 255+255 so no wrap
        paddw   %mm3, %mm2
        psrlw   $1, %mm0                # halve each sum (truncating average)
        psrlw   $1, %mm2

        packuswb %mm2, %mm0             # narrow the 8 words back to 8 bytes
        movq    %mm0, (%eax,%ecx)       # store the blended group

        addl    $8, %ecx                # step past the group just written
        cmpl    %ebx, %ecx
        jl      .Lblend_loop            # signed compare: continue while index < count

        emms                            # leave MMX state so x87 code can run again

.Lblend_done:
        pop     %edx
        pop     %ecx
        pop     %ebx
        pop     %esi
        ret
####################################################