singe/video/blend_mmx-masm.asm
2019-11-11 14:53:02 -06:00

97 lines
No EOL
1.8 KiB
NASM

; MMX BLEND function
; by Matt Ownby
; For MASM (Microschlop) only
.486
.MMX
.model flat, c
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.DATA
ALIGN 8
; Parameter block read by blend_mmx.  The caller fills these in before
; calling; the routine itself takes no stack or register arguments.
PUBLIC asm_line1, asm_line2, asm_dest, asm_iterations

; Each pointer slot is 8 bytes wide, but blend_mmx only loads the low
; dword of each one.
asm_line1       QWORD 0         ; address of the first source buffer
asm_line2       QWORD 0         ; address of the second source buffer
asm_dest        QWORD 0         ; address of the destination buffer

; Compared against a byte index that advances by 8 per loop pass, so this
; is effectively a byte count rather than a pass count.
asm_iterations  DWORD 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.CODE
PUBLIC blend_mmx
;-----------------------------------------------------------------------
; blend_mmx -- average two byte buffers into a third using MMX.
;
; Takes no register or stack arguments; all inputs come from globals:
;   asm_line1       low dword = address of the first source buffer
;   asm_line2       low dword = address of the second source buffer
;   asm_dest        low dword = address of the destination buffer
;   asm_iterations  number of BYTES to process (despite the name: the
;                   loop index advances by 8 per pass and is compared
;                   directly against this value)
;
; For every byte i:  dest[i] = (line1[i] + line2[i]) >> 1   (truncating)
;
; Bytes are handled in groups of 8, so a count that is not a multiple of
; 8 is rounded UP; all three buffers must be accessible up to the next
; multiple of 8.  A count <= 0 touches no memory at all.
; NOTE(review): the count is treated as a signed dword (jle / jl) --
; confirm this matches the type used on the C side.
;
; Saved: ebx, ecx, edx, esi   (ebp and edi are never touched)
; Clobb: eax, flags, mm0-mm3, mm6, mm7.  emms is executed before
;        returning whenever any MMX instruction was used.
;-----------------------------------------------------------------------
blend_mmx PROC NEAR
        push    esi                     ; will point to line 2
        push    ebx                     ; will hold the byte count
        push    ecx                     ; will hold the running byte index
        push    edx                     ; will point to line 1
        ; ecx/edx are saved although this routine never calls out; kept
        ; so callers see the same registers preserved as before.

        mov     ebx, asm_iterations
        test    ebx, ebx
        jle     Done                    ; nothing to do: skip every load/store

        xor     ecx, ecx                ; index = 0
        pxor    mm7, mm7                ; zero source for the high-half unpack
        pxor    mm6, mm6                ; zero source for the low-half unpack
        mov     edx, dword ptr [asm_line1]
        mov     esi, dword ptr [asm_line2]
        mov     eax, dword ptr [asm_dest]

MainLoop:
        ; Widen 8 bytes of each source to two groups of four 16-bit words
        ; so the sums below cannot overflow a byte.
        movq    mm0, [edx+ecx]          ; 8 bytes of line 1
        movq    mm2, mm0
        punpcklbw mm0, mm6              ; low 4 bytes  -> words
        punpckhbw mm2, mm7              ; high 4 bytes -> words

        movq    mm1, [esi+ecx]          ; 8 bytes of line 2
        movq    mm3, mm1
        punpcklbw mm1, mm6              ; low 4 bytes  -> words
        punpckhbw mm3, mm7              ; high 4 bytes -> words

        paddw   mm0, mm1                ; word sums, each at most 510
        paddw   mm2, mm3

        psrlw   mm0, 1                  ; halve -> average, each at most 255
        psrlw   mm2, 1

        packuswb mm0, mm2               ; narrow the 8 words back to 8 bytes
        movq    [eax+ecx], mm0          ; dest[index .. index+7]

        add     ecx, 8                  ; advance past the bytes just written
        cmp     ecx, ebx
        jl      MainLoop                ; continue while index < byte count

        emms                            ; release MMX state for later FPU code

Done:
        pop     edx
        pop     ecx
        pop     ebx
        pop     esi
        ret
blend_mmx ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
END