; Listing metadata: 97 lines, 1.8 KiB, no EOL at end of file, tagged "NASM" (the directives below are MASM syntax)
; MMX BLEND function
; by Matt Ownby
; For MASM (Microsoft Macro Assembler) only

; ---- target / memory model ------------------------------------------
.486                            ; accept the 80486 instruction set
.MMX                            ; accept MMX mnemonics and mm0-mm7
.MODEL FLAT, C                  ; 32-bit flat memory model, C language type

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

.DATA

; Parameter block read by blend_mmx.  Nothing in this file writes these
; variables; presumably the caller fills them in before each call
; (verify against the C side).
;
; The three address slots are 8 bytes wide, but blend_mmx loads only the
; low dword of each one.

PUBLIC asm_line1, asm_line2, asm_dest, asm_iterations

ALIGN 8                                 ; 8-byte boundary for the 64-bit slots

asm_line1       QWORD   0               ; address of the first source buffer
asm_line2       QWORD   0               ; address of the second source buffer
asm_dest        QWORD   0               ; address of the destination buffer
asm_iterations  DWORD   0               ; loop bound; blend_mmx compares it
                                        ; against its byte index

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

.CODE

;-----------------------------------------------------------------------
; blend_mmx -- average two byte buffers into a third using MMX.
;
; Presumed C prototype: void blend_mmx(void);  (no stack arguments are
; read; confirm against the caller's declaration.)
;
; For every byte index i handled:
;       dest[i] = (line1[i] + line2[i]) >> 1        ; truncating average
;
; ABI:   32-bit, .model flat, c
; In:    asm_line1       source buffer 1 address (low dword of the slot)
;        asm_line2       source buffer 2 address (low dword of the slot)
;        asm_dest        destination address     (low dword of the slot)
;        asm_iterations  signed 32-bit BYTE count.  Despite its name it is
;                        compared against the byte index, not a pass
;                        counter.  Bytes are handled in groups of 8, so a
;                        count that is not a multiple of 8 touches up to
;                        7 bytes past the count in all three buffers.
;                        A count <= 0 blends nothing.
; Out:   eax = value loaded from asm_dest
; Saved: ebx, esi (required by the C convention); ecx, edx (not required,
;        but callers have always seen them preserved, so that is kept)
; Clobb: mm0-mm3, mm7, flags.  emms is executed on every path, so the
;        x87/MMX tag state is empty on return.
;-----------------------------------------------------------------------
PUBLIC blend_mmx
blend_mmx PROC NEAR

        push    esi                     ; esi = line2 base
        push    ebx                     ; ebx = byte count
        push    ecx                     ; ecx = byte index
        push    edx                     ; edx = line1 base
        ; eax = dest base; not saved, it carries the return value

        mov     ebx, asm_iterations
        mov     edx, dword ptr [asm_line1]
        mov     esi, dword ptr [asm_line2]
        mov     eax, dword ptr [asm_dest]

        ; The loop below is bottom-tested, so without this guard a zero
        ; or negative count would still read and write one 8-byte group.
        test    ebx, ebx
        jle     Done

        xor     ecx, ecx                ; index = 0
        pxor    mm7, mm7                ; mm7 = 0, used to zero-extend bytes

MainLoop:
        ; invariant: 0 <= ecx < ebx, ecx is a multiple of 8
        movq    mm0, [edx+ecx]          ; 8 bytes of line1
        movq    mm1, [esi+ecx]          ; 8 bytes of line2
        movq    mm2, mm0
        movq    mm3, mm1

        punpcklbw mm0, mm7              ; line1 low  4 bytes -> 4 words
        punpckhbw mm2, mm7              ; line1 high 4 bytes -> 4 words
        punpcklbw mm1, mm7              ; line2 low  4 bytes -> 4 words
        punpckhbw mm3, mm7              ; line2 high 4 bytes -> 4 words

        paddw   mm0, mm1                ; 16-bit sums, max 510: no overflow
        paddw   mm2, mm3

        psrlw   mm0, 1                  ; sum / 2 (rounds down)
        psrlw   mm2, 1

        packuswb mm0, mm2               ; words back to 8 bytes (all <= 255)
        movq    [eax+ecx], mm0          ; store blended group

        add     ecx, 8                  ; next 8-byte group
        cmp     ecx, ebx
        jl      MainLoop                ; signed compare: count is signed

Done:
        emms                            ; leave the FPU usable for the caller

        pop     edx
        pop     ecx
        pop     ebx
        pop     esi

        ret
blend_mmx ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

END