Much much faster HQ3X filter code [MaxSt]

This commit is contained in:
pagefault
2003-07-29 01:37:55 +00:00
parent 72aa93af20
commit 8ac67e89ae
3 changed files with 660 additions and 205 deletions

View File

@@ -23,6 +23,7 @@
%include "macros.mac"
EXTSYM vidbuffer,curblank,MMXSupport,GUIOn,GUIOn2,
EXTSYM vidbufferofsb
EXTSYM FilteredGUI,resolutn,lineleft,cfield
EXTSYM hirestiledat,newengen,SpecialLine,HalfTrans
EXTSYM hq3xFilter
@@ -37,7 +38,9 @@ NEWSYM CopyVWinAsmStart
%endif
SECTION .bss
NEWSYM firstline, resd 1
NEWSYM prevline, resd 1
NEWSYM nextline, resd 1
NEWSYM deltaptr, resd 1
NEWSYM xcounter, resd 1
NEWSYM w1, resd 1
NEWSYM w2, resd 1
@@ -417,6 +420,8 @@ NEWSYM copy768x720x16bwin
mov esi,[vidbuffer]
mov edi,[WinVidMemStart]
add esi,16*2+256*2+32*2
mov ecx,[vidbufferofsb]
mov [deltaptr],ecx
cmp byte[FilteredGUI],0
jne .filtergui
cmp byte[GUIOn2],1
@@ -460,7 +465,8 @@ nointerp:
hq3x:
mov dl,[resolutn]
mov byte[lineleft],dl
mov byte[firstline],1
mov dword[prevline],0
mov dword[nextline],576
mov ebx,hirestiledat+1
cmp byte[GUIOn],1
je .loopy
@@ -472,54 +478,106 @@ hq3x:
cmp byte[ebx],1
jbe .nohires
call HighResProc
jmp .nextline
.nohires
movzx eax,word[esi]
mov [w5],eax
mov [w6],eax
mov edx,eax
cmp byte[firstline],1
je .firstline
movzx eax,word[esi-576]
.firstline
mov [w2],eax
mov [w3],eax
cmp byte[lineleft], 1
je .lastline
movzx edx,word[esi+576]
.lastline
mov dword[xcounter],256
mov [w8],edx
mov [w9],edx
.loopx
mov eax,[w2]
mov [w1],eax
mov eax,[w5]
mov [w4],eax
mov eax,[w8]
mov [w7],eax
mov eax,[w3]
mov [w2],eax
mov eax,[w6]
mov [w5],eax
mov eax,[w9]
mov [w8],eax
cmp dword[xcounter],1
je .a
movzx eax,word[esi+2]
mov [w6],eax
cmp byte[firstline],1
je .first
movzx eax,word[esi-576+2]
.first
mov [w3],eax
mov eax,[w6]
cmp byte[lineleft],1
je .last
movzx eax,word[esi+576+2]
.last
mov [w9],eax
mov edx,[deltaptr]
mov ecx,128
mov eax,0xAAAAAAAA
.a
mov [edx],eax
add edx,4
dec ecx
jnz .a
mov [deltaptr],edx
jmp .nexty
.nohires
mov dword[xcounter],254 ; x={Xres-2, Xres-1} are special cases.
; x=0 - special case
mov edx,[deltaptr]
mov ecx,[prevline]
mov eax,[nextline]
movq mm2,[esi+ecx]
movq mm3,[esi]
movq mm4,[esi+eax]
movq mm5,mm2
movq mm6,mm3
movq mm7,mm4
pcmpeqw mm2,[edx+ecx]
pcmpeqw mm3,[edx]
pcmpeqw mm4,[edx+eax]
pand mm2,mm3
pand mm2,mm4
movd eax,mm2
inc eax
jz .loopx_end
movd eax,mm5
movzx edx,ax
mov [w1],edx
mov [w2],edx
shr eax,16
mov [w3],eax
movd eax,mm6
movzx edx,ax
mov [w4],edx
mov [w5],edx
shr eax,16
mov [w6],eax
movd eax,mm7
movzx edx,ax
mov [w7],edx
mov [w8],edx
shr eax,16
mov [w9],eax
jmp .flags
.loopx
mov edx,[deltaptr]
mov ecx,[prevline]
mov eax,[nextline]
movq mm2,[esi+ecx-2]
movq mm3,[esi-2]
movq mm4,[esi+eax-2]
movq mm5,mm2
movq mm6,mm3
movq mm7,mm4
pcmpeqw mm2,[edx+ecx-2]
pcmpeqw mm3,[edx-2]
pcmpeqw mm4,[edx+eax-2]
pand mm2,mm3
pand mm2,mm4
movd ebx,mm2
psrlq mm2,32
movd eax,mm2
cwde
and eax,ebx
inc eax
jz .loopx_end
movd eax,mm5
mov [edx+ecx-2],ax
movzx edx,ax
mov [w1],edx
shr eax,16
mov [w2],eax
psrlq mm5,32
movd eax,mm5
movzx edx,ax
mov [w3],edx
movd eax,mm6
movzx edx,ax
mov [w4],edx
shr eax,16
mov [w5],eax
psrlq mm6,32
movd eax,mm6
movzx edx,ax
mov [w6],edx
movd eax,mm7
movzx edx,ax
mov [w7],edx
shr eax,16
mov [w8],eax
psrlq mm7,32
movd eax,mm7
movzx edx,ax
mov [w9],edx
.flags
mov ebx,[RGBtoYUVPtr]
mov eax,[w5]
xor ecx,ecx
@@ -2329,7 +2387,7 @@ hq3x:
..@cross0
mov edx,eax
shl eax,16
mov ax,dx
or eax,edx
mov [edi],eax
mov [edi+4],ax
mov [edi+ebx],eax
@@ -2340,7 +2398,7 @@ hq3x:
..@cross1
mov edx,eax
shl eax,16
mov ax,dx
or eax,edx
mov ecx,[w2]
and edx,[HalfTrans]
and ecx,[HalfTrans]
@@ -2361,7 +2419,7 @@ hq3x:
..@cross2
mov edx,eax
shl eax,16
mov ax,dx
or eax,edx
mov ecx,[w4]
and edx,[HalfTrans]
and ecx,[HalfTrans]
@@ -2381,7 +2439,7 @@ hq3x:
..@cross4
mov edx,eax
shl eax,16
mov ax,dx
or eax,edx
mov ecx,[w6]
and edx,[HalfTrans]
and ecx,[HalfTrans]
@@ -2401,7 +2459,7 @@ hq3x:
..@cross8
mov edx,eax
shl eax,16
mov ax,dx
or eax,edx
mov ecx,[w8]
and edx,[HalfTrans]
and ecx,[HalfTrans]
@@ -2421,22 +2479,132 @@ hq3x:
jmp .loopx_end
.loopx_end
add esi,2
add edi,6
dec dword[xcounter]
jz .nextline
jmp .loopx
.nextline
mov byte[firstline],0
add esi,64
add edi,[AddEndBytes]
add edi,ebx
add edi,ebx
mov ebx,[InterPtr]
inc ebx
dec byte[lineleft]
jz .fin
jmp .loopy
add esi,2
add dword[deltaptr],2
add edi,6
dec dword[xcounter]
jle .xres_2
jmp .loopx
.xres_2
; x=Xres-2 - special case
jl .xres_1
mov edx,[deltaptr]
mov ecx,[prevline]
mov eax,[nextline]
movq mm2,[esi+ecx-4]
movq mm3,[esi-4]
movq mm4,[esi+eax-4]
movq mm5,mm2
movq mm6,mm3
movq mm7,mm4
pcmpeqw mm2,[edx+ecx-4]
pcmpeqw mm3,[edx-4]
pcmpeqw mm4,[edx+eax-4]
pand mm2,mm3
pand mm2,mm4
psrlq mm2,16
movd ebx,mm2
psrlq mm2,32
movd eax,mm2
cwde
and eax,ebx
inc eax
jz .loopx_end
psrlq mm5,16
psrlq mm6,16
psrlq mm7,16
movd eax,mm5
mov [edx+ecx-2],ax
movzx edx,ax
mov [w1],edx
shr eax,16
mov [w2],eax
psrlq mm5,32
movd eax,mm5
mov [w3],eax
movd eax,mm6
movzx edx,ax
mov [w4],edx
shr eax,16
mov [w5],eax
psrlq mm6,32
movd eax,mm6
mov [w6],eax
movd eax,mm7
movzx edx,ax
mov [w7],edx
shr eax,16
mov [w8],eax
psrlq mm7,32
movd eax,mm7
mov [w9],eax
jmp .flags
.xres_1
cmp dword[xcounter],-1
jl .endofline
; x=Xres-1 - special case
mov edx,[deltaptr]
mov ecx,[prevline]
mov eax,[nextline]
movq mm2,[esi+ecx-6]
movq mm3,[esi-6]
movq mm4,[esi+eax-6]
movq mm5,mm2
movq mm6,mm3
movq mm7,mm4
pcmpeqw mm2,[edx+ecx-6]
pcmpeqw mm3,[edx-6]
pcmpeqw mm4,[edx+eax-6]
pand mm2,mm3
pand mm2,mm4
psrlq mm2,32
movd eax,mm2
inc eax
jz .loopx_end
psrlq mm5,32
psrlq mm6,32
psrlq mm7,32
movd eax,mm5
mov [edx+ecx-2],eax
movzx edx,ax
mov [w1],edx
shr eax,16
mov [w2],eax
mov [w3],eax
movd eax,mm6
movzx edx,ax
mov [w4],edx
shr eax,16
mov [w5],eax
mov [w6],eax
movd eax,mm7
movzx edx,ax
mov [w7],edx
shr eax,16
mov [w8],eax
mov [w9],eax
jmp .flags
.endofline
mov ebx,[NumBytesPerLine]
.nexty
add esi,64
add dword[deltaptr],64
add edi,[AddEndBytes]
add edi,ebx
add edi,ebx
mov ebx,[InterPtr]
inc ebx
dec byte[lineleft]
jz .fin
cmp byte[lineleft],1
je .lastline
mov dword[nextline],576
mov dword[prevline],-576
jmp .loopy
.lastline
mov dword[nextline],0
mov dword[prevline],-576
jmp .loopy
.fin
emms
pop es