Fixed colour bleeding problem (Thanks TRAC!)

This commit is contained in:
pagefault
2003-03-24 20:04:03 +00:00
parent 5a3b655b77
commit 52b3109da4

View File

@@ -2773,7 +2773,6 @@ drawsprngw16bmsnthr:
xor edi,edi xor edi,edi
normalwsprng16b sprdrawprawb16bngmsnthr,sprdrawprbwb16bngmsnthr normalwsprng16b sprdrawprawb16bngmsnthr,sprdrawprbwb16bngmsnthr
ProcessTransparencies: ProcessTransparencies:
cmp byte[NGNoTransp],0 cmp byte[NGNoTransp],0
je .yestransp je .yestransp
@@ -2910,6 +2909,7 @@ ProcessTransparencies:
jnz near .fullsubtract jnz near .fullsubtract
push ebx push ebx
push esi push esi
push edi ;
mov ecx,256 mov ecx,256
mov ebp,[HalfTrans] mov ebp,[HalfTrans]
xor edx,edx xor edx,edx
@@ -2920,16 +2920,50 @@ ProcessTransparencies:
test ax,bx test ax,bx
jz .notranspfa jz .notranspfa
mov dx,[esi+75036*2] mov dx,[esi+75036*2]
mov ebp,((1<<5)+(1<<10)+(1<<15))
mov edi,edx
; get LSBs
and edi,ebp
and edx,~((1<<5)+(1<<10)+(1<<15))
and ebp,eax
and eax,~((1<<5)+(1<<10)+(1<<15))
; add LSBs
add ebp,edi
mov edi,((2<<5)+(2<<10)+(2<<15))
; add colors
add edx,eax
and edi,ebp
mov eax,((1<<5)+(1<<10)+(1<<15))
add edx,edi
and ebp,eax
and eax,edx
add edx,ebp
; Perform saturation
; save carry bits
mov edi,eax
; correct for carry/overflow into LSBs of adjacent color components
sub edx,eax
shr edi,5 ; shift carry bits down to color LSBs
neg edi ; subtract from zero to get mask...
add edi,eax ; add in carry bits to correct for borrows
or edx,edi ; apply saturation mask
%if 0
and eax,ebp and eax,ebp
and edx,ebp and edx,ebp
add edx,eax add edx,eax
shr edx,1 shr edx,1
mov dx,[fulladdtab+edx*2] mov dx,[fulladdtab+edx*2]
%endif
mov [esi],dx mov [esi],dx
.notranspfa .notranspfa
add esi,2 add esi,2
dec ecx dec ecx
jnz .nextfa jnz .nextfa
pop edi ;
pop esi pop esi
pop ebx pop ebx
jmp .donetransp jmp .donetransp
@@ -3007,10 +3041,13 @@ ProcessTransparencies:
dec ecx dec ecx
jnz .faddl2h jnz .faddl2h
jmp .faddloopdoneh jmp .faddloopdoneh
.prochalfaddnext
movq [esi-8],mm0
.prochalfadd .prochalfadd
test dword[esi+75036*2],eax test [esi+75036*2],eax
jnz near .faddloopbh jnz near .faddloopbh
test dword[esi+75036*2+4],eax test [esi+75036*2+4],eax
jnz near .faddloopbh jnz near .faddloopbh
mov ebx,[esi] mov ebx,[esi]
and ebx,eax and ebx,eax
@@ -3022,18 +3059,19 @@ ProcessTransparencies:
jne near .faddlooph jne near .faddlooph
.prochalfadddo .prochalfadddo
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2]
pand mm0,[HalfTrans] pand mm0,[HalfTrans]
pand mm1,[HalfTrans] movq mm1,[esi+75036*2]
psrlw mm0,1 psrlw mm0,1
pand mm1,[HalfTrans]
psrlw mm1,1 psrlw mm1,1
paddw mm0,mm1 paddw mm0,mm1
movq [esi],mm0 add esi,byte 8
add esi,8
dec ecx dec ecx
jnz .prochalfadd jnz .prochalfaddnext
movq [esi-8],mm0
jmp .faddloopdoneh jmp .faddloopdoneh
.procfulladdnext:
.procfulladdnext
movq [esi-8],mm0 movq [esi-8],mm0
.procfulladd .procfulladd
mov ebx,[esi] mov ebx,[esi]
@@ -3068,34 +3106,36 @@ ProcessTransparencies:
psllw mm2,%2 psllw mm2,%2
paddusw mm0,mm1 paddusw mm0,mm1
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
psllw mm3,%3
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2 psllw mm4,%2
add esi,byte 8 paddusw mm2,mm4
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
paddusw mm2,mm4
psllw mm3,%3
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psllw mm1,%3 psllw mm1,%3
psrlw mm2,%2 psrlw mm2,%2
paddusw mm3,mm1 paddusw mm3,mm1
por mm0,mm2
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
por mm0,mm2
psrlw mm3,%3 psrlw mm3,%3
add esi,byte 8
por mm0,mm3 por mm0,mm3
dec ecx dec ecx
jnz near .procfulladdnext jnz near .procfulladdnext
movq [esi],mm0 movq [esi-8],mm0
jmp .faddloopdoneh jmp .faddloopdoneh
.faddloophnext
movq [esi-8],mm0
.faddlooph .faddlooph
mov ebx,dword[esi] mov ebx,[esi]
test ebx,eax test ebx,eax
jz near .faddl2h jz near .faddl2h
and ebx,eax and ebx,eax
cmp ebx,eax cmp ebx,eax
jne .faddloopbh jne .faddloopbh
mov ebx,dword[esi+4] mov ebx,[esi+4]
and ebx,eax and ebx,eax
cmp ebx,eax cmp ebx,eax
jne .faddloopbh jne .faddloopbh
@@ -3111,66 +3151,67 @@ ProcessTransparencies:
jne .faddloopbh jne .faddloopbh
jmp .procfulladddo jmp .procfulladddo
.faddla .faddla
test dword[esi+75036*2+4],eax test [esi+75036*2+4],eax
jz near .prochalfadddo jz near .prochalfadddo
.faddloopbh .faddloopbh
movq mm0,[esi] movq mm0,[esi]
movq mm5,mm0
movq mm6,mm0
pand mm5,[UnusedBitXor]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
movq mm5,mm0
pand mm5,[UnusedBitXor]
movq mm7,mm1 movq mm7,mm1
movq mm2,mm0
pand mm1,[UnusedBitXor] pand mm1,[UnusedBitXor]
movq mm3,mm0 movq mm2,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
psllw mm2,%2
movq mm6,mm3
psllw mm3,%3
paddusw mm0,mm1 paddusw mm0,mm1
pand mm0,[FullBitAnd]
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2
pand mm0,[FullBitAnd]
paddusw mm2,mm4
movq mm4,mm1
psllw mm1,%3
pand mm2,[FullBitAnd]
paddusw mm3,mm1
psrlw mm2,%2
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psllw mm2,%2
psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd]
movq mm1,mm4
psrlw mm2,%2
psllw mm3,%3
psllw mm1,%3
paddusw mm3,mm1
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
psrlw mm3,%3
por mm0,mm3
por mm0,mm2 por mm0,mm2
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
psrlw mm3,%3
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
por mm0,mm3
pand mm7,[UnusedBit]
pand mm4,mm6
pand mm4,[HalfTrans]
pand mm0,mm6 pand mm0,mm6
movq mm1,mm5 movq mm1,mm5
pand mm1,mm6
pand mm4,mm6
pxor mm6,[UnusedBitXor]
pand mm5,mm6 pand mm5,mm6
pand mm7,[UnusedBit] pand mm5,[HalfTrans]
pand mm4,[HalfTrans]
pand mm1,[HalfTrans]
psrlw mm1,1
psrlw mm4,1 psrlw mm4,1
paddw mm1,mm4
pcmpeqw mm7,[UnusedBit] pcmpeqw mm7,[UnusedBit]
psrlw mm5,1
; pxor mm6,[UnusedBitXor]
pand mm0,mm7 pand mm0,mm7
pxor mm7,[UnusedBitXor] pxor mm7,[UnusedBitXor]
pand mm1,mm7 paddw mm5,mm4
por mm0,mm1 ; pand mm6,mm1
pandn mm6,mm1
pand mm5,mm7
por mm0,mm6
por mm0,mm5 por mm0,mm5
movq [esi],mm0 add esi,byte 8
add esi,8
dec ecx dec ecx
jnz near .faddlooph jnz near .faddloophnext
movq [esi-8],mm0
.faddloopdoneh .faddloopdoneh
pop ebx pop ebx
pop esi pop esi
@@ -3182,71 +3223,75 @@ ProcessTransparencies:
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.fsubl2h .fsubl2h
test dword[esi],eax test [esi],eax
jnz .fsubloopbh jnz .fsubloopbh
test dword[esi+4],eax .fsubl2h_2
test [esi+4],eax
jnz .fsubloopbh jnz .fsubloopbh
add esi,8 add esi,byte 8
dec ecx dec ecx
jnz .fsubl2h jnz .fsubl2h
jmp .fsubloopdoneh jmp .fsubloopdoneh
.fsubloophnext
movq [esi-8],mm0
.fsublooph .fsublooph
test dword[esi],eax test [esi],eax
jz .fsubl2h jz .fsubl2h_2
.fsubloopbh .fsubloopbh
movq mm0,[esi] movq mm0,[esi]
movq mm5,mm0
movq mm6,mm0
pxor mm0,[UnusedBitXor]
pand mm5,[UnusedBitXor]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
movq mm5,mm0
pxor mm0,[UnusedBitXor]
movq mm7,mm1 movq mm7,mm1
movq mm2,mm0
pand mm1,[UnusedBitXor] pand mm1,[UnusedBitXor]
movq mm3,mm0 movq mm2,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
paddusw mm0,mm1 paddusw mm0,mm1
psllw mm2,%2
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2
movq mm6,mm5
paddusw mm2,mm4
psllw mm3,%3
pand mm2,[FullBitAnd]
psllw mm1,%3
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psllw mm2,%2 paddusw mm3,mm1
psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd]
psrlw mm2,%2
psllw mm3,%3
psllw mm4,%3
paddusw mm3,mm4
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
psrlw mm2,%2
pand mm5,[UnusedBitXor]
psrlw mm3,%3 psrlw mm3,%3
por mm0,mm3
por mm0,mm2
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
pxor mm0,[UnusedBitXor] por mm0,mm2
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
por mm0,mm3
pxor mm0,[UnusedBitXor]
pand mm0,mm6 pand mm0,mm6
pxor mm6,[UnusedBitXor] add esi,byte 8
pand mm5,mm6 ; pxor mm6,[UnusedBitXor]
pand mm7,[UnusedBit]
movq mm1,mm0 movq mm1,mm0
pand mm1,[HalfTrans] pand mm7,[UnusedBit]
psrlw mm1,1 ; pand mm6,mm5
pandn mm6,mm5
pand mm0,[HalfTrans]
psrlw mm0,1
pcmpeqw mm7,[UnusedBit] pcmpeqw mm7,[UnusedBit]
pand mm0,mm7
pxor mm7,[UnusedBitXor]
pand mm1,mm7 pand mm1,mm7
pxor mm7,[UnusedBitXor]
por mm1,mm6
pand mm0,mm7
por mm0,mm1 por mm0,mm1
por mm0,mm5
movq [esi],mm0
add esi,8
dec ecx dec ecx
jnz near .fsublooph jnz near .fsubloophnext
movq [esi-8],mm0
.fsubloopdoneh .fsubloopdoneh
pop esi pop esi
pop ebx pop ebx
@@ -3260,23 +3305,24 @@ ProcessTransparencies:
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.faddl2 .faddl2
test dword[esi],eax test [esi],eax
jnz .faddloopb jnz .faddloopb
.faddl2_2 .faddl2_2
test dword[esi+4],eax test [esi+4],eax
jnz .faddloopb jnz .faddloopb
add esi,8 add esi,byte 8
dec ecx dec ecx
jnz .faddl2 jnz .faddl2
jmp .faddloopdone jmp .faddloopdone
.faddloopnext .faddloopnext
movq [esi-8],mm0 movq [esi-8],mm0
.faddloop .faddloop
test dword[esi],eax test [esi],eax
jz .faddl2_2 jz .faddl2_2
.faddloopb .faddloopb
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
; save old pixels to preserve any not needing arithmetic
movq mm6,mm0 movq mm6,mm0
pand mm0,[UnusedBitXor] pand mm0,[UnusedBitXor]
movq mm4,mm1 movq mm4,mm1
@@ -3285,9 +3331,11 @@ ProcessTransparencies:
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
movq mm3,mm2 movq mm3,mm2
pand mm1,[FullBitAnd] ;*
movq mm5,mm2 movq mm5,mm2
%else %else
movq mm3,mm0 movq mm3,mm0
pand mm1,[FullBitAnd] ;*
movq mm5,mm0 movq mm5,mm0
%endif %endif
paddusw mm0,mm1 paddusw mm0,mm1
@@ -3295,10 +3343,12 @@ ProcessTransparencies:
psllw mm2,%2 psllw mm2,%2
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2 psllw mm4,%2
pand mm4,[FullBitAnd] ;*
paddusw mm2,mm4 paddusw mm2,mm4
psllw mm3,%3 psllw mm3,%3
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psllw mm1,%3 psllw mm1,%3
pand mm1,[FullBitAnd] ;*
psrlw mm2,%2 psrlw mm2,%2
paddusw mm3,mm1 paddusw mm3,mm1
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
@@ -3306,15 +3356,18 @@ ProcessTransparencies:
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psrlw mm3,%3 psrlw mm3,%3
; get alpha bits
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
por mm0,mm2 por mm0,mm2
; generate mask for combining pixels with and without arithmetic
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
por mm0,mm3 por mm0,mm3
pand mm0,mm6 pand mm0,mm6
pxor mm6,[UnusedBitXor] ; pxor mm6,[UnusedBitXor]
pand mm5,mm6 ; pand mm6,mm5
pandn mm6,mm5
add esi,byte 8 add esi,byte 8
por mm0,mm5 por mm0,mm6
dec ecx dec ecx
jnz near .faddloopnext jnz near .faddloopnext
movq [esi-8],mm0 movq [esi-8],mm0
@@ -3322,67 +3375,72 @@ ProcessTransparencies:
pop esi pop esi
pop ebx pop ebx
jmp .donetransp jmp .donetransp
.fullsubtract .fullsubtract
push ebx push ebx
push esi push esi
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.fsubl2 .fsubl2
test dword[esi],eax test [esi],eax
jnz .fsubloopb jnz .fsubloopb
test dword[esi+4],eax .fsubl2_2
test [esi+4],eax
jnz .fsubloopb jnz .fsubloopb
add esi,8 add esi,byte 8
dec ecx dec ecx
jnz .fsubl2 jnz .fsubl2
jmp .fsubloopdone jmp .fsubloopdone
.fsubloopnext
movq [esi-8],mm0
.fsubloop .fsubloop
test dword[esi],eax test [esi],eax
jz .fsubl2 jz .fsubl2_2
.fsubloopb .fsubloopb
movq mm0,[esi] movq mm0,[esi]
movq mm5,mm0
movq mm6,mm0
pxor mm0,[UnusedBitXor]
pand mm5,[UnusedBitXor]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
movq mm2,mm0 movq mm5,mm0
pxor mm0,[UnusedBitXor]
movq mm6,mm5
pand mm1,[UnusedBitXor] pand mm1,[UnusedBitXor]
movq mm3,mm0 movq mm2,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
paddusw mm0,mm1 paddusw mm0,mm1
psllw mm2,%2
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
movq mm1,mm4 movq mm1,mm4
pand mm5,[UnusedBitXor]
psllw mm4,%2
paddusw mm2,mm4
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psllw mm2,%2
psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psrlw mm2,%2
psllw mm3,%3 psllw mm3,%3
psllw mm4,%3 psllw mm1,%3
paddusw mm3,mm4 add esi,byte 8
psrlw mm2,%2
paddusw mm3,mm1
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
psrlw mm3,%3
por mm0,mm3
por mm0,mm2 por mm0,mm2
psrlw mm3,%3
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
por mm0,mm3
pxor mm0,[UnusedBitXor] pxor mm0,[UnusedBitXor]
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
pand mm0,mm6 pand mm0,mm6
pxor mm6,[UnusedBitXor] ; pxor mm6,[UnusedBitXor]
pand mm5,mm6 ; pand mm6,mm5
por mm0,mm5 pandn mm6,mm5
movq [esi],mm0 por mm0,mm6
add esi,8
dec ecx dec ecx
jnz near .fsubloop jnz near .fsubloopnext
movq [esi-8],mm0
.fsubloopdone .fsubloopdone
pop esi pop esi
pop ebx pop ebx