Rollback of gfx code to fix nasty colour problems

This commit is contained in:
pagefault
2003-10-31 01:22:59 +00:00
parent f933a7ca23
commit c6fae329d5

View File

@@ -2773,6 +2773,7 @@ drawsprngw16bmsnthr:
xor edi,edi xor edi,edi
normalwsprng16b sprdrawprawb16bngmsnthr,sprdrawprbwb16bngmsnthr normalwsprng16b sprdrawprawb16bngmsnthr,sprdrawprbwb16bngmsnthr
ProcessTransparencies: ProcessTransparencies:
cmp byte[NGNoTransp],0 cmp byte[NGNoTransp],0
je .yestransp je .yestransp
@@ -2909,7 +2910,6 @@ ProcessTransparencies:
jnz near .fullsubtract jnz near .fullsubtract
push ebx push ebx
push esi push esi
push edi ;
mov ecx,256 mov ecx,256
mov ebp,[HalfTrans] mov ebp,[HalfTrans]
xor edx,edx xor edx,edx
@@ -2920,50 +2920,16 @@ ProcessTransparencies:
test ax,bx test ax,bx
jz .notranspfa jz .notranspfa
mov dx,[esi+75036*2] mov dx,[esi+75036*2]
mov ebp,((1<<5)+(1<<10)+(1<<15))
mov edi,edx
; get LSBs
and edi,ebp
and edx,~((1<<5)+(1<<10)+(1<<15))
and ebp,eax
and eax,~((1<<5)+(1<<10)+(1<<15))
; add LSBs
add ebp,edi
mov edi,((2<<5)+(2<<10)+(2<<15))
; add colors
add edx,eax
and edi,ebp
mov eax,((1<<5)+(1<<10)+(1<<15))
add edx,edi
and ebp,eax
and eax,edx
add edx,ebp
; Perform saturation
; save carry bits
mov edi,eax
; correct for carry/overflow into LSBs of adjacent color components
sub edx,eax
shr edi,5 ; shift carry bits down to color LSBs
neg edi ; subtract from zero to get mask...
add edi,eax ; add in carry bits to correct for borrows
or edx,edi ; apply saturation mask
%if 0
and eax,ebp and eax,ebp
and edx,ebp and edx,ebp
add edx,eax add edx,eax
shr edx,1 shr edx,1
mov dx,[fulladdtab+edx*2] mov dx,[fulladdtab+edx*2]
%endif
mov [esi],dx mov [esi],dx
.notranspfa .notranspfa
add esi,2 add esi,2
dec ecx dec ecx
jnz .nextfa jnz .nextfa
pop edi ;
pop esi pop esi
pop ebx pop ebx
jmp .donetransp jmp .donetransp
@@ -3041,13 +3007,10 @@ ProcessTransparencies:
dec ecx dec ecx
jnz .faddl2h jnz .faddl2h
jmp .faddloopdoneh jmp .faddloopdoneh
.prochalfaddnext
movq [esi-8],mm0
.prochalfadd .prochalfadd
test [esi+75036*2],eax test dword[esi+75036*2],eax
jnz near .faddloopbh jnz near .faddloopbh
test [esi+75036*2+4],eax test dword[esi+75036*2+4],eax
jnz near .faddloopbh jnz near .faddloopbh
mov ebx,[esi] mov ebx,[esi]
and ebx,eax and ebx,eax
@@ -3059,19 +3022,18 @@ ProcessTransparencies:
jne near .faddlooph jne near .faddlooph
.prochalfadddo .prochalfadddo
movq mm0,[esi] movq mm0,[esi]
pand mm0,[HalfTrans]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
psrlw mm0,1 pand mm0,[HalfTrans]
pand mm1,[HalfTrans] pand mm1,[HalfTrans]
psrlw mm0,1
psrlw mm1,1 psrlw mm1,1
paddw mm0,mm1 paddw mm0,mm1
add esi,byte 8 movq [esi],mm0
add esi,8
dec ecx dec ecx
jnz .prochalfaddnext jnz .prochalfadd
movq [esi-8],mm0
jmp .faddloopdoneh jmp .faddloopdoneh
.procfulladdnext:
.procfulladdnext
movq [esi-8],mm0 movq [esi-8],mm0
.procfulladd .procfulladd
mov ebx,[esi] mov ebx,[esi]
@@ -3106,36 +3068,34 @@ ProcessTransparencies:
psllw mm2,%2 psllw mm2,%2
paddusw mm0,mm1 paddusw mm0,mm1
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
psllw mm3,%3
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2 psllw mm4,%2
paddusw mm2,mm4 add esi,byte 8
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
paddusw mm2,mm4
psllw mm3,%3
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psllw mm1,%3 psllw mm1,%3
psrlw mm2,%2 psrlw mm2,%2
paddusw mm3,mm1 paddusw mm3,mm1
pand mm3,[FullBitAnd]
por mm0,mm2 por mm0,mm2
pand mm3,[FullBitAnd]
psrlw mm3,%3 psrlw mm3,%3
add esi,byte 8
por mm0,mm3 por mm0,mm3
dec ecx dec ecx
jnz near .procfulladdnext jnz near .procfulladdnext
movq [esi-8],mm0 movq [esi],mm0
jmp .faddloopdoneh jmp .faddloopdoneh
.faddloophnext
movq [esi-8],mm0
.faddlooph .faddlooph
mov ebx,[esi] mov ebx,dword[esi]
test ebx,eax test ebx,eax
jz near .faddl2h jz near .faddl2h
and ebx,eax and ebx,eax
cmp ebx,eax cmp ebx,eax
jne .faddloopbh jne .faddloopbh
mov ebx,[esi+4] mov ebx,dword[esi+4]
and ebx,eax and ebx,eax
cmp ebx,eax cmp ebx,eax
jne .faddloopbh jne .faddloopbh
@@ -3151,67 +3111,66 @@ ProcessTransparencies:
jne .faddloopbh jne .faddloopbh
jmp .procfulladddo jmp .procfulladddo
.faddla .faddla
test [esi+75036*2+4],eax test dword[esi+75036*2+4],eax
jz near .prochalfadddo jz near .prochalfadddo
.faddloopbh .faddloopbh
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2]
movq mm5,mm0 movq mm5,mm0
movq mm6,mm0
pand mm5,[UnusedBitXor] pand mm5,[UnusedBitXor]
movq mm1,[esi+75036*2]
movq mm7,mm1 movq mm7,mm1
pand mm1,[UnusedBitXor]
movq mm2,mm0 movq mm2,mm0
pand mm1,[UnusedBitXor]
movq mm3,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
psllw mm2,%2
movq mm6,mm3
psllw mm3,%3
paddusw mm0,mm1 paddusw mm0,mm1
movq mm1,mm4
psllw mm4,%2
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
paddusw mm2,mm4 movq mm1,mm4
movq mm4,mm1
psllw mm1,%3
pand mm2,[FullBitAnd]
paddusw mm3,mm1
psrlw mm2,%2
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psllw mm2,%2
psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd]
movq mm1,mm4
psrlw mm2,%2
psllw mm3,%3
psllw mm1,%3
paddusw mm3,mm1
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
psrlw mm3,%3
por mm0,mm3
por mm0,mm2 por mm0,mm2
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
psrlw mm3,%3
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
por mm0,mm3
pand mm7,[UnusedBit]
pand mm4,mm6
pand mm4,[HalfTrans]
pand mm0,mm6 pand mm0,mm6
movq mm1,mm5 movq mm1,mm5
pand mm1,mm6
pand mm4,mm6
pxor mm6,[UnusedBitXor]
pand mm5,mm6 pand mm5,mm6
pand mm5,[HalfTrans] pand mm7,[UnusedBit]
pand mm4,[HalfTrans]
pand mm1,[HalfTrans]
psrlw mm1,1
psrlw mm4,1 psrlw mm4,1
paddw mm1,mm4
pcmpeqw mm7,[UnusedBit] pcmpeqw mm7,[UnusedBit]
psrlw mm5,1
; pxor mm6,[UnusedBitXor]
pand mm0,mm7 pand mm0,mm7
pxor mm7,[UnusedBitXor] pxor mm7,[UnusedBitXor]
paddw mm5,mm4 pand mm1,mm7
; pand mm6,mm1 por mm0,mm1
pandn mm6,mm1
pand mm5,mm7
por mm0,mm6
por mm0,mm5 por mm0,mm5
add esi,byte 8 movq [esi],mm0
add esi,8
dec ecx dec ecx
jnz near .faddloophnext jnz near .faddlooph
movq [esi-8],mm0
.faddloopdoneh .faddloopdoneh
pop ebx pop ebx
pop esi pop esi
@@ -3223,75 +3182,71 @@ ProcessTransparencies:
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.fsubl2h .fsubl2h
test [esi],eax test dword[esi],eax
jnz .fsubloopbh jnz .fsubloopbh
.fsubl2h_2 test dword[esi+4],eax
test [esi+4],eax
jnz .fsubloopbh jnz .fsubloopbh
add esi,byte 8 add esi,8
dec ecx dec ecx
jnz .fsubl2h jnz .fsubl2h
jmp .fsubloopdoneh jmp .fsubloopdoneh
.fsubloophnext
movq [esi-8],mm0
.fsublooph .fsublooph
test [esi],eax test dword[esi],eax
jz .fsubl2h_2 jz .fsubl2h
.fsubloopbh .fsubloopbh
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2]
movq mm5,mm0 movq mm5,mm0
movq mm6,mm0
pxor mm0,[UnusedBitXor] pxor mm0,[UnusedBitXor]
pand mm5,[UnusedBitXor]
movq mm1,[esi+75036*2]
movq mm7,mm1 movq mm7,mm1
pand mm1,[UnusedBitXor]
movq mm2,mm0 movq mm2,mm0
pand mm1,[UnusedBitXor]
movq mm3,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
paddusw mm0,mm1 paddusw mm0,mm1
psllw mm2,%2
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2
movq mm6,mm5
paddusw mm2,mm4
psllw mm3,%3
pand mm2,[FullBitAnd]
psllw mm1,%3
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
paddusw mm3,mm1 psllw mm2,%2
pand mm3,[FullBitAnd] psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd]
psrlw mm2,%2 psrlw mm2,%2
pand mm5,[UnusedBitXor] psllw mm3,%3
psllw mm4,%3
paddusw mm3,mm4
pand mm3,[FullBitAnd]
psrlw mm3,%3 psrlw mm3,%3
pand mm6,[UnusedBit]
por mm0,mm2
pcmpeqw mm6,[UnusedBit]
por mm0,mm3 por mm0,mm3
por mm0,mm2
pand mm6,[UnusedBit]
pxor mm0,[UnusedBitXor] pxor mm0,[UnusedBitXor]
pcmpeqw mm6,[UnusedBit]
pand mm0,mm6 pand mm0,mm6
add esi,byte 8 pxor mm6,[UnusedBitXor]
; pxor mm6,[UnusedBitXor] pand mm5,mm6
movq mm1,mm0
pand mm7,[UnusedBit] pand mm7,[UnusedBit]
; pand mm6,mm5 movq mm1,mm0
pandn mm6,mm5 pand mm1,[HalfTrans]
pand mm0,[HalfTrans] psrlw mm1,1
psrlw mm0,1
pcmpeqw mm7,[UnusedBit] pcmpeqw mm7,[UnusedBit]
pand mm1,mm7
pxor mm7,[UnusedBitXor]
por mm1,mm6
pand mm0,mm7 pand mm0,mm7
pxor mm7,[UnusedBitXor]
pand mm1,mm7
por mm0,mm1 por mm0,mm1
por mm0,mm5
movq [esi],mm0
add esi,8
dec ecx dec ecx
jnz near .fsubloophnext jnz near .fsublooph
movq [esi-8],mm0
.fsubloopdoneh .fsubloopdoneh
pop esi pop esi
pop ebx pop ebx
@@ -3305,24 +3260,23 @@ ProcessTransparencies:
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.faddl2 .faddl2
test [esi],eax test dword[esi],eax
jnz .faddloopb jnz .faddloopb
.faddl2_2 .faddl2_2
test [esi+4],eax test dword[esi+4],eax
jnz .faddloopb jnz .faddloopb
add esi,byte 8 add esi,8
dec ecx dec ecx
jnz .faddl2 jnz .faddl2
jmp .faddloopdone jmp .faddloopdone
.faddloopnext .faddloopnext
movq [esi-8],mm0 movq [esi-8],mm0
.faddloop .faddloop
test [esi],eax test dword[esi],eax
jz .faddl2_2 jz .faddl2_2
.faddloopb .faddloopb
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2] movq mm1,[esi+75036*2]
; save old pixels to preserve any not needing arithmetic
movq mm6,mm0 movq mm6,mm0
pand mm0,[UnusedBitXor] pand mm0,[UnusedBitXor]
movq mm4,mm1 movq mm4,mm1
@@ -3331,11 +3285,9 @@ ProcessTransparencies:
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
movq mm3,mm2 movq mm3,mm2
pand mm1,[FullBitAnd] ;*
movq mm5,mm2 movq mm5,mm2
%else %else
movq mm3,mm0 movq mm3,mm0
pand mm1,[FullBitAnd] ;*
movq mm5,mm0 movq mm5,mm0
%endif %endif
paddusw mm0,mm1 paddusw mm0,mm1
@@ -3343,12 +3295,10 @@ ProcessTransparencies:
psllw mm2,%2 psllw mm2,%2
movq mm1,mm4 movq mm1,mm4
psllw mm4,%2 psllw mm4,%2
pand mm4,[FullBitAnd] ;*
paddusw mm2,mm4 paddusw mm2,mm4
psllw mm3,%3 psllw mm3,%3
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psllw mm1,%3 psllw mm1,%3
pand mm1,[FullBitAnd] ;*
psrlw mm2,%2 psrlw mm2,%2
paddusw mm3,mm1 paddusw mm3,mm1
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
@@ -3356,18 +3306,15 @@ ProcessTransparencies:
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psrlw mm3,%3 psrlw mm3,%3
; get alpha bits
pand mm6,[UnusedBit] pand mm6,[UnusedBit]
por mm0,mm2 por mm0,mm2
; generate mask for combining pixels with and without arithmetic
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
por mm0,mm3 por mm0,mm3
pand mm0,mm6 pand mm0,mm6
; pxor mm6,[UnusedBitXor] pxor mm6,[UnusedBitXor]
; pand mm6,mm5 pand mm5,mm6
pandn mm6,mm5
add esi,byte 8 add esi,byte 8
por mm0,mm6 por mm0,mm5
dec ecx dec ecx
jnz near .faddloopnext jnz near .faddloopnext
movq [esi-8],mm0 movq [esi-8],mm0
@@ -3375,72 +3322,67 @@ ProcessTransparencies:
pop esi pop esi
pop ebx pop ebx
jmp .donetransp jmp .donetransp
.fullsubtract .fullsubtract
push ebx push ebx
push esi push esi
mov ecx,64 mov ecx,64
mov eax,[UnusedBit] mov eax,[UnusedBit]
.fsubl2 .fsubl2
test [esi],eax test dword[esi],eax
jnz .fsubloopb jnz .fsubloopb
.fsubl2_2 test dword[esi+4],eax
test [esi+4],eax
jnz .fsubloopb jnz .fsubloopb
add esi,byte 8 add esi,8
dec ecx dec ecx
jnz .fsubl2 jnz .fsubl2
jmp .fsubloopdone jmp .fsubloopdone
.fsubloopnext
movq [esi-8],mm0
.fsubloop .fsubloop
test [esi],eax test dword[esi],eax
jz .fsubl2_2 jz .fsubl2
.fsubloopb .fsubloopb
movq mm0,[esi] movq mm0,[esi]
movq mm1,[esi+75036*2]
movq mm5,mm0 movq mm5,mm0
movq mm6,mm0
pxor mm0,[UnusedBitXor] pxor mm0,[UnusedBitXor]
movq mm6,mm5 pand mm5,[UnusedBitXor]
pand mm1,[UnusedBitXor] movq mm1,[esi+75036*2]
movq mm2,mm0 movq mm2,mm0
pand mm1,[UnusedBitXor]
movq mm3,mm0
movq mm4,mm1 movq mm4,mm1
%if %1>0 %if %1>0
psllw mm0,%1 psllw mm0,%1
psllw mm1,%1 psllw mm1,%1
%endif %endif
movq mm3,mm2
paddusw mm0,mm1 paddusw mm0,mm1
psllw mm2,%2
pand mm0,[FullBitAnd] pand mm0,[FullBitAnd]
movq mm1,mm4 movq mm1,mm4
pand mm5,[UnusedBitXor]
psllw mm4,%2
paddusw mm2,mm4
%if %1>0 %if %1>0
psrlw mm0,%1 psrlw mm0,%1
%endif %endif
psllw mm2,%2
psllw mm1,%2
paddusw mm2,mm1
pand mm2,[FullBitAnd] pand mm2,[FullBitAnd]
psllw mm3,%3
psllw mm1,%3
add esi,byte 8
psrlw mm2,%2 psrlw mm2,%2
paddusw mm3,mm1 psllw mm3,%3
psllw mm4,%3
paddusw mm3,mm4
pand mm3,[FullBitAnd] pand mm3,[FullBitAnd]
por mm0,mm2
psrlw mm3,%3 psrlw mm3,%3
pand mm6,[UnusedBit]
por mm0,mm3 por mm0,mm3
por mm0,mm2
pand mm6,[UnusedBit]
pxor mm0,[UnusedBitXor] pxor mm0,[UnusedBitXor]
pcmpeqw mm6,[UnusedBit] pcmpeqw mm6,[UnusedBit]
pand mm0,mm6 pand mm0,mm6
; pxor mm6,[UnusedBitXor] pxor mm6,[UnusedBitXor]
; pand mm6,mm5 pand mm5,mm6
pandn mm6,mm5 por mm0,mm5
por mm0,mm6 movq [esi],mm0
add esi,8
dec ecx dec ecx
jnz near .fsubloopnext jnz near .fsubloop
movq [esi-8],mm0
.fsubloopdone .fsubloopdone
pop esi pop esi
pop ebx pop ebx