From c6fae329d50ae68cddb2837f537b6718edbc2664 Mon Sep 17 00:00:00 2001 From: pagefault <> Date: Fri, 31 Oct 2003 01:22:59 +0000 Subject: [PATCH] Rollback of gfx code to fix nasty colour problems --- zsnes/src/video/newgfx16.asm | 276 ++++++++++++++--------------------- 1 file changed, 109 insertions(+), 167 deletions(-) diff --git a/zsnes/src/video/newgfx16.asm b/zsnes/src/video/newgfx16.asm index 78a87755..2bf1e2ec 100644 --- a/zsnes/src/video/newgfx16.asm +++ b/zsnes/src/video/newgfx16.asm @@ -2773,6 +2773,7 @@ drawsprngw16bmsnthr: xor edi,edi normalwsprng16b sprdrawprawb16bngmsnthr,sprdrawprbwb16bngmsnthr + ProcessTransparencies: cmp byte[NGNoTransp],0 je .yestransp @@ -2909,7 +2910,6 @@ ProcessTransparencies: jnz near .fullsubtract push ebx push esi - push edi ; mov ecx,256 mov ebp,[HalfTrans] xor edx,edx @@ -2920,50 +2920,16 @@ ProcessTransparencies: test ax,bx jz .notranspfa mov dx,[esi+75036*2] - - mov ebp,((1<<5)+(1<<10)+(1<<15)) - mov edi,edx - - ; get LSBs - and edi,ebp - and edx,~((1<<5)+(1<<10)+(1<<15)) - and ebp,eax - and eax,~((1<<5)+(1<<10)+(1<<15)) - ; add LSBs - add ebp,edi - mov edi,((2<<5)+(2<<10)+(2<<15)) - ; add colors - add edx,eax - and edi,ebp - mov eax,((1<<5)+(1<<10)+(1<<15)) - add edx,edi - and ebp,eax - and eax,edx - add edx,ebp - ; Perform saturation - - ; save carry bits - mov edi,eax - ; correct for carry/overflow into LSBs of adjacent color components - sub edx,eax - shr edi,5 ; shift carry bits down to color LSBs - neg edi ; subtract from zero to get mask... - add edi,eax ; add in carry bits to correct for borrows - or edx,edi ; apply saturation mask - -%if 0 and eax,ebp and edx,ebp add edx,eax shr edx,1 mov dx,[fulladdtab+edx*2] -%endif mov [esi],dx .notranspfa add esi,2 dec ecx jnz .nextfa - pop edi ; pop esi pop ebx jmp .donetransp @@ -3041,13 +3007,10 @@ ProcessTransparencies: dec ecx jnz .faddl2h jmp .faddloopdoneh - -.prochalfaddnext - movq [esi-8],mm0 .prochalfadd - test [esi+75036*2],eax + test dword[esi+75036*2],eax jnz near .faddloopbh - test [esi+75036*2+4],eax + test dword[esi+75036*2+4],eax jnz near .faddloopbh mov ebx,[esi] and ebx,eax @@ -3059,19 +3022,18 @@ ProcessTransparencies: jne near .faddlooph .prochalfadddo movq mm0,[esi] - pand mm0,[HalfTrans] movq mm1,[esi+75036*2] - psrlw mm0,1 + pand mm0,[HalfTrans] pand mm1,[HalfTrans] + psrlw mm0,1 psrlw mm1,1 paddw mm0,mm1 - add esi,byte 8 + movq [esi],mm0 + add esi,8 dec ecx - jnz .prochalfaddnext - movq [esi-8],mm0 + jnz .prochalfadd jmp .faddloopdoneh - -.procfulladdnext +.procfulladdnext: movq [esi-8],mm0 .procfulladd mov ebx,[esi] @@ -3106,36 +3068,34 @@ ProcessTransparencies: psllw mm2,%2 paddusw mm0,mm1 pand mm0,[FullBitAnd] - psllw mm3,%3 movq mm1,mm4 psllw mm4,%2 - paddusw mm2,mm4 + add esi,byte 8 %if %1>0 psrlw mm0,%1 %endif + paddusw mm2,mm4 + psllw mm3,%3 pand mm2,[FullBitAnd] psllw mm1,%3 psrlw mm2,%2 paddusw mm3,mm1 - pand mm3,[FullBitAnd] por mm0,mm2 + pand mm3,[FullBitAnd] psrlw mm3,%3 - add esi,byte 8 por mm0,mm3 dec ecx jnz near .procfulladdnext - movq [esi-8],mm0 + movq [esi],mm0 jmp .faddloopdoneh -.faddloophnext - movq [esi-8],mm0 .faddlooph - mov ebx,[esi] + mov ebx,dword[esi] test ebx,eax jz near .faddl2h and ebx,eax cmp ebx,eax jne .faddloopbh - mov ebx,[esi+4] + mov ebx,dword[esi+4] and ebx,eax cmp ebx,eax jne .faddloopbh @@ -3151,67 +3111,66 @@ ProcessTransparencies: jne .faddloopbh jmp .procfulladddo .faddla - test [esi+75036*2+4],eax + test dword[esi+75036*2+4],eax jz near .prochalfadddo .faddloopbh movq mm0,[esi] - movq mm1,[esi+75036*2] movq mm5,mm0 + movq mm6,mm0 pand mm5,[UnusedBitXor] + movq mm1,[esi+75036*2] movq mm7,mm1 - pand mm1,[UnusedBitXor] movq mm2,mm0 + pand mm1,[UnusedBitXor] + movq mm3,mm0 movq mm4,mm1 %if %1>0 psllw mm0,%1 psllw mm1,%1 %endif - movq mm3,mm2 - psllw mm2,%2 - movq mm6,mm3 - psllw mm3,%3 paddusw mm0,mm1 - movq mm1,mm4 - psllw mm4,%2 pand mm0,[FullBitAnd] - paddusw mm2,mm4 - movq mm4,mm1 - psllw mm1,%3 - pand mm2,[FullBitAnd] - paddusw mm3,mm1 - psrlw mm2,%2 + movq mm1,mm4 %if %1>0 psrlw mm0,%1 %endif + psllw mm2,%2 + psllw mm1,%2 + paddusw mm2,mm1 + pand mm2,[FullBitAnd] + movq mm1,mm4 + psrlw mm2,%2 + psllw mm3,%3 + psllw mm1,%3 + paddusw mm3,mm1 pand mm3,[FullBitAnd] + psrlw mm3,%3 + por mm0,mm3 por mm0,mm2 pand mm6,[UnusedBit] - psrlw mm3,%3 pcmpeqw mm6,[UnusedBit] - por mm0,mm3 - pand mm7,[UnusedBit] - pand mm4,mm6 - pand mm4,[HalfTrans] pand mm0,mm6 movq mm1,mm5 + pand mm1,mm6 + pand mm4,mm6 + pxor mm6,[UnusedBitXor] pand mm5,mm6 - pand mm5,[HalfTrans] + pand mm7,[UnusedBit] + pand mm4,[HalfTrans] + pand mm1,[HalfTrans] + psrlw mm1,1 psrlw mm4,1 + paddw mm1,mm4 pcmpeqw mm7,[UnusedBit] - psrlw mm5,1 -; pxor mm6,[UnusedBitXor] pand mm0,mm7 pxor mm7,[UnusedBitXor] - paddw mm5,mm4 -; pand mm6,mm1 - pandn mm6,mm1 - pand mm5,mm7 - por mm0,mm6 + pand mm1,mm7 + por mm0,mm1 por mm0,mm5 - add esi,byte 8 + movq [esi],mm0 + add esi,8 dec ecx - jnz near .faddloophnext - movq [esi-8],mm0 + jnz near .faddlooph .faddloopdoneh pop ebx pop esi @@ -3223,75 +3182,71 @@ ProcessTransparencies: mov ecx,64 mov eax,[UnusedBit] .fsubl2h - test [esi],eax + test dword[esi],eax jnz .fsubloopbh -.fsubl2h_2 - test [esi+4],eax + test dword[esi+4],eax jnz .fsubloopbh - add esi,byte 8 + add esi,8 dec ecx jnz .fsubl2h jmp .fsubloopdoneh -.fsubloophnext - movq [esi-8],mm0 .fsublooph - test [esi],eax - jz .fsubl2h_2 + test dword[esi],eax + jz .fsubl2h .fsubloopbh movq mm0,[esi] - movq mm1,[esi+75036*2] movq mm5,mm0 + movq mm6,mm0 pxor mm0,[UnusedBitXor] + pand mm5,[UnusedBitXor] + movq mm1,[esi+75036*2] movq mm7,mm1 - pand mm1,[UnusedBitXor] movq mm2,mm0 + pand mm1,[UnusedBitXor] + movq mm3,mm0 movq mm4,mm1 %if %1>0 psllw mm0,%1 psllw mm1,%1 %endif - movq mm3,mm2 paddusw mm0,mm1 - psllw mm2,%2 pand mm0,[FullBitAnd] movq mm1,mm4 - psllw mm4,%2 - movq mm6,mm5 - paddusw mm2,mm4 - psllw mm3,%3 - pand mm2,[FullBitAnd] - psllw mm1,%3 %if %1>0 psrlw mm0,%1 %endif - paddusw mm3,mm1 - pand mm3,[FullBitAnd] + psllw mm2,%2 + psllw mm1,%2 + paddusw mm2,mm1 + pand mm2,[FullBitAnd] psrlw mm2,%2 - pand mm5,[UnusedBitXor] + psllw mm3,%3 + psllw mm4,%3 + paddusw mm3,mm4 + pand mm3,[FullBitAnd] psrlw mm3,%3 - pand mm6,[UnusedBit] - por mm0,mm2 - pcmpeqw mm6,[UnusedBit] por mm0,mm3 + por mm0,mm2 + pand mm6,[UnusedBit] pxor mm0,[UnusedBitXor] + pcmpeqw mm6,[UnusedBit] pand mm0,mm6 - add esi,byte 8 -; pxor mm6,[UnusedBitXor] - movq mm1,mm0 + pxor mm6,[UnusedBitXor] + pand mm5,mm6 pand mm7,[UnusedBit] -; pand mm6,mm5 - pandn mm6,mm5 - pand mm0,[HalfTrans] - psrlw mm0,1 + movq mm1,mm0 + pand mm1,[HalfTrans] + psrlw mm1,1 pcmpeqw mm7,[UnusedBit] - pand mm1,mm7 - pxor mm7,[UnusedBitXor] - por mm1,mm6 pand mm0,mm7 + pxor mm7,[UnusedBitXor] + pand mm1,mm7 por mm0,mm1 + por mm0,mm5 + movq [esi],mm0 + add esi,8 dec ecx - jnz near .fsubloophnext - movq [esi-8],mm0 + jnz near .fsublooph .fsubloopdoneh pop esi pop ebx @@ -3305,24 +3260,23 @@ ProcessTransparencies: mov ecx,64 mov eax,[UnusedBit] .faddl2 - test [esi],eax + test dword[esi],eax jnz .faddloopb .faddl2_2 - test [esi+4],eax + test dword[esi+4],eax jnz .faddloopb - add esi,byte 8 + add esi,8 dec ecx jnz .faddl2 jmp .faddloopdone .faddloopnext movq [esi-8],mm0 .faddloop - test [esi],eax + test dword[esi],eax jz .faddl2_2 .faddloopb movq mm0,[esi] movq mm1,[esi+75036*2] - ; save old pixels to preserve any not needing arithmetic movq mm6,mm0 pand mm0,[UnusedBitXor] movq mm4,mm1 @@ -3331,11 +3285,9 @@ ProcessTransparencies: psllw mm0,%1 psllw mm1,%1 movq mm3,mm2 - pand mm1,[FullBitAnd] ;* movq mm5,mm2 %else movq mm3,mm0 - pand mm1,[FullBitAnd] ;* movq mm5,mm0 %endif paddusw mm0,mm1 @@ -3343,12 +3295,10 @@ ProcessTransparencies: psllw mm2,%2 movq mm1,mm4 psllw mm4,%2 - pand mm4,[FullBitAnd] ;* paddusw mm2,mm4 psllw mm3,%3 pand mm2,[FullBitAnd] psllw mm1,%3 - pand mm1,[FullBitAnd] ;* psrlw mm2,%2 paddusw mm3,mm1 pand mm3,[FullBitAnd] @@ -3356,18 +3306,15 @@ ProcessTransparencies: psrlw mm0,%1 %endif psrlw mm3,%3 - ; get alpha bits pand mm6,[UnusedBit] por mm0,mm2 - ; generate mask for combining pixels with and without arithmetic pcmpeqw mm6,[UnusedBit] por mm0,mm3 pand mm0,mm6 -; pxor mm6,[UnusedBitXor] -; pand mm6,mm5 - pandn mm6,mm5 + pxor mm6,[UnusedBitXor] + pand mm5,mm6 add esi,byte 8 - por mm0,mm6 + por mm0,mm5 dec ecx jnz near .faddloopnext movq [esi-8],mm0 @@ -3375,72 +3322,67 @@ ProcessTransparencies: pop esi pop ebx jmp .donetransp - .fullsubtract push ebx push esi mov ecx,64 mov eax,[UnusedBit] .fsubl2 - test [esi],eax + test dword[esi],eax jnz .fsubloopb -.fsubl2_2 - test [esi+4],eax + test dword[esi+4],eax jnz .fsubloopb - add esi,byte 8 + add esi,8 dec ecx jnz .fsubl2 jmp .fsubloopdone -.fsubloopnext - movq [esi-8],mm0 .fsubloop - test [esi],eax - jz .fsubl2_2 + test dword[esi],eax + jz .fsubl2 .fsubloopb movq mm0,[esi] - movq mm1,[esi+75036*2] movq mm5,mm0 + movq mm6,mm0 pxor mm0,[UnusedBitXor] - movq mm6,mm5 - pand mm1,[UnusedBitXor] + pand mm5,[UnusedBitXor] + movq mm1,[esi+75036*2] movq mm2,mm0 + pand mm1,[UnusedBitXor] + movq mm3,mm0 movq mm4,mm1 %if %1>0 psllw mm0,%1 psllw mm1,%1 %endif - movq mm3,mm2 paddusw mm0,mm1 - psllw mm2,%2 pand mm0,[FullBitAnd] movq mm1,mm4 - pand mm5,[UnusedBitXor] - psllw mm4,%2 - paddusw mm2,mm4 %if %1>0 psrlw mm0,%1 %endif + psllw mm2,%2 + psllw mm1,%2 + paddusw mm2,mm1 pand mm2,[FullBitAnd] - psllw mm3,%3 - psllw mm1,%3 - add esi,byte 8 psrlw mm2,%2 - paddusw mm3,mm1 + psllw mm3,%3 + psllw mm4,%3 + paddusw mm3,mm4 pand mm3,[FullBitAnd] - por mm0,mm2 psrlw mm3,%3 - pand mm6,[UnusedBit] por mm0,mm3 + por mm0,mm2 + pand mm6,[UnusedBit] pxor mm0,[UnusedBitXor] pcmpeqw mm6,[UnusedBit] pand mm0,mm6 -; pxor mm6,[UnusedBitXor] -; pand mm6,mm5 - pandn mm6,mm5 - por mm0,mm6 + pxor mm6,[UnusedBitXor] + pand mm5,mm6 + por mm0,mm5 + movq [esi],mm0 + add esi,8 dec ecx - jnz near .fsubloopnext - movq [esi-8],mm0 + jnz near .fsubloop .fsubloopdone pop esi pop ebx