diff --git a/zsnes/src/dos/vesa2.asm b/zsnes/src/dos/vesa2.asm index 5c68b6f7..2f06743d 100644 --- a/zsnes/src/dos/vesa2.asm +++ b/zsnes/src/dos/vesa2.asm @@ -25,9 +25,10 @@ EXTSYM InitDrive,gotoroot,InitDir,fulladdtab ; EXTSYM printhex EXTSYM UnusedBit,HalfTrans,UnusedBitXor,ngrposng,nggposng,ngbposng ; EXTSYM printhex -EXTSYM Init_2xSaIMMX,Init_2xSaI -NEWSYM Vesa2AsmStart +EXTSYM Init_2xSaIMMX + + @@ -762,7 +763,6 @@ NEWSYM InitVesa2 .red11 push eax call Init_2xSaIMMX - call Init_2xSaI pop eax ret @@ -799,4 +799,3 @@ NEWSYM RMREGS .ss dw 0 .spare times 20 dd 0 ;---------------------------------------------------------------------- -NEWSYM Vesa2AsmEnd diff --git a/zsnes/src/link.win32 b/zsnes/src/link.win32 index 32546f62..7f967e2f 100644 --- a/zsnes/src/link.win32 +++ b/zsnes/src/link.win32 @@ -1 +1 @@ -/Fezsnesw.exe chips\dsp1proc.obj dos\sw.obj dos\gppro.obj dos\vesa12.obj dos\zsipx.obj dos\modemrtn.obj dos\joy.obj dos\debug.obj dos\vesa2.obj dos\initvid.obj cfgload.obj endmem.obj fixsin.obj init.obj ui.obj vcache.obj water.obj video\procvid.obj win\copyvwin.obj win\winintrf.obj win\winlink.obj win\zloaderw.obj win\ztcp.obj win\zfilew.obj win\zipxw.obj video\makev16b.obj video\makev16t.obj video\makevid.obj video\mode716.obj video\mode716b.obj video\mode716d.obj video\mode716e.obj video\mode716t.obj video\mode7.obj video\mode7ext.obj video\mv16tms.obj video\newg162.obj video\newgfx16.obj video\newgfx2.obj video\newgfx.obj video\m716text.obj video\2xsaiw.obj video\2xsai.obj gui\gui.obj gui\menu.obj cpu\addrni.obj cpu\dma.obj cpu\dsp.obj cpu\dspproc.obj cpu\execute.obj cpu\irq.obj cpu\memory.obj cpu\spc700.obj cpu\stable.obj cpu\table.obj cpu\tableb.obj cpu\tablec.obj chips\dsp1emu.obj chips\fxemu2.obj chips\fxemu2b.obj chips\fxemu2c.obj chips\fxtable.obj chips\sa1proc.obj chips\sa1regs.obj chips\sfxproc.obj zip\unzip.obj zip\zzip.obj zlib.lib wsock32.lib user32.lib gdi32.lib shell32.lib ddraw.lib dsound.lib dinput.lib d3dx.lib /link /section:.text,erw +/Fezsnesw.exe chips\dsp1proc.obj dos\sw.obj dos\gppro.obj dos\vesa12.obj dos\zsipx.obj dos\modemrtn.obj dos\joy.obj dos\debug.obj dos\vesa2.obj dos\initvid.obj cfgload.obj endmem.obj fixsin.obj init.obj ui.obj vcache.obj water.obj video\procvid.obj win\copyvwin.obj win\winintrf.obj win\winlink.obj win\zloaderw.obj win\ztcp.obj win\zfilew.obj win\zipxw.obj video\makev16b.obj video\makev16t.obj video\makevid.obj video\mode716.obj video\mode716b.obj video\mode716d.obj video\mode716e.obj video\mode716t.obj video\mode7.obj video\mode7ext.obj video\mv16tms.obj video\newg162.obj video\newgfx16.obj video\newgfx2.obj video\newgfx.obj video\m716text.obj video\2xsaiw.obj gui\gui.obj gui\menu.obj cpu\addrni.obj cpu\dma.obj cpu\dsp.obj cpu\dspproc.obj cpu\execute.obj cpu\irq.obj cpu\memory.obj cpu\spc700.obj cpu\stable.obj cpu\table.obj cpu\tableb.obj cpu\tablec.obj chips\dsp1emu.obj chips\fxemu2.obj chips\fxemu2b.obj chips\fxemu2c.obj chips\fxtable.obj chips\sa1proc.obj chips\sa1regs.obj chips\sfxproc.obj obj\unzip.obj obj\zzip.obj zlib.lib wsock32.lib user32.lib gdi32.lib shell32.lib ddraw.lib dsound.lib dinput.lib d3dx.lib /link /section:.text,erw diff --git a/zsnes/src/makefile.win b/zsnes/src/makefile.win index 5207e397..cf242f63 100644 --- a/zsnes/src/makefile.win +++ b/zsnes/src/makefile.win @@ -15,13 +15,13 @@ #along with this program; if not, write to the Free Software #Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -#This file depends on link.win32 so if you modify this makefile, +#This file depends on link.win32 so if you modify this makefile, #don't forget to update link.win32 #it is quite ugly but without that trick, the compilation didn't #work for me (command line too long) #put the correct path to your directx sdk here -DXDIR=f:\mssdk +DXDIR=c:\dxvcsdk CHIPDIR=chips CPUDIR=cpu @@ -30,7 +30,6 @@ GUIDIR=gui VIDEODIR=video WINDIR=win OBJDIR=obj -ZIPDIR=zip CHIPSOBJ=${CHIPDIR}/sfxproc.obj ${CHIPDIR}/fxemu2.obj ${CHIPDIR}/dsp1proc.obj\ ${CHIPDIR}/fxemu2b.obj ${CHIPDIR}/fxemu2c.obj ${CHIPDIR}/fxtable.obj\ @@ -39,7 +38,7 @@ CHIPSOBJ=${CHIPDIR}/sfxproc.obj ${CHIPDIR}/fxemu2.obj ${CHIPDIR}/dsp1proc.obj\ CPUOBJ=${CPUDIR}/addrni.obj ${CPUDIR}/dma.obj ${CPUDIR}/dsp.obj ${CPUDIR}/dspproc.obj\ ${CPUDIR}/execute.obj ${CPUDIR}/irq.obj ${CPUDIR}/memory.obj \ ${CPUDIR}/spc700.obj ${CPUDIR}/stable.obj ${CPUDIR}/table.obj\ - ${CPUDIR}/tableb.obj ${CPUDIR}/tablec.obj + ${CPUDIR}/tableb.obj ${CPUDIR}/tablec.obj GUIOBJ=${GUIDIR}/gui.obj ${GUIDIR}/menu.obj @@ -48,7 +47,7 @@ VIDEOBJ=${VIDEODIR}/makev16b.obj ${VIDEODIR}/makev16t.obj ${VIDEODIR}/makevid.ob ${VIDEODIR}/mode716e.obj ${VIDEODIR}/mode716t.obj ${VIDEODIR}/mode7.obj\ ${VIDEODIR}/mode7ext.obj ${VIDEODIR}/mv16tms.obj ${VIDEODIR}/newg162.obj\ ${VIDEODIR}/newgfx16.obj ${VIDEODIR}/newgfx2.obj ${VIDEODIR}/newgfx.obj\ - ${VIDEODIR}/m716text.obj ${VIDEODIR}/2xsaiw.obj ${VIDEODIR}/2xsai.obj\ + ${VIDEODIR}/m716text.obj ${VIDEODIR}/2xsaiw.obj \ ${VIDEODIR}/procvid.obj WINOBJ=${WINDIR}/copyvwin.obj ${WINDIR}/winintrf.obj ${WINDIR}/winlink.obj\ @@ -56,26 +55,23 @@ WINOBJ=${WINDIR}/copyvwin.obj ${WINDIR}/winintrf.obj ${WINDIR}/winlink.obj\ ${WINDIR}/zfilew.obj WINDOSOBJ=${DOSDIR}/debug.obj ${DOSDIR}/joy.obj ${DOSDIR}/modemrtn.obj ${DOSDIR}/vesa2.obj\ - ${DOSDIR}/initvid.obj ${DOSDIR}/sw.obj ${DOSDIR}/gppro.obj ${DOSDIR}/vesa12.obj\ - ${DOSDIR}/zsipx.obj + ${DOSDIR}/initvid.obj ${DOSDIR}/sw.obj ${DOSDIR}/gppro.obj ${DOSDIR}/vesa12.obj -PREOBJ= - -ZIPOBJ=${ZIPDIR}/zzip.obj ${ZIPDIR}/unzip.obj +PREOBJ=${OBJDIR}/unzip.obj ${OBJDIR}/zzip.obj ${DOSDIR}/zsipx.obj MAINOBJ=cfgload.obj endmem.obj fixsin.obj init.obj ui.obj vcache.obj water.obj + - -OBJS=${CHIPSOBJ} ${CPUOBJ} ${WINOBJ} ${GUIOBJ} ${VIDEOBJ} ${ZIPOBJ} ${MAINOBJ} ${WINDOSOBJ} +OBJS=${CHIPSOBJ} ${CPUOBJ} ${WINOBJ} ${GUIOBJ} ${VIDEOBJ} ${PREOBJ} ${MAINOBJ} ${WINDOSOBJ} LIBS= CFLAGS=/c ASM=nasm ASMFLAGS=-f win32 CC=cl -.SUFFIXES: .c .cpp .asm - +.SUFFIXES: .c .cpp .asm + %.obj : %.c ${CC} ${CFLAGS} /Fo$@ $< @@ -100,8 +96,6 @@ ${WINDIR}/zipxw.obj: ${WINDIR}/zipxw.c ${WINDIR}/zloaderw.obj: ${WINDIR}/zloaderw.c ${WINDIR}/ztcp.obj: ${WINDIR}/ztcp.c ${WINDIR}/winlink.obj: ${WINDIR}/winlink.cpp ${WINDIR}/resource.h -${ZIPDIR}/unzip.obj: ${ZIPDIR}/unzip.c ${ZIPDIR}/unzip.h -${ZIPDIR}/zzip.obj: ${ZIPDIR}/zzip.c ${ZIPDIR}/unzip.h ${DOSDIR}/initvid.o:${DOSDIR}/initvid.asm macros.mac ${DOSDIR}/modemrtn.o: ${DOSDIR}/modemrtn.asm macros.mac ${DOSDIR}/zsipx.o: ${DOSDIR}/zsipx.asm @@ -114,7 +108,6 @@ ${DOSDIR}/vesa12.o: ${DOSDIR}/vesa12.asm macros.mac fixsin.obj: fixsin.c water.obj: water.c ${VIDEODIR}/2xsaiw.obj: ${VIDEODIR}/2xsaiw.asm macros.mac -${VIDEODIR}/2xsai.obj: ${VIDEODIR}/2xsai.cpp ${VIDEODIR}/procvid.obj: ${VIDEODIR}/procvid.asm macros.mac ${VIDEODIR}/copyvid.inc ${VIDEODIR}/2xSaImmx.inc ${CHIPDIR}/sa1regs.obj: ${CHIPDIR}/sa1regs.asm macros.mac\ ${CPUDIR}/regs.mac ${CPUDIR}/regsw.mac @@ -214,4 +207,6 @@ clean: del ${WINDIR}\zsnes.res del ${DOSDIR}\*.obj del zsnesw.exe + + diff --git a/zsnes/src/video/2xsaimmx.inc b/zsnes/src/video/2xsaimmx.inc index 4184d1b0..42881620 100644 --- a/zsnes/src/video/2xsaimmx.inc +++ b/zsnes/src/video/2xsaimmx.inc @@ -89,6 +89,716 @@ colorA3 equ 4 + + + + + + +NEWSYM _2xSaISuper2xSaILine +; Store some stuff + push ebp + mov ebp, esp + pushad + +; Prepare the destination +%ifdef __DJGPP__ + ; Set the selector + mov eax, [ebp+dstSegment] + mov fs, ax +%endif + mov edx, [ebp+dstOffset] ; edx points to the screen +; Prepare the source + ; eax points to colorA + mov eax, [ebp+srcPtr] ;eax points to colorA + mov ebx, [ebp+srcPitch] ;ebx contains the source pitch + mov ecx, [ebp+width] ;ecx contains the number of pixels to process + ; eax now points to colorB1 + sub eax, ebx ;eax points to B1 which is the base + +; Main Loop +.Loop: push ecx + + ;-----Check Delta------------------ + mov ecx, [ebp+deltaPtr] + + + ;load source img + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB3] + movq mm2, [eax+ebx+color4] + movq mm3, [eax+ebx+colorS2] + movq mm4, [eax+ebx+ebx+color1] + movq mm5, [eax+ebx+ebx+colorS1] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA0] + movq mm7, [eax+ebx+ebx+colorA3] + pop eax + + ;compare to delta + pcmpeqw mm0, [ecx+2+colorB0] + pcmpeqw mm1, [ecx+2+colorB3] + pcmpeqw mm2, [ecx+ebx+2+color4] + pcmpeqw mm3, [ecx+ebx+2+colorS2] + pcmpeqw mm4, [ecx+ebx+ebx+2+color1] + pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] + add ecx, ebx + pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] + pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] + sub ecx, ebx + + + ;compose results + pand mm0, mm1 + pand mm2, mm3 + pand mm4, mm5 + pand mm6, mm7 + pand mm0, mm2 + pand mm4, mm6 + pxor mm7, mm7 + pand mm0, mm4 + movq mm6, [eax+colorB0] + pcmpeqw mm7, mm0 ;did any compare give us a zero ? + + movq [ecx+2+colorB0], mm6 + + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_PROCESS ;no, so we can skip + + ;End Delta + + ;--------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I56Pixel], mm0 + movq mm7, mm0 + + ;------------------- + movq mm0, mm7 + movq mm1, mm4 ;5,5,5,6 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I5556Pixel], mm0 + ;-------------------- + + movq mm0, mm7 + movq mm1, mm5 ;6,6,6,5 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I5666Pixel], mm0 + + ;------------------------- + ;------------------------- + movq mm0, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color3] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I23Pixel], mm0 + movq mm7, mm0 + + ;--------------------- + movq mm0, mm7 + movq mm1, mm4 ;2,2,2,3 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2223Pixel], mm0 + + ;---------------------- + movq mm0, mm7 + movq mm1, mm5 ;3,3,3,2 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2333Pixel], mm0 + + + ;-------------------- +;//////////////////////////////// +; Decide which "branch" to take +;-------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm6, mm0 + movq mm7, mm1 + pcmpeqw mm0, [eax+ebx+ebx+color3] + pcmpeqw mm1, [eax+ebx+ebx+color2] + pcmpeqw mm6, mm7 + + movq mm2, mm0 + movq mm3, mm0 + + pand mm0, mm1 ;colorA == colorD && colorB == colorC + pxor mm7, mm7 + + pcmpeqw mm2, mm7 + pand mm6, mm0 + pand mm2, mm1 ;colorA != colorD && colorB == colorC + + pcmpeqw mm1, mm7 + + pand mm1, mm3 ;colorA == colorD && colorB != colorC + pxor mm0, mm6 + por mm1, mm6 + movq mm7, mm0 + movq [Mask26], mm2 + packsswb mm7, mm7 + movq [Mask35], mm1 + + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_GUESS + +;--------------------------------------------- + movq mm6, mm0 + movq mm4, [eax+ebx+colorA] + movq mm5, [eax+ebx+colorB] + pxor mm7, mm7 + pand mm6, [ONE] + + movq mm0, [eax+colorE] + movq mm1, [eax+ebx+colorG] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+colorF] + movq mm1, [eax+ebx+colorK] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + push eax + add eax, ebx + movq mm0, [eax+ebx+colorH] + movq mm1, [eax+ebx+ebx+colorN] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+ebx+colorL] + movq mm1, [eax+ebx+ebx+colorO] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + pop eax + movq mm1, mm7 + pxor mm0, mm0 + pcmpgtw mm7, mm0 + pcmpgtw mm0, mm1 + + por mm7, [Mask35] + por mm0, [Mask26] + movq [Mask35], mm7 + movq [Mask26], mm0 + +.SKIP_GUESS: + + ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image... + + + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+ebx+color2] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + ;--------------------------- + + + +%ifdef dfhsdfhsdahdsfhdsfh + + if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) + product2a = INTERPOLATE (color2, color5); + else + if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) + product2a = INTERPOLATE(color2, color5); + else + product2a = color2; + + if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) + product1a = INTERPOLATE (color2, color5); + else + if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) + product1a = INTERPOLATE(color2, color5); + else + product1a = color5; + +%endif + + + movq mm7, [Mask26] + movq mm6, [eax+colorB2] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+ebx+color3] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + movq mm2, [eax+ebx+color5] + movq mm1, [eax+ebx+color4] + movq mm3, [eax+colorB0] + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+color5] + pand mm6, mm1 + por mm7, mm6 + movq [final1a], mm7 ;finished 1a + + + + ;-------------------------------- + + movq mm7, [Mask35] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA2] + pop eax + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+color6] + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + movq mm2, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color1] + push eax + add eax, ebx + movq mm3, [eax+ebx+ebx+colorA0] + pop eax + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+ebx+color2] + pand mm6, mm1 + por mm7, mm6 + movq [final2a], mm7 ;finished 2a + + + ;-------------------------------------------- + + +%ifdef dfhsdfhsdahdsfhdsfh + if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) + product2b = Q_INTERPOLATE (color3, color3, color3, color2); + else + if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) + product2b = Q_INTERPOLATE (color2, color2, color2, color3); + else + product2b = INTERPOLATE (color2, color3); + + if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) + product1b = Q_INTERPOLATE (color6, color6, color6, color5); + else + if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) + product1b = Q_INTERPOLATE (color6, color5, color5, color5); + else + product1b = INTERPOLATE (color5, color6); +%endif + + push eax + add eax, ebx + pxor mm7, mm7 + movq mm0, [eax+ebx+ebx+colorA0] + movq mm1, [eax+ebx+ebx+colorA1] + movq mm2, [eax+ebx+ebx+colorA2] + movq mm3, [eax+ebx+ebx+colorA3] + pop eax + movq mm4, [eax+ebx+ebx+color2] + movq mm5, [eax+ebx+ebx+color3] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + movq mm4, [eax+ebx+color2] + movq mm5, [eax+ebx+ebx+color5] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + movq mm2, mm0 + movq mm7, [I2333Pixel] + movq mm6, [I2223Pixel] + movq mm5, [I23Pixel] + movq mm4, [Mask35] + movq mm3, [Mask26] + + por mm2, mm4 + pand mm4, [eax+ebx+ebx+color3] + por mm2, mm3 + pand mm3, [eax+ebx+ebx+color2] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + movq [final2b], mm0 + + ;----------------------------------- + + + pxor mm7, mm7 + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB1] + movq mm2, [eax+colorB2] + movq mm3, [eax+colorB3] + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+color6] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+ebx+color2] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + movq mm2, mm0 + movq mm7, [I5666Pixel] + movq mm6, [I5556Pixel] + movq mm5, [I56Pixel] + movq mm4, [Mask35] + movq mm3, [Mask26] + + por mm2, mm4 + pand mm4, [eax+ebx+color5] + por mm2, mm3 + pand mm3, [eax+ebx+color6] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + movq [final1b], mm0 + + ;--------- + + movq mm0, [final1a] + movq mm4, [final2a] + movq mm2, [final1b] + movq mm6, [final2b] + + movq mm1, mm0 + movq mm5, mm4 + + + punpcklwd mm0, mm2 + punpckhwd mm1, mm2 + + punpcklwd mm4, mm6 + punpckhwd mm5, mm6 + + +%ifdef FAR_POINTER + movq [fs:edx], mm0 + movq [fs:edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [fs:edx], mm4 + movq [fs:edx+8], mm5 + pop edx +%else + movq [es:edx], mm0 + movq [es:edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [es:edx], mm4 + movq [es:edx+8], mm5 + pop edx +%endif +.SKIP_PROCESS: + mov ecx, [ebp+deltaPtr] + add ecx, 8 + mov [ebp+deltaPtr], ecx + add edx, 16 + add eax, 8 + + pop ecx + sub ecx, 4 + cmp ecx, 0 + jg near .Loop + +; Restore some stuff + popad + mov esp, ebp + pop ebp + emms + ret + + +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- + + + + + + + NEWSYM _2xSaISuperEagleLine ; Store some stuff push ebp @@ -1337,21 +2047,30 @@ FALSE dd 0x00000000, 0x00000000 TRUE dd 0xffffffff, 0xffffffff ONE dd 0x00010001, 0x00010001 -ACPixel times 8 db 0 -Mask1 times 8 db 0 -Mask2 times 8 db 0 -I56Pixel times 8 db 0 -I23Pixel times 8 db 0 -Mask26 times 8 db 0 -Mask35 times 8 db 0 -Mask26b times 8 db 0 -Mask35b times 8 db 0 -product1a times 8 db 0 -product1b times 8 db 0 -product2a times 8 db 0 -product2b times 8 db 0 -final1a times 8 db 0 -final1b times 8 db 0 -final2a times 8 db 0 -final2b times 8 db 0 + SECTION .bss ALIGN = 32 +ACPixel resb 8 +Mask1 resb 8 +Mask2 resb 8 + +I56Pixel resb 8 +I23Pixel resb 8 +I5556Pixel resb 8 +I2223Pixel resb 8 +I5666Pixel resb 8 +I2333Pixel resb 8 +Mask26 resb 8 +Mask35 resb 8 +Mask26b resb 8 +Mask35b resb 8 +product1a resb 8 +product1b resb 8 +product2a resb 8 +product2b resb 8 +final1a resb 8 +final1b resb 8 +final2a resb 8 +final2b resb 8 + + + diff --git a/zsnes/src/video/2xsaiw.asm b/zsnes/src/video/2xsaiw.asm index c504ce66..56e37404 100644 --- a/zsnes/src/video/2xsaiw.asm +++ b/zsnes/src/video/2xsaiw.asm @@ -17,7 +17,7 @@ %include "macros.mac" -NEWSYM TwoxSaiWAsmStart + ;/*---------------------------------------------------------------------* ; * The following (piece of) code, (part of) the 2xSaI engine, * ; * copyright (c) 1999 by Derek Liauw Kie Fa. * @@ -74,6 +74,707 @@ colorA2 equ 2 colorA3 equ 4 +NEWSYM _2xSaISuper2xSaILineW +; Store some stuff + push ebp + mov ebp, esp + pushad + +; Prepare the destination +%ifdef __DJGPP__ + ; Set the selector + mov eax, [ebp+dstSegment] + mov fs, ax +%endif + mov edx, [ebp+dstOffset] ; edx points to the screen +; Prepare the source + ; eax points to colorA + mov eax, [ebp+srcPtr] ;eax points to colorA + mov ebx, [ebp+srcPitch] ;ebx contains the source pitch + mov ecx, [ebp+width] ;ecx contains the number of pixels to process + ; eax now points to colorB1 + sub eax, ebx ;eax points to B1 which is the base + +; Main Loop +.Loop: push ecx + + ;-----Check Delta------------------ + mov ecx, [ebp+deltaPtr] + + + ;load source img + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB3] + movq mm2, [eax+ebx+color4] + movq mm3, [eax+ebx+colorS2] + movq mm4, [eax+ebx+ebx+color1] + movq mm5, [eax+ebx+ebx+colorS1] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA0] + movq mm7, [eax+ebx+ebx+colorA3] + pop eax + + ;compare to delta + pcmpeqw mm0, [ecx+2+colorB0] + pcmpeqw mm1, [ecx+2+colorB3] + pcmpeqw mm2, [ecx+ebx+2+color4] + pcmpeqw mm3, [ecx+ebx+2+colorS2] + pcmpeqw mm4, [ecx+ebx+ebx+2+color1] + pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] + add ecx, ebx + pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] + pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] + sub ecx, ebx + + + ;compose results + pand mm0, mm1 + pand mm2, mm3 + pand mm4, mm5 + pand mm6, mm7 + pand mm0, mm2 + pand mm4, mm6 + pxor mm7, mm7 + pand mm0, mm4 + movq mm6, [eax+colorB0] + pcmpeqw mm7, mm0 ;did any compare give us a zero ? + + movq [ecx+2+colorB0], mm6 + + packsswb mm7, mm7 + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_PROCESS ;no, so we can skip + + ;End Delta + + ;--------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I56Pixel], mm0 + movq mm7, mm0 + + ;------------------- + movq mm0, mm7 + movq mm1, mm4 ;5,5,5,6 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + movq [I5556Pixel], mm0 + ;-------------------- + + movq mm0, mm7 + movq mm1, mm5 ;6,6,6,5 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I5666Pixel], mm0 + + ;------------------------- + ;------------------------- + movq mm0, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color3] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I23Pixel], mm0 + movq mm7, mm0 + + ;--------------------- + movq mm0, mm7 + movq mm1, mm4 ;2,2,2,3 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2223Pixel], mm0 + + ;---------------------- + movq mm0, mm7 + movq mm1, mm5 ;3,3,3,2 + movq mm2, mm0 + movq mm3, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 + movq [I2333Pixel], mm0 + + + ;-------------------- +;//////////////////////////////// +; Decide which "branch" to take +;-------------------------------- + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+color6] + movq mm6, mm0 + movq mm7, mm1 + pcmpeqw mm0, [eax+ebx+ebx+color3] + pcmpeqw mm1, [eax+ebx+ebx+color2] + pcmpeqw mm6, mm7 + + movq mm2, mm0 + movq mm3, mm0 + + pand mm0, mm1 ;colorA == colorD && colorB == colorC + pxor mm7, mm7 + + pcmpeqw mm2, mm7 + pand mm6, mm0 + pand mm2, mm1 ;colorA != colorD && colorB == colorC + + pcmpeqw mm1, mm7 + + pand mm1, mm3 ;colorA == colorD && colorB != colorC + pxor mm0, mm6 + por mm1, mm6 + movq mm7, mm0 + movq [Mask26], mm2 + packsswb mm7, mm7 + movq [Mask35], mm1 + + movd ecx, mm7 + test ecx, ecx + jz near .SKIP_GUESS + +;--------------------------------------------- + movq mm6, mm0 + movq mm4, [eax+ebx+colorA] + movq mm5, [eax+ebx+colorB] + pxor mm7, mm7 + pand mm6, [ONE] + + movq mm0, [eax+colorE] + movq mm1, [eax+ebx+colorG] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+colorF] + movq mm1, [eax+ebx+colorK] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + push eax + add eax, ebx + movq mm0, [eax+ebx+colorH] + movq mm1, [eax+ebx+ebx+colorN] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + movq mm0, [eax+ebx+colorL] + movq mm1, [eax+ebx+ebx+colorO] + movq mm2, mm0 + movq mm3, mm1 + pcmpeqw mm0, mm4 + pcmpeqw mm1, mm4 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm0, mm6 + pand mm1, mm6 + pand mm2, mm6 + pand mm3, mm6 + paddw mm0, mm1 + paddw mm2, mm3 + + pxor mm3, mm3 + pcmpgtw mm0, mm6 + pcmpgtw mm2, mm6 + pcmpeqw mm0, mm3 + pcmpeqw mm2, mm3 + pand mm0, mm6 + pand mm2, mm6 + paddw mm7, mm0 + psubw mm7, mm2 + + pop eax + movq mm1, mm7 + pxor mm0, mm0 + pcmpgtw mm7, mm0 + pcmpgtw mm0, mm1 + + por mm7, [Mask35] + por mm0, [Mask26] + movq [Mask35], mm7 + movq [Mask26], mm0 + +.SKIP_GUESS: + + ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image... + + + movq mm0, [eax+ebx+color5] + movq mm1, [eax+ebx+ebx+color2] + movq mm2, mm0 + movq mm3, mm1 + movq mm4, mm0 + movq mm5, mm1 + + pand mm0, [colorMask] + pand mm1, [colorMask] + + psrlw mm0, 1 + psrlw mm1, 1 + + pand mm3, [lowPixelMask] + paddw mm0, mm1 + + pand mm3, mm2 + paddw mm0, mm3 ;mm0 contains the interpolated values + ;--------------------------- + + + +%ifdef dfhsdfhsdahdsfhdsfh + + if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) + product2a = INTERPOLATE (color2, color5); + else + if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) + product2a = INTERPOLATE(color2, color5); + else + product2a = color2; + + if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) + product1a = INTERPOLATE (color2, color5); + else + if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) + product1a = INTERPOLATE(color2, color5); + else + product1a = color5; + +%endif + + + movq mm7, [Mask26] + movq mm6, [eax+colorB2] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+ebx+color3] + movq mm5, [eax+ebx+ebx+color2] + movq mm4, [eax+ebx+ebx+color1] + movq mm2, [eax+ebx+color5] + movq mm1, [eax+ebx+color4] + movq mm3, [eax+colorB0] + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+color5] + pand mm6, mm1 + por mm7, mm6 + movq [final1a], mm7 ;finished 1a + + + + ;-------------------------------- + + movq mm7, [Mask35] + push eax + add eax, ebx + movq mm6, [eax+ebx+ebx+colorA2] + pop eax + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + pcmpeqw mm4, mm5 + pcmpeqw mm6, mm5 + pxor mm5, mm5 + pand mm7, mm4 + pcmpeqw mm6, mm5 + pand mm7, mm6 + + + + movq mm6, [eax+ebx+color6] + movq mm5, [eax+ebx+color5] + movq mm4, [eax+ebx+color4] + movq mm2, [eax+ebx+ebx+color2] + movq mm1, [eax+ebx+ebx+color1] + push eax + add eax, ebx + movq mm3, [eax+ebx+ebx+colorA0] + pop eax + + pcmpeqw mm2, mm4 + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm3, mm5 + pxor mm5, mm5 + pcmpeqw mm2, mm5 + pcmpeqw mm3, mm5 + pand mm6, mm1 + pand mm2, mm3 + pand mm6, mm2 + por mm7, mm6 + + + movq mm6, mm7 + pcmpeqw mm6, mm5 + pand mm7, mm0 + + movq mm1, [eax+ebx+ebx+color2] + pand mm6, mm1 + por mm7, mm6 + movq [final2a], mm7 ;finished 2a + + + ;-------------------------------------------- + + +%ifdef dfhsdfhsdahdsfhdsfh + if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) + product2b = Q_INTERPOLATE (color3, color3, color3, color2); + else + if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) + product2b = Q_INTERPOLATE (color2, color2, color2, color3); + else + product2b = INTERPOLATE (color2, color3); + + if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) + product1b = Q_INTERPOLATE (color6, color6, color6, color5); + else + if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) + product1b = Q_INTERPOLATE (color6, color5, color5, color5); + else + product1b = INTERPOLATE (color5, color6); +%endif + + push eax + add eax, ebx + pxor mm7, mm7 + movq mm0, [eax+ebx+ebx+colorA0] + movq mm1, [eax+ebx+ebx+colorA1] + movq mm2, [eax+ebx+ebx+colorA2] + movq mm3, [eax+ebx+ebx+colorA3] + pop eax + movq mm4, [eax+ebx+ebx+color2] + movq mm5, [eax+ebx+ebx+color3] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + movq mm4, [eax+ebx+color2] + movq mm5, [eax+ebx+ebx+color5] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + movq mm2, mm0 + movq mm7, [I2333Pixel] + movq mm6, [I2223Pixel] + movq mm5, [I23Pixel] + movq mm4, [Mask35] + movq mm3, [Mask26] + + por mm2, mm4 + pand mm4, [eax+ebx+ebx+color3] + por mm2, mm3 + pand mm3, [eax+ebx+ebx+color2] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + movq [final2b], mm0 + + ;----------------------------------- + + + pxor mm7, mm7 + movq mm0, [eax+colorB0] + movq mm1, [eax+colorB1] + movq mm2, [eax+colorB2] + movq mm3, [eax+colorB3] + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+color6] + movq mm6, [eax+ebx+ebx+color3] + + pcmpeqw mm6, mm5 + pcmpeqw mm1, mm5 + pcmpeqw mm4, mm2 + pcmpeqw mm0, mm5 + pcmpeqw mm4, mm7 + pcmpeqw mm0, mm7 + pand mm0, mm4 + pand mm6, mm1 + pand mm0, mm6 + + movq mm4, [eax+ebx+color5] + movq mm5, [eax+ebx+ebx+color2] + movq mm6, [eax+ebx+color6] + + pcmpeqw mm5, mm4 + pcmpeqw mm2, mm4 + pcmpeqw mm1, mm6 + pcmpeqw mm3, mm4 + pcmpeqw mm1, mm7 + pcmpeqw mm3, mm7 + pand mm2, mm5 + pand mm1, mm3 + pand mm1, mm2 + + movq mm2, mm0 + movq mm7, [I5666Pixel] + movq mm6, [I5556Pixel] + movq mm5, [I56Pixel] + movq mm4, [Mask35] + movq mm3, [Mask26] + + por mm2, mm4 + pand mm4, [eax+ebx+color5] + por mm2, mm3 + pand mm3, [eax+ebx+color6] + por mm2, mm1 + pand mm0, mm7 + pand mm1, mm6 + pxor mm7, mm7 + pcmpeqw mm2, mm7 + por mm0, mm1 + por mm3, mm4 + pand mm2, mm5 + por mm0, mm3 + por mm0, mm2 + movq [final1b], mm0 + + ;--------- + + movq mm0, [final1a] + movq mm4, [final2a] + movq mm2, [final1b] + movq mm6, [final2b] + + movq mm1, mm0 + movq mm5, mm4 + + + punpcklwd mm0, mm2 + punpckhwd mm1, mm2 + + punpcklwd mm4, mm6 + punpckhwd mm5, mm6 + + +%ifdef __DJGPP__ + movq [fs:edx], mm0 + movq [fs:edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [fs:edx], mm4 + movq [fs:edx+8], mm5 + pop edx +%else + movq [edx], mm0 + movq [edx+8], mm1 + push edx + add edx, [ebp+dstPitch] + movq [edx], mm4 + movq [edx+8], mm5 + pop edx +%endif +.SKIP_PROCESS: + mov ecx, [ebp+deltaPtr] + add ecx, 8 + mov [ebp+deltaPtr], ecx + add edx, 16 + add eax, 8 + + pop ecx + sub ecx, 4 + cmp ecx, 0 + jg near .Loop + +; Restore some stuff + popad + mov esp, ebp + pop ebp + emms + ret + + +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- +;------------------------------------------------------------------------- + + + + + + + + NEWSYM _2xSaISuperEagleLineW @@ -1324,24 +2025,27 @@ FALSE dd 0x00000000, 0x00000000 TRUE dd 0xffffffff, 0xffffffff ONE dd 0x00010001, 0x00010001 -ACPixel times 8 db 0 -Mask1 times 8 db 0 -Mask2 times 8 db 0 -I56Pixel times 8 db 0 -I23Pixel times 8 db 0 -Mask26 times 8 db 0 -Mask35 times 8 db 0 -Mask26b times 8 db 0 -Mask35b times 8 db 0 -product1a times 8 db 0 -product1b times 8 db 0 -product2a times 8 db 0 -product2b times 8 db 0 -final1a times 8 db 0 -final1b times 8 db 0 -final2a times 8 db 0 -final2b times 8 db 0 + SECTION .bss ALIGN = 32 +ACPixel resb 8 +Mask1 resb 8 +Mask2 resb 8 -SECTION .text -NEWSYM TwoxSaiWAsmEnd +I56Pixel resb 8 +I23Pixel resb 8 +I5556Pixel resb 8 +I2223Pixel resb 8 +I5666Pixel resb 8 +I2333Pixel resb 8 +Mask26 resb 8 +Mask35 resb 8 +Mask26b resb 8 +Mask35b resb 8 +product1a resb 8 +product1b resb 8 +product2a resb 8 +product2b resb 8 +final1a resb 8 +final1b resb 8 +final2a resb 8 +final2b resb 8 diff --git a/zsnes/src/video/copyvid.inc b/zsnes/src/video/copyvid.inc index 81a374a8..a9b161e9 100644 --- a/zsnes/src/video/copyvid.inc +++ b/zsnes/src/video/copyvid.inc @@ -24,11 +24,11 @@ EXTSYM vesa2_clbitng,vesa2_clbitng2,vesa2_clbitng3 EXTSYM granadd EXTSYM SpecialLine EXTSYM vidbufferofsb -EXTSYM Super2xSaI +;EXTSYM Super2xSaI EXTSYM HalfTransB,HalfTransC -%include "video/2xsaimmx.inc" +%include "video/2xSaImmx.inc" @@ -831,7 +831,6 @@ copyvesa2320x480x8ng: ;******************************************************* ALIGN32 -SECTION .data NEWSYM EagleHold, dd 0 NEWSYM smallscreenon, dd 0 NEWSYM smallscreence, dd 0 @@ -839,7 +838,7 @@ NEWSYM CurrentGUIOn, dd 0 vesavaland dd 0,0 mmxvalanda dd 11111111110000001111111111000000b,11111111110000001111111111000000b mmxvalandb dd 00000000000111110000000000011111b,00000000000111110000000000011111b -SECTION .text + NEWSYM copyvesa2640x480x8bgui mov byte[CurrentGUIOn],1 cmp byte[smallscreenon],1 @@ -3800,9 +3799,10 @@ Process2xSaI: ;dstPitch equ 28 ;dstSegment equ 32 - +%ifdef asdfasdfasdfsafdasdf cmp byte[En2xSaI],3 je .super2xsai +%endif mov eax,1280 ; destination pitch push eax mov eax,edi ; destination offset @@ -3821,6 +3821,8 @@ Process2xSaI: .supereagle call _2xSaISuperEagleLine jmp .normal + +%ifdef asdgawe65egfdgfdg .super2xsai pushad mov eax,1280 ; destination pitch @@ -3860,6 +3862,9 @@ Process2xSaI: add eax,8 loop .s2xsailoop2 jmp .dones2xsai +%endif + + .normal add esp,24 .dones2xsai @@ -5696,4 +5701,3 @@ NEWSYM Clear2xSaIBuffer add ebx,4 loop .nextb ret - diff --git a/zsnes/src/win/copyvwin.asm b/zsnes/src/win/copyvwin.asm index c75d17c2..af353cd7 100644 --- a/zsnes/src/win/copyvwin.asm +++ b/zsnes/src/win/copyvwin.asm @@ -19,12 +19,12 @@ EXTSYM vesa2selec,vidbuffer,GUIOn,FPUCopy,resolutn,En2xSaI,antienab,scanlines EXTSYM hirestiledat,res512switch,curblank,spritetablea -EXTSYM lineleft,_2xSaILineW,_2xSaISuperEagleLineW +EXTSYM lineleft,_2xSaILineW,_2xSaISuperEagleLineW, _2xSaISuper2xSaILineW EXTSYM newengen,cfield,HalfTrans EXTSYM GUIOn2 EXTSYM SpecialLine EXTSYM vidbufferofsb -EXTSYM Super2xSaI +;EXTSYM Super2xSaI EXTSYM HalfTransB,HalfTransC @@ -866,7 +866,7 @@ Process2xSaIwin: call _2xSaISuperEagleLineW jmp .normal .super2xSaI - call Super2xSaI + call _2xSaISuper2xSaILineW .normal add esp,24 pop ebx diff --git a/zsnes/src/win/winlink.cpp b/zsnes/src/win/winlink.cpp index dd79564e..a53e0aa5 100644 --- a/zsnes/src/win/winlink.cpp +++ b/zsnes/src/win/winlink.cpp @@ -131,24 +131,17 @@ _int64 start, end, freq, update_ticks_pc, start2, end2, update_ticks_pc2; extern "C" { void drawscreenwin(void); - void Init_2xSaI(UINT32 BitFormat); + //void Init_2xSaI(UINT32 BitFormat); DWORD LastUsedPos=0; DWORD CurMode=-1; } -void DDrawError(){ - char message1[256]; - - sprintf(message1,"Error drawing to the screen\nMake sure the device is not being used by another process \0"); - MessageBox (NULL, message1, "DirectDraw Error" , MB_ICONERROR ); -} - void DrawScreen() { if(DD_CFB==NULL) return; if(DD_Primary->Blt(&rcWindow,DD_CFB,NULL,DDBLT_WAIT,NULL)!=DD_OK) { - DDrawError(); + // Error!!! } } @@ -518,7 +511,6 @@ InitSound() break; default: wfx.nSamplesPerSec = 11025; - SoundBufferSize=1024*2; } if(StereoSound==1) @@ -842,7 +834,7 @@ void DInputError(){ char message1[256]; sprintf(message1,"Error initializing DirectInput\nYou may need to install DirectX 7.0a or higher located at www.microsoft.com/directx \0"); - MessageBox (NULL, message1, "DirectInput Error" , MB_ICONERROR ); + MessageBox (NULL, message1, "Init Error" , MB_ICONERROR ); } bool InitInput() @@ -853,7 +845,7 @@ bool InitInput() if(FAILED(hr=DirectInputCreate(hInst,DIRECTINPUT_VERSION,&DInput,NULL))) { sprintf(message1,"Error initializing DirectInput\nYou may need to install DirectX 7.0a or higher located at www.microsoft.com/directx \0"); - MessageBox (NULL, message1, "DirectInput Error" , MB_ICONERROR ); + MessageBox (NULL, message1, "Init Error" , MB_ICONERROR ); switch(hr) { @@ -1082,7 +1074,7 @@ startgame() if(BitDepth==16&& GBitMask!=0x07E0) { converta=1; - Init_2xSaI(555); + //Init_2xSaI(555); } else { @@ -2309,7 +2301,7 @@ void SetMouseY(int Y) void ZsnesPage() { - ShellExecute(NULL, NULL, "http://www.zsnes.com/", NULL, NULL, 0); + ShellExecute(NULL, NULL, "http://www.zsnes.com", NULL, NULL, 0); } @@ -2317,3 +2309,4 @@ void ZsnesPage() } +