shithub: riscv

Download patch

ref: e8e04281402b697c0568ce5e35dbd7fc1d0c0935
parent: 289fb207c93576321a21a2ff82f2bb45b56821ad
author: aiju <devnull@localhost>
date: Fri Mar 14 11:06:25 EDT 2014

games/snes: performance improvements and scaling

--- a/sys/src/games/snes/dat.h
+++ b/sys/src/games/snes/dat.h
@@ -14,7 +14,7 @@
 extern u16int vtime, htime, subcolor, oamaddr;
 extern u16int m7[6], hofs[4], vofs[4];
 
-extern int battery, saveclock;
+extern int battery, saveclock, scale;
 
 enum {
 	FLAGC = 1<<0,
--- a/sys/src/games/snes/ppu.c
+++ b/sys/src/games/snes/ppu.c
@@ -31,12 +31,36 @@
 pixeldraw(int x, int y, u16int v)
 {
 	uchar *p;
+	u16int *q;	/* output cursor for the scaled paths, in 16-bit pixels */
+	union { u16int w; u8int b[2]; } u;	/* v stored low byte first, so a 16-bit store of u.w lays bytes out like the scale==1 path */
+	int i;
 
 	if(bright != 0xf)
 		v = darken(v);
-	p = pic + (x + y * 256) * 2;
-	*p++ = v;
-	*p = v >> 8;
+	if(scale == 1){	/* unscaled: one 2-byte pixel at (x, y), 256 pixels per row */
+		p = pic + (x + y * 256) * 2;
+		*p++ = v;
+		*p = v >> 8;
+		return;
+	}
+	u.b[0] = v;
+	u.b[1] = v >> 8;
+	if(scale == 2){	/* 2x: fill the 2x2 block at (2x, 2y); rows are 256*2 pixels wide */
+		q = (u16int*)pic + (x + y * 256 * 2) * 2;
+		*q++ = u.w;
+		*q = u.w;
+		q += 256 * 2 - 1;	/* same starting column, next output row */
+		*q++ = u.w;
+		*q = u.w;
+	}else{	/* every other scale value is handled as 3x: fill the 3x3 block at (3x, 3y) */
+		q = (u16int*)pic + (x + y * 256 * 3) * 3;
+		for(i = 0; i < 3; i++){
+			*q++ = u.w;
+			*q++ = u.w;
+			*q = u.w;
+			q += 256 * 3 - 2;	/* back to the block's first column, one row (256*3 pixels) down */
+		}
+	}
 }
 
 static int
@@ -100,9 +124,8 @@
 }
 
 static void
-chr(int n, int nb, int sz, u16int t, int x, int y, u8int c[])
+chr(int n, int nb, int sz, u16int t, int x, int y, u32int c[])
 {
-	int i;
 	u16int a;
 
 	if(sz == 16){
@@ -120,10 +143,20 @@
 	else
 		a &= 0xf;
 	a = (a << 13) + (t & 0x3ff) * 8 * nb + y * 2;
-	for(i = 0; i < nb; i += 2){
-		c[i] = vram[a++];
-		c[i+1] = vram[a];
+	c[0] = vram[a++];
+	c[0] |= vram[a] << 8;
+	if(nb != 2){
 		a += 15;
+		c[0] |= vram[a++] << 16;
+		c[0] |= vram[a] << 24;
+		if(nb == 8){
+			a += 15;
+			c[1] = vram[a++];
+			c[1] |= vram[a] << 8;
+			a += 15;
+			c[1] |= vram[a++] << 16;
+			c[1] |= vram[a] << 24;
+		}
 	}
 }
 
@@ -156,36 +189,45 @@
 }
 
 static void
-shift(u8int *c, int nb, int n, int d)
+shift(u32int *c, int nb, int n, int d)	/* shift the packed words in c by n bits: right when d is nonzero, left otherwise */
 {
-	u8int *e;
-	
-	e = c + nb;
-	if(d)
-		while(c < e)
-			*c++ >>= n;
-	else
-		while(c < e)
-			*c++ <<= n;
+	if(d){	/* NOTE(review): a 32-bit shift carries bits between the four byte lanes, unlike the removed per-byte loop; presumably callers never consume the carried-in bits - confirm */
+		c[0] >>= n;
+		if(nb == 8)	/* c[1] is only touched when nb is 8 */
+			c[1] >>= n;
+	}else{
+		c[0] <<= n;
+		if(nb == 8)
+			c[1] <<= n;
+	}
 }
 
 static u8int
-bgpixel(u8int *c, int nb, int d)
+bgpixel(u32int *c, int nb, int d)	/* gather one bit from each packed byte of c into a pixel value, then advance c by one bit */
 {
 	u8int v;
-	int i;
 	
-	v = 0;
-	if(d)
-		for(i = 0; i < nb; i++){
-			v |= (*c & 1) << i;
-			*c++ >>= 1;
+	if(d){	/* d nonzero: take bit 0 of every byte and shift right afterwards */
+		v = c[0] & 1 | c[0] >> 7 & 2;	/* bytes 0 and 1 of c[0] give result bits 0 and 1 */
+		if(nb != 2){	/* bytes 2 and 3 of c[0] give result bits 2 and 3 */
+			v |= c[0] >> 14 & 4 | c[0] >> 21 & 8;
+			if(nb == 8){	/* the four bytes of c[1] give result bits 4-7 */
+				v |= c[1] << 4 & 16 | c[1] >> 3 & 32 | c[1] >> 10 & 64 | c[1] >> 17 & 128;
+				c[1] >>= 1;
+			}
 		}
-	else
-		for(i = 0; i < nb; i++){
-			v |= (*c & 0x80) >> (7 - i);
-			*c++ <<= 1;
+		c[0] >>= 1;
+	}else{	/* d zero: take bit 7 of every byte and shift left afterwards */
+		v = c[0] >> 7 & 1 | c[0] >> 14 & 2;
+		if(nb != 2){
+			v |= c[0] >> 21 & 4 | c[0] >> 28 & 8;
+			if(nb == 8){
+				v |= c[1] >> 3 & 16 | c[1] >> 10 & 32 | c[1] >> 17 & 64 | c[1] >> 24 & 128;
+				c[1] <<= 1;
+			}
 		}
+		c[0] <<= 1;
+	}
 	return v;
 }
 
@@ -196,7 +238,7 @@
 		u8int sz, szsh;
 		u16int tx, ty, tnx, tny;
 		u16int t;
-		u8int c[8];
+		u32int c[2];
 		int pal;
 		u8int msz, mv, mx;
 	} bgs[4];
@@ -206,8 +248,6 @@
 	p = bgs + n;
 	if(rx == 0){
 		p->szsh = (reg[BGMODE] & (1<<(4+n))) != 0 ? 4 : 3;
-		if(mode >= 5)
-			p->szsh = 4;
 		p->sz = 1<<p->szsh;
 		sx = hofs[n];
 		sy = vofs[n] + ppuy;
@@ -293,10 +333,10 @@
 	static struct {
 		short x;
 		u8int sx, i, c, pal, pri;
-		u8int *ch;
+		u32int *ch;
 	} t[32], *tp;
-	static uchar ch[34*4], *cp;
-	static uchar *p, q, over;
+	static u32int ch[34];
+	static u8int *p, q, over;
 	static int n, m;
 	static int *sz;
 	static int szs[] = {
@@ -308,6 +348,7 @@
 	static u16int base[2];
 	u8int dy, v, col, pri0, pri1, prio;
 	u16int a;
+	u32int w, *cp;
 	int i, nt, dx;
 
 	if(rx == 0){
@@ -358,20 +399,16 @@
 			dx = rx - tp->x;
 			if(dx < 0 || dx >= tp->sx)
 				continue;
-			p = tp->ch + (dx >> 1 & 0xfc);
+			w = *tp->ch;
 			if((tp->c & 0x40) != 0){
-				v = p[2] & 1 | p[3] << 1 & 2 | p[0] << 2 & 4 | p[1] << 3 & 8;
-				p[0] >>= 1;
-				p[1] >>= 1;
-				p[2] >>= 1;
-				p[3] >>= 1;
+				v = w & 1 | w >> 7 & 2 | w >> 14 & 4 | w >> 21 & 8;
+				*tp->ch = w >> 1;
 			}else{
-				v = p[0] >> 7 & 1 | p[1] >> 6 & 2 | p[2] >> 5 & 4 | p[3] >> 4 & 8;
-				p[0] <<= 1;
-				p[1] <<= 1;
-				p[2] <<= 1;
-				p[3] <<= 1;
+				v = w >> 7 & 1 | w >> 14 & 2 | w >> 21 & 4 | w >> 28 & 8;
+				*tp->ch = w << 1;
 			}
+			if((dx & 7) == 7)
+				tp->ch++;
 			nt = (tp->i - prio) & 0x7f;
 			if(v != 0 && nt < pri1){
 				col = tp->pal + v;
@@ -397,7 +434,7 @@
 				tp->pri |= OBJNC;
 			tp->ch = cp;
 			tp->i = sp->i;
-			nt = sp->sx >> 2;
+			nt = sp->sx >> 3;
 			dy = ppuy - sp->y;
 			if((sp->c & 0x80) != 0)
 				dy = sp->sy - 1 - dy;
@@ -407,21 +444,26 @@
 			if((sp->c & 0x40) != 0){
 				a += sp->sx * 4;
 				for(i = 0; i < nt; i++){
-					if(cp < ch + sizeof(ch)){
-						a -= 16;
-						*(u16int*)cp = *(u16int*)&vram[sp->t1 | a & 0x1fff];
-						cp += 2;
-						tp->sx += 4;
+					if(cp < ch + nelem(ch)){
+						w  = vram[sp->t1 | (a -= 16) & 0x1fff] << 16;
+						w |= vram[sp->t1 | (a + 1) & 0x1fff] << 24;
+						w |= vram[sp->t1 | (a -= 16) & 0x1fff] << 0;
+						w |= vram[sp->t1 | (a + 1) & 0x1fff] << 8;
+						*cp++ = w;
+						tp->sx += 8;
 					}else
 						over |= 0x80;
 				}
 			}else
 				for(i = 0; i < nt; i++){
-					if(cp < ch + sizeof(ch)){
-						*(u16int*)cp = *(u16int*)&vram[sp->t1 | a & 0x1fff];
-						cp += 2;
-						tp->sx += 4;
-						a += 16;
+					if(cp < ch + nelem(ch)){
+						w  = vram[sp->t1 | a & 0x1fff];
+						w |= vram[sp->t1 | ++a & 0x1fff] << 8;
+						w |= vram[sp->t1 | (a += 15) & 0x1fff] << 16;
+						w |= vram[sp->t1 | ++a & 0x1fff] << 24;
+						*cp++ = w;
+						tp->sx += 8;
+						a += 15;
 					}else
 						over |= 0x80;
 				}
--- a/sys/src/games/snes/snes.c
+++ b/sys/src/games/snes/snes.c
@@ -11,11 +11,11 @@
 uchar *prg, *sram;
 int nprg, nsram, hirom, battery;
 
-int ppuclock, spcclock, stimerclock, saveclock, msgclock, paused;
+int ppuclock, spcclock, stimerclock, saveclock, msgclock, paused, perfclock;
 Mousectl *mc;
 QLock pauselock;
 int keys, savefd;
-int scale;
+int scale, profile;
 Rectangle picr;
 Image *tmp, *bg;
 
@@ -30,8 +30,6 @@
 void
 loadrom(char *file)
 {
-	static char buf[512];
-	char *s;
 	int fd;
 	vlong size;
 
@@ -78,6 +76,14 @@
 	default:
 		print("unknown rom type %d\n", memread(0xffd5));
 	}
+}
+
+void
+loadbat(char *file)
+{
+	static char buf[512];
+	char *s;
+
 	if(battery && nsram != 0){
 		strncpy(buf, file, sizeof buf - 5);
 		s = buf + strlen(buf) - 4;
@@ -164,6 +170,23 @@
 }
 
 void
+timing(void)	/* redraw a speed readout at the top-left of the screen image */
+{
+	static vlong old;	/* nsec() value taken at the end of the previous call; 0 on the first call */
+	static char buf[32];
+	vlong new;
+	
+	new = nsec();
+	if(new != old)
+		sprint(buf, "%6.2f%%", 1e11 / (new - old));	/* 1e11/dt(ns) = 100 * (1 s / dt); presumably calls are one emulated second apart (perfclock = FREQ) - confirm */
+	else
+		buf[0] = 0;	/* no time elapsed: show an empty string instead of dividing by zero */
+	draw(screen, Rect(10, 10, 200, 30), bg, nil, ZP);	/* paint bg over the previous text */
+	string(screen, Pt(10, 10), display->black, ZP, display->defaultfont, buf);
+	old = nsec();	/* interval restarts after drawing, so the draw calls above are not counted */
+}
+
+void
 threadmain(int argc, char **argv)
 {
 	int t;
@@ -171,13 +194,25 @@
 
 	scale = 1;
 	ARGBEGIN {
+	case '2':
+		scale = 2;
+		break;
+	case '3':
+		scale = 3;
+		break;
 	case 's':
 		battery++;
 		break;
+	case 'T':
+		profile++;
+		break;
+	default:
+		goto usage;
 	} ARGEND;
 	
 	if(argc != 1){
-		fprint(2, "usage: %s rom\n", argv0);
+usage:
+		fprint(2, "usage: %s [-23sT] rom\n", argv0);	/* lists every flag the ARGBEGIN switch accepts, including -T (profile) */
 		threadexitsall("usage");
 	}
 	loadrom(argv[0]);
@@ -186,6 +221,7 @@
 	mc = initmouse(nil, screen);
 	if(mc == nil)
 		sysfatal("initmouse: %r");
+	loadbat(argv[0]);
 	screeninit();
 	proccreate(keyproc, 0, 8192);
 	cpureset();
@@ -200,6 +236,7 @@
 		spcclock -= t;
 		stimerclock += t;
 		ppuclock += t;
+		perfclock -= t;
 
 		while(ppuclock >= 4){
 			ppustep();
@@ -222,6 +259,10 @@
 				draw(screen, screen->r, bg, nil, ZP);
 				msgclock = 0;
 			}
+		}
+		if(profile && perfclock <= 0){
+			perfclock = FREQ;
+			timing();
 		}
 	}
 }