ref: e8e04281402b697c0568ce5e35dbd7fc1d0c0935
parent: 289fb207c93576321a21a2ff82f2bb45b56821ad
author: aiju <devnull@localhost>
date: Fri Mar 14 11:06:25 EDT 2014
games/snes: performance improvements and scaling
--- a/sys/src/games/snes/dat.h
+++ b/sys/src/games/snes/dat.h
@@ -14,7 +14,7 @@
extern u16int vtime, htime, subcolor, oamaddr;
extern u16int m7[6], hofs[4], vofs[4];
-extern int battery, saveclock;
+extern int battery, saveclock, scale;
enum {
FLAGC = 1<<0,
--- a/sys/src/games/snes/ppu.c
+++ b/sys/src/games/snes/ppu.c
@@ -31,12 +31,36 @@
pixeldraw(int x, int y, u16int v)
{
uchar *p;
+ u16int *q;
+ union { u16int w; u8int b[2]; } u;
+ int i;
if(bright != 0xf)
v = darken(v);
- p = pic + (x + y * 256) * 2;
- *p++ = v;
- *p = v >> 8;
+ if(scale == 1){
+ p = pic + (x + y * 256) * 2;
+ *p++ = v;
+ *p = v >> 8;
+ return;
+ }
+ u.b[0] = v;
+ u.b[1] = v >> 8;
+ if(scale == 2){
+ q = (u16int*)pic + (x + y * 256 * 2) * 2;
+ *q++ = u.w;
+ *q = u.w;
+ q += 256 * 2 - 1;
+ *q++ = u.w;
+ *q = u.w;
+ }else{
+ q = (u16int*)pic + (x + y * 256 * 3) * 3;
+ for(i = 0; i < 3; i++){
+ *q++ = u.w;
+ *q++ = u.w;
+ *q = u.w;
+ q += 256 * 3 - 2;
+ }
+ }
}
static int
@@ -100,9 +124,8 @@
}
static void
-chr(int n, int nb, int sz, u16int t, int x, int y, u8int c[])
+chr(int n, int nb, int sz, u16int t, int x, int y, u32int c[])
{
- int i;
u16int a;
if(sz == 16){
@@ -120,10 +143,20 @@
else
a &= 0xf;
a = (a << 13) + (t & 0x3ff) * 8 * nb + y * 2;
- for(i = 0; i < nb; i += 2){
- c[i] = vram[a++];
- c[i+1] = vram[a];
+ c[0] = vram[a++];
+ c[0] |= vram[a] << 8;
+ if(nb != 2){
a += 15;
+ c[0] |= vram[a++] << 16;
+ c[0] |= vram[a] << 24;
+ if(nb == 8){
+ a += 15;
+ c[1] = vram[a++];
+ c[1] |= vram[a] << 8;
+ a += 15;
+ c[1] |= vram[a++] << 16;
+ c[1] |= vram[a] << 24;
+ }
}
}
@@ -156,36 +189,45 @@
}
static void
-shift(u8int *c, int nb, int n, int d)
+shift(u32int *c, int nb, int n, int d)
{
- u8int *e;
-
- e = c + nb;
- if(d)
- while(c < e)
- *c++ >>= n;
- else
- while(c < e)
- *c++ <<= n;
+ if(d){
+ c[0] >>= n;
+ if(nb == 8)
+ c[1] >>= n;
+ }else{
+ c[0] <<= n;
+ if(nb == 8)
+ c[1] <<= n;
+ }
}
static u8int
-bgpixel(u8int *c, int nb, int d)
+bgpixel(u32int *c, int nb, int d)
{
u8int v;
- int i;
- v = 0;
- if(d)
- for(i = 0; i < nb; i++){
- v |= (*c & 1) << i;
- *c++ >>= 1;
+ if(d){
+ v = c[0] & 1 | c[0] >> 7 & 2;
+ if(nb != 2){
+ v |= c[0] >> 14 & 4 | c[0] >> 21 & 8;
+ if(nb == 8){
+ v |= c[1] << 4 & 16 | c[1] >> 3 & 32 | c[1] >> 10 & 64 | c[1] >> 17 & 128;
+ c[1] >>= 1;
+ }
}
- else
- for(i = 0; i < nb; i++){
- v |= (*c & 0x80) >> (7 - i);
- *c++ <<= 1;
+ c[0] >>= 1;
+ }else{
+ v = c[0] >> 7 & 1 | c[0] >> 14 & 2;
+ if(nb != 2){
+ v |= c[0] >> 21 & 4 | c[0] >> 28 & 8;
+ if(nb == 8){
+ v |= c[1] >> 3 & 16 | c[1] >> 10 & 32 | c[1] >> 17 & 64 | c[1] >> 24 & 128;
+ c[1] <<= 1;
+ }
}
+ c[0] <<= 1;
+ }
return v;
}
@@ -196,7 +238,7 @@
u8int sz, szsh;
u16int tx, ty, tnx, tny;
u16int t;
- u8int c[8];
+ u32int c[2];
int pal;
u8int msz, mv, mx;
} bgs[4];
@@ -206,8 +248,6 @@
p = bgs + n;
if(rx == 0){
p->szsh = (reg[BGMODE] & (1<<(4+n))) != 0 ? 4 : 3;
- if(mode >= 5)
- p->szsh = 4;
p->sz = 1<<p->szsh;
sx = hofs[n];
sy = vofs[n] + ppuy;
@@ -293,10 +333,10 @@
static struct {
short x;
u8int sx, i, c, pal, pri;
- u8int *ch;
+ u32int *ch;
} t[32], *tp;
- static uchar ch[34*4], *cp;
- static uchar *p, q, over;
+ static u32int ch[34];
+ static u8int *p, q, over;
static int n, m;
static int *sz;
static int szs[] = {
@@ -308,6 +348,7 @@
static u16int base[2];
u8int dy, v, col, pri0, pri1, prio;
u16int a;
+ u32int w, *cp;
int i, nt, dx;
if(rx == 0){
@@ -358,20 +399,16 @@
dx = rx - tp->x;
if(dx < 0 || dx >= tp->sx)
continue;
- p = tp->ch + (dx >> 1 & 0xfc);
+ w = *tp->ch;
if((tp->c & 0x40) != 0){
- v = p[2] & 1 | p[3] << 1 & 2 | p[0] << 2 & 4 | p[1] << 3 & 8;
- p[0] >>= 1;
- p[1] >>= 1;
- p[2] >>= 1;
- p[3] >>= 1;
+ v = w & 1 | w >> 7 & 2 | w >> 14 & 4 | w >> 21 & 8;
+ *tp->ch = w >> 1;
}else{
- v = p[0] >> 7 & 1 | p[1] >> 6 & 2 | p[2] >> 5 & 4 | p[3] >> 4 & 8;
- p[0] <<= 1;
- p[1] <<= 1;
- p[2] <<= 1;
- p[3] <<= 1;
+ v = w >> 7 & 1 | w >> 14 & 2 | w >> 21 & 4 | w >> 28 & 8;
+ *tp->ch = w << 1;
}
+ if((dx & 7) == 7)
+ tp->ch++;
nt = (tp->i - prio) & 0x7f;
if(v != 0 && nt < pri1){
col = tp->pal + v;
@@ -397,7 +434,7 @@
tp->pri |= OBJNC;
tp->ch = cp;
tp->i = sp->i;
- nt = sp->sx >> 2;
+ nt = sp->sx >> 3;
dy = ppuy - sp->y;
if((sp->c & 0x80) != 0)
dy = sp->sy - 1 - dy;
@@ -407,21 +444,26 @@
if((sp->c & 0x40) != 0){
a += sp->sx * 4;
for(i = 0; i < nt; i++){
- if(cp < ch + sizeof(ch)){
- a -= 16;
- *(u16int*)cp = *(u16int*)&vram[sp->t1 | a & 0x1fff];
- cp += 2;
- tp->sx += 4;
+ if(cp < ch + nelem(ch)){
+ w = vram[sp->t1 | (a -= 16) & 0x1fff] << 16;
+ w |= vram[sp->t1 | (a + 1) & 0x1fff] << 24;
+ w |= vram[sp->t1 | (a -= 16) & 0x1fff] << 0;
+ w |= vram[sp->t1 | (a + 1) & 0x1fff] << 8;
+ *cp++ = w;
+ tp->sx += 8;
}else
over |= 0x80;
}
}else
for(i = 0; i < nt; i++){
- if(cp < ch + sizeof(ch)){
- *(u16int*)cp = *(u16int*)&vram[sp->t1 | a & 0x1fff];
- cp += 2;
- tp->sx += 4;
- a += 16;
+ if(cp < ch + nelem(ch)){
+ w = vram[sp->t1 | a & 0x1fff];
+ w |= vram[sp->t1 | ++a & 0x1fff] << 8;
+ w |= vram[sp->t1 | (a += 15) & 0x1fff] << 16;
+ w |= vram[sp->t1 | ++a & 0x1fff] << 24;
+ *cp++ = w;
+ tp->sx += 8;
+ a += 15;
}else
over |= 0x80;
}
--- a/sys/src/games/snes/snes.c
+++ b/sys/src/games/snes/snes.c
@@ -11,11 +11,11 @@
uchar *prg, *sram;
int nprg, nsram, hirom, battery;
-int ppuclock, spcclock, stimerclock, saveclock, msgclock, paused;
+int ppuclock, spcclock, stimerclock, saveclock, msgclock, paused, perfclock;
Mousectl *mc;
QLock pauselock;
int keys, savefd;
-int scale;
+int scale, profile;
Rectangle picr;
Image *tmp, *bg;
@@ -30,8 +30,6 @@
void
loadrom(char *file)
{
- static char buf[512];
- char *s;
int fd;
vlong size;
@@ -78,6 +76,14 @@
default:
print("unknown rom type %d\n", memread(0xffd5));
}
+}
+
+void
+loadbat(char *file)
+{
+ static char buf[512];
+ char *s;
+
if(battery && nsram != 0){
strncpy(buf, file, sizeof buf - 5);
s = buf + strlen(buf) - 4;
@@ -164,6 +170,23 @@
}
void
+timing(void)
+{
+ static vlong old;
+ static char buf[32];
+ vlong new;
+
+ new = nsec();
+ if(new != old)
+ sprint(buf, "%6.2f%%", 1e11 / (new - old));
+ else
+ buf[0] = 0;
+ draw(screen, Rect(10, 10, 200, 30), bg, nil, ZP);
+ string(screen, Pt(10, 10), display->black, ZP, display->defaultfont, buf);
+ old = nsec();
+}
+
+void
threadmain(int argc, char **argv)
{
int t;
@@ -171,13 +194,25 @@
scale = 1;
ARGBEGIN {
+ case '2':
+ scale = 2;
+ break;
+ case '3':
+ scale = 3;
+ break;
case 's':
battery++;
break;
+ case 'T':
+ profile++;
+ break;
+ default:
+ goto usage;
} ARGEND;
if(argc != 1){
- fprint(2, "usage: %s rom\n", argv0);
+usage:
+ fprint(2, "usage: %s [-23sT] rom\n", argv0);
threadexitsall("usage");
}
loadrom(argv[0]);
@@ -186,6 +221,7 @@
mc = initmouse(nil, screen);
if(mc == nil)
sysfatal("initmouse: %r");
+ loadbat(argv[0]);
screeninit();
proccreate(keyproc, 0, 8192);
cpureset();
@@ -200,6 +236,7 @@
spcclock -= t;
stimerclock += t;
ppuclock += t;
+ perfclock -= t;
while(ppuclock >= 4){
ppustep();
@@ -222,6 +259,10 @@
draw(screen, screen->r, bg, nil, ZP);
msgclock = 0;
}
+ }
+ if(profile && perfclock <= 0){
+ perfclock = FREQ;
+ timing();
}
}
}