ref: 0184245d3352001a72bbc12386d7882cc23eb778
parent: dfc9377fca976ca112ec9d906b69c8cb179c40f6
author: Snesrev <snesrev@protonmail.com>
date: Sun Sep 11 10:37:14 EDT 2022
Switch to another pixel format to improve performance. SDL_UnlockTexture was really slow on some computers.
--- a/main.c
+++ b/main.c
@@ -194,10 +194,18 @@
printf("Failed to create renderer: %s\n", SDL_GetError());
return 1;
}
+
+ SDL_RendererInfo renderer_info;
+ SDL_GetRendererInfo(renderer, &renderer_info);
+ printf("Supported texture formats:");
+ for (int i = 0; i < renderer_info.num_texture_formats; i++)
+ printf(" %s", SDL_GetPixelFormatName(renderer_info.texture_formats[i]));
+ printf("\n");
+
g_renderer = renderer;
if (!g_config.ignore_aspect_ratio)
SDL_RenderSetLogicalSize(renderer, kRenderWidth, kRenderHeight);
- SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGBX8888, SDL_TEXTUREACCESS_STREAMING, kRenderWidth * 2, kRenderHeight * 2);
+ SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, kRenderWidth * 2, kRenderHeight * 2);
if(texture == NULL) {
printf("Failed to create texture: %s\n", SDL_GetError());
return 1;
@@ -303,15 +311,32 @@
if (g_input1_state & 0xf0)
g_gamepad_buttons = 0;
+ uint64 t0 = SDL_GetPerformanceCounter();
+
bool is_turbo = RunOneFrame(snes_run, g_input1_state | g_gamepad_buttons, (frameCtr++ & 0x7f) != 0 && g_turbo);
if (is_turbo)
continue;
+
+ uint64 t1 = SDL_GetPerformanceCounter();
PlayAudio(snes_run, device, have.channels, audioBuffer);
+ uint64 t2 = SDL_GetPerformanceCounter();
+
RenderScreen(window, renderer, texture, (g_win_flags & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0);
+ uint64 t3 = SDL_GetPerformanceCounter();
+ SDL_RenderPresent(renderer); // vsyncs to 60 FPS?
+ uint64 t4 = SDL_GetPerformanceCounter();
- SDL_RenderPresent(renderer); // vsyncs to 60 FPS
+ double f = 1e3 / (double)SDL_GetPerformanceFrequency();
+ if (0) printf("Perf %6.2f %6.2f %6.2f %6.2f\n",
+ (t1 - t0) * f,
+ (t2 - t1) * f,
+ (t3 - t2) * f,
+ (t4 - t3) * f
+ );
+
+
// if vsync isn't working, delay manually
curTick = SDL_GetTicks();
@@ -406,26 +431,43 @@
int i;
sprintf(buf, "%d", n);
for (s = buf, i = 2 * 4; *s; s++, i += 8 * 4)
- RenderDigit(dst + ((pitch + i + 4) << big), pitch, *s - '0', 0x40404000, big);
+ RenderDigit(dst + ((pitch + i + 4) << big), pitch, *s - '0', 0x404040, big);
for (s = buf, i = 2 * 4; *s; s++, i += 8 * 4)
- RenderDigit(dst + (i << big), pitch, *s - '0', 0xffffff00, big);
+ RenderDigit(dst + (i << big), pitch, *s - '0', 0xffffff, big);
}
static void RenderScreen(SDL_Window *window, SDL_Renderer *renderer, SDL_Texture *texture, bool fullscreen) {
uint8* pixels = NULL;
int pitch = 0;
+ uint64 t0 = SDL_GetPerformanceCounter();
if(SDL_LockTexture(texture, NULL, (void**)&pixels, &pitch) != 0) {
printf("Failed to lock texture: %s\n", SDL_GetError());
return;
}
+ uint64 t1 = SDL_GetPerformanceCounter();
bool hq = RenderScreenWithPerf(pixels, pitch, g_ppu_render_flags);
if (g_display_perf)
RenderNumber(pixels + (pitch*2<<hq), pitch, g_curr_fps, hq);
+
+ uint64 t2 = SDL_GetPerformanceCounter();
SDL_UnlockTexture(texture);
+ uint64 t3 = SDL_GetPerformanceCounter();
SDL_RenderClear(renderer);
-
+ uint64 t4 = SDL_GetPerformanceCounter();
SDL_Rect src_rect = { 0, 0, kRenderWidth, kRenderHeight };
SDL_RenderCopy(renderer, texture, hq ? NULL : &src_rect, NULL);
+ uint64 t5 = SDL_GetPerformanceCounter();
+
+ double f = 1e3 / (double)SDL_GetPerformanceFrequency();
+ if (0) printf("RenderPerf %6.2f %6.2f %6.2f %6.2f %6.2f\n",
+ (t1 - t0) * f,
+ (t2 - t1) * f,
+ (t3 - t2) * f,
+ (t4 - t3) * f,
+ (t5 - t4) * f
+ );
+
+
}
static void HandleCommand(uint32 j, bool pressed) {
--- a/snes/ppu.c
+++ b/snes/ppu.c
@@ -157,7 +157,7 @@
if (hq) {
for (int i = 0; i < 256; i++) {
uint32 color = ppu->cgram[i];
- ppu->colorMapRgb[i] = ppu->brightnessMult[color & 0x1f] << 24 | ppu->brightnessMult[(color >> 5) & 0x1f] << 16 | ppu->brightnessMult[(color >> 10) & 0x1f] << 8;
+ ppu->colorMapRgb[i] = ppu->brightnessMult[color & 0x1f] << 16 | ppu->brightnessMult[(color >> 5) & 0x1f] << 8 | ppu->brightnessMult[(color >> 10) & 0x1f];
}
}
@@ -876,9 +876,9 @@
uint32 i = left;
do {
uint32 color = ppu->cgram[ppu->bgBuffers[0].pixel[i]];
- dst[1] = dst[0] = ppu->brightnessMult[color & clip_color_mask] << 24 |
- ppu->brightnessMult[(color >> 5) & clip_color_mask] << 16 |
- ppu->brightnessMult[(color >> 10) & clip_color_mask] << 8;
+ dst[1] = dst[0] = ppu->brightnessMult[color & clip_color_mask] << 16 |
+ ppu->brightnessMult[(color >> 5) & clip_color_mask] << 8 |
+ ppu->brightnessMult[(color >> 10) & clip_color_mask];
} while (dst += 2, ++i < right);
} else {
uint8 *half_color_map = ppu->halfColor ? ppu->brightnessMultHalf : ppu->brightnessMult;
@@ -913,7 +913,7 @@
b += b2;
}
}
- dst[0] = dst[1] = color_map[r] << 24 | color_map[g] << 16 | color_map[b] << 8;
+ dst[0] = dst[1] = color_map[b] | color_map[g] << 8 | color_map[r] << 16;
} while (dst += 2, ++i < right);
}
} while (cw_clip_math >>= 1, ++windex < cwin.nr);
@@ -977,14 +977,14 @@
}
int row = y - 1;
uint8 *pixelBuffer = (uint8*) &ppu->renderBuffer[row * 2 * ppu->renderPitch + x * 8];
- pixelBuffer[0] = 0;
- pixelBuffer[1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
- pixelBuffer[2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
- pixelBuffer[3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
- pixelBuffer[4] = 0;
- pixelBuffer[5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
- pixelBuffer[6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
- pixelBuffer[7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+ pixelBuffer[0] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[1] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[2] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[3] = 0;
+ pixelBuffer[4] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
+ pixelBuffer[5] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
+ pixelBuffer[6] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+ pixelBuffer[7] = 0;
}
static const int bitDepthsPerMode[10][4] = {