ref: f6fe0537ed424851bc86836b0f27279cd939259a
parent: 9925c16b29650a3228c7bb801bd20bbc79c5cfb6
author: Clownacy <Clownacy@users.noreply.github.com>
date: Mon Jun 29 17:37:05 EDT 2020
Add vertex-batching to Wii U port Should remove the last bit of lag that port has (like the Labyrinth fight where you team-up with Curly)
--- a/src/Backends/Rendering/WiiU.cpp
+++ b/src/Backends/Rendering/WiiU.cpp
@@ -25,6 +25,15 @@
#include "WiiUShaders/texture.gsh.h"
#include "WiiUShaders/texture_colour_key.gsh.h"
+typedef enum RenderMode
+{
+ MODE_BLANK,
+ MODE_DRAW_SURFACE,
+ MODE_DRAW_SURFACE_WITH_TRANSPARENCY,
+ MODE_COLOUR_FILL,
+ MODE_DRAW_GLYPH
+} RenderMode;
+
typedef struct RenderBackend_Surface
{
GX2Texture texture;
@@ -62,6 +71,11 @@
Coordinate2D texture;
} Vertex;
+typedef struct VertexBufferSlot
+{
+ Vertex vertices[4];
+} VertexBufferSlot;
+
static WHBGfxShaderGroup texture_shader;
static WHBGfxShaderGroup texture_colour_key_shader;
static WHBGfxShaderGroup colour_fill_shader;
@@ -80,6 +94,90 @@
static Viewport tv_viewport;
static Viewport drc_viewport;
+static VertexBufferSlot *local_vertex_buffer;
+static unsigned long local_vertex_buffer_size;
+static unsigned long current_vertex_buffer_slot;
+
+static RenderMode last_render_mode;
+static RenderBackend_Surface *last_source_surface;
+static RenderBackend_Surface *last_destination_surface;
+
+static VertexBufferSlot* GetVertexBufferSlot(unsigned int slots_needed)
+{
+ // Check if buffer needs expanding
+ if (current_vertex_buffer_slot + slots_needed > local_vertex_buffer_size)
+ {
+ local_vertex_buffer_size = 1;
+
+ while (current_vertex_buffer_slot + slots_needed > local_vertex_buffer_size)
+ local_vertex_buffer_size <<= 1;
+
+ VertexBufferSlot *realloc_result = (VertexBufferSlot*)realloc(local_vertex_buffer, local_vertex_buffer_size * sizeof(VertexBufferSlot));
+
+ if (realloc_result != NULL)
+ {
+ local_vertex_buffer = realloc_result;
+ }
+ else
+ {
+ Backend_PrintError("Couldn't expand vertex buffer");
+ return NULL;
+ }
+ }
+
+ current_vertex_buffer_slot += slots_needed;
+
+ return &local_vertex_buffer[current_vertex_buffer_slot - slots_needed];
+}
+
+static void FlushVertexBuffer(void)
+{
+ static unsigned long vertex_buffer_size;
+ static unsigned int current_vertex_buffer = 0;
+
+ if (current_vertex_buffer_slot == 0)
+ return;
+
+ // Make sure the buffers aren't currently being used before we modify them
+ GX2DrawDone();
+
+ // Upload vertex buffer to VBO, growing it if necessary
+ if (local_vertex_buffer_size > vertex_buffer_size)
+ {
+ vertex_buffer_size = local_vertex_buffer_size;
+
+ GX2RDestroyBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+
+ vertex_buffer.flags = (GX2RResourceFlags)(GX2R_RESOURCE_BIND_VERTEX_BUFFER |
+ GX2R_RESOURCE_USAGE_CPU_READ |
+ GX2R_RESOURCE_USAGE_CPU_WRITE |
+ GX2R_RESOURCE_USAGE_GPU_READ);
+ vertex_buffer.elemSize = sizeof(VertexBufferSlot);
+ vertex_buffer.elemCount = vertex_buffer_size;
+
+ if (GX2RCreateBuffer(&vertex_buffer))
+ {
+ void *vertex_pointer = GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+
+ memcpy(vertex_pointer, local_vertex_buffer, vertex_buffer_size * sizeof(VertexBufferSlot));
+
+ GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ }
+ }
+ else
+ {
+ void *vertex_pointer = GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+
+ memcpy(vertex_pointer, local_vertex_buffer, current_vertex_buffer_slot * sizeof(VertexBufferSlot));
+
+ GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ }
+
+ GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4 * current_vertex_buffer_slot, 0, 1);
+
+ current_vertex_buffer_slot = 0;
+}
+
static void CalculateViewport(unsigned int actual_screen_width, unsigned int actual_screen_height, Viewport *viewport)
{
if ((float)actual_screen_width / (float)actual_screen_height > (float)framebuffer_surface->width / (float)framebuffer_surface->height)
@@ -136,80 +234,63 @@
WHBGfxInitShaderAttribute(&glyph_shader, "input_texture_coordinates", 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32);
WHBGfxInitFetchShader(&glyph_shader);
- // Initialise vertex position buffer
- vertex_buffer.flags = (GX2RResourceFlags)(GX2R_RESOURCE_BIND_VERTEX_BUFFER |
- GX2R_RESOURCE_USAGE_CPU_READ |
- GX2R_RESOURCE_USAGE_CPU_WRITE |
- GX2R_RESOURCE_USAGE_GPU_READ);
- vertex_buffer.elemSize = sizeof(Vertex);
- vertex_buffer.elemCount = 4;
+ // Initialise sampler
+ GX2InitSampler(&sampler, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
- if (GX2RCreateBuffer(&vertex_buffer))
+ // Create framebuffer surface
+ framebuffer_surface = RenderBackend_CreateSurface(screen_width, screen_height, true);
+
+ if (framebuffer_surface != NULL)
{
- // Initialise sampler
- GX2InitSampler(&sampler, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
+ // Create a 'context' (this voodoo magic can be used to undo `GX2SetColorBuffer`,
+ // allowing us to draw to the screen once again)
+ gx2_context = (GX2ContextState*)aligned_alloc(GX2_CONTEXT_STATE_ALIGNMENT, sizeof(GX2ContextState));
- // Create framebuffer surface
- framebuffer_surface = RenderBackend_CreateSurface(screen_width, screen_height, true);
-
- if (framebuffer_surface != NULL)
+ if (gx2_context != NULL)
{
- // Create a 'context' (this voodoo magic can be used to undo `GX2SetColorBuffer`,
- // allowing us to draw to the screen once again)
- gx2_context = (GX2ContextState*)aligned_alloc(GX2_CONTEXT_STATE_ALIGNMENT, sizeof(GX2ContextState));
+ memset(gx2_context, 0, sizeof(GX2ContextState));
+ GX2SetupContextStateEx(gx2_context, TRUE);
+ GX2SetContextState(gx2_context);
- if (gx2_context != NULL)
+ // Disable depth-test (enabled by default for some reason)
+ GX2SetDepthOnlyControl(FALSE, FALSE, GX2_COMPARE_FUNC_ALWAYS);
+
+ // Calculate centred viewports
+ switch (GX2GetSystemTVScanMode())
{
- memset(gx2_context, 0, sizeof(GX2ContextState));
- GX2SetupContextStateEx(gx2_context, TRUE);
- GX2SetContextState(gx2_context);
+ // For now, we have to match WUT's broken behaviour (its `GX2TVScanMode`
+ // enum is missing values, and the rest are off-by-one)
+ //case GX2_TV_SCAN_MODE_576I:
+ case GX2_TV_SCAN_MODE_480I: // Actually 576i
+ case GX2_TV_SCAN_MODE_480P: // Actually 480i
+ CalculateViewport(854, 480, &tv_viewport);
+ break;
- // Disable depth-test (enabled by default for some reason)
- GX2SetDepthOnlyControl(FALSE, FALSE, GX2_COMPARE_FUNC_ALWAYS);
+ case GX2_TV_SCAN_MODE_720P: // Actually 480p
+ default: // Funnel the *real* 1080p into this
+ CalculateViewport(1280, 720, &tv_viewport);
+ break;
- // Calculate centred viewports
- switch (GX2GetSystemTVScanMode())
- {
- // For now, we have to match WUT's broken behaviour (its `GX2TVScanMode`
- // enum is missing values, and the rest are off-by-one)
- //case GX2_TV_SCAN_MODE_576I:
- case GX2_TV_SCAN_MODE_480I: // Actually 576i
- case GX2_TV_SCAN_MODE_480P: // Actually 480i
- CalculateViewport(854, 480, &tv_viewport);
- break;
-
- case GX2_TV_SCAN_MODE_720P: // Actually 480p
- default: // Funnel the *real* 1080p into this
- CalculateViewport(1280, 720, &tv_viewport);
- break;
-
- case GX2_TV_SCAN_MODE_1080I: // Actually invalid
- case GX2_TV_SCAN_MODE_1080P: // Actually 1080i
- CalculateViewport(1920, 1080, &tv_viewport);
- break;
- }
-
- CalculateViewport(854, 480, &drc_viewport);
-
- return framebuffer_surface;
+ case GX2_TV_SCAN_MODE_1080I: // Actually invalid
+ case GX2_TV_SCAN_MODE_1080P: // Actually 1080i
+ CalculateViewport(1920, 1080, &tv_viewport);
+ break;
}
- else
- {
- Backend_PrintError("Couldn't allocate memory for the GX2 context");
- }
- RenderBackend_FreeSurface(framebuffer_surface);
+ CalculateViewport(854, 480, &drc_viewport);
+
+ return framebuffer_surface;
}
else
{
- Backend_PrintError("Couldn't create the framebuffer surface");
+ Backend_PrintError("Couldn't allocate memory for the GX2 context");
}
- GX2RDestroyBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ RenderBackend_FreeSurface(framebuffer_surface);
}
else
{
- Backend_PrintError("Couldn't create the vertex buffer");
+ Backend_PrintError("Couldn't create the framebuffer surface");
}
WHBGfxFreeShaderGroup(&glyph_shader);
@@ -268,6 +349,11 @@
void RenderBackend_DrawScreen(void)
{
+ FlushVertexBuffer();
+ last_render_mode = MODE_BLANK;
+ last_source_surface = NULL;
+ last_destination_surface = NULL;
+
// Make sure the buffers aren't currently being used before we modify them
GX2DrawDone();
@@ -503,124 +589,143 @@
if (source_surface == NULL || destination_surface == NULL)
return;
- // Make sure the buffers aren't currently being used before we modify them
- GX2DrawDone();
+ const RenderMode render_mode = (colour_key ? MODE_DRAW_SURFACE_WITH_TRANSPARENCY : MODE_DRAW_SURFACE);
- Vertex *vertex_pointer = (Vertex*)GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ // Flush vertex data if a context-change is needed
+ if (last_render_mode != render_mode || last_source_surface != source_surface || last_destination_surface != destination_surface)
+ {
+ FlushVertexBuffer();
- // Set vertex position buffer
- const float destination_left = x;
- const float destination_top = y;
- const float destination_right = x + (rect->right - rect->left);
- const float destination_bottom = y + (rect->bottom - rect->top);
+ last_render_mode = render_mode;
+ last_source_surface = source_surface;
+ last_destination_surface = destination_surface;
- vertex_pointer[0].position.x = destination_left;
- vertex_pointer[0].position.y = destination_top;
- vertex_pointer[1].position.x = destination_right;
- vertex_pointer[1].position.y = destination_top;
- vertex_pointer[2].position.x = destination_right;
- vertex_pointer[2].position.y = destination_bottom;
- vertex_pointer[3].position.x = destination_left;
- vertex_pointer[3].position.y = destination_bottom;
+ // Draw to the selected texture, instead of the screen
+ GX2SetColorBuffer(&destination_surface->colour_buffer, GX2_RENDER_TARGET_0);
+ GX2SetViewport(0.0f, 0.0f, (float)destination_surface->colour_buffer.surface.width, (float)destination_surface->colour_buffer.surface.height, 0.0f, 1.0f);
+ GX2SetScissor(0, 0, destination_surface->colour_buffer.surface.width, destination_surface->colour_buffer.surface.height);
- for (unsigned int i = 0; i < 4; ++i)
- {
- vertex_pointer[i].position.x /= destination_surface->width;
- vertex_pointer[i].position.x *= 2.0f;
- vertex_pointer[i].position.x -= 1.0f;
+ // Select shader
+ WHBGfxShaderGroup *shader = colour_key ? &texture_colour_key_shader : &texture_shader;
- vertex_pointer[i].position.y /= destination_surface->height;
- vertex_pointer[i].position.y *= -2.0f;
- vertex_pointer[i].position.y += 1.0f;
+ // Bind it
+ GX2SetFetchShader(&shader->fetchShader);
+ GX2SetVertexShader(shader->vertexShader);
+ GX2SetPixelShader(shader->pixelShader);
+
+ // Bind misc. data
+ GX2SetPixelSampler(&sampler, shader->pixelShader->samplerVars[0].location);
+ GX2SetPixelTexture(&source_surface->texture, shader->pixelShader->samplerVars[0].location);
+ GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
+ GX2RSetAttributeBuffer(&vertex_buffer, 1, sizeof(Vertex), offsetof(Vertex, texture));
}
- // Set texture coordinate buffer
- vertex_pointer[0].texture.x = rect->left / (float)source_surface->width;
- vertex_pointer[0].texture.y = rect->top / (float)source_surface->height;
- vertex_pointer[1].texture.x = rect->right / (float)source_surface->width;
- vertex_pointer[1].texture.y = rect->top / (float)source_surface->height;
- vertex_pointer[2].texture.x = rect->right / (float)source_surface->width;
- vertex_pointer[2].texture.y = rect->bottom / (float)source_surface->height;
- vertex_pointer[3].texture.x = rect->left / (float)source_surface->width;
- vertex_pointer[3].texture.y = rect->bottom / (float)source_surface->height;
+ VertexBufferSlot *vertex_buffer_slot = GetVertexBufferSlot(1);
- GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ if (vertex_buffer_slot != NULL)
+ {
+ // Set vertex position buffer
+ const float destination_left = x;
+ const float destination_top = y;
+ const float destination_right = x + (rect->right - rect->left);
+ const float destination_bottom = y + (rect->bottom - rect->top);
- // Draw to the selected texture, instead of the screen
- GX2SetColorBuffer(&destination_surface->colour_buffer, GX2_RENDER_TARGET_0);
- GX2SetViewport(0.0f, 0.0f, (float)destination_surface->colour_buffer.surface.width, (float)destination_surface->colour_buffer.surface.height, 0.0f, 1.0f);
- GX2SetScissor(0, 0, destination_surface->colour_buffer.surface.width, destination_surface->colour_buffer.surface.height);
+ vertex_buffer_slot->vertices[0].position.x = destination_left;
+ vertex_buffer_slot->vertices[0].position.y = destination_top;
+ vertex_buffer_slot->vertices[1].position.x = destination_right;
+ vertex_buffer_slot->vertices[1].position.y = destination_top;
+ vertex_buffer_slot->vertices[2].position.x = destination_right;
+ vertex_buffer_slot->vertices[2].position.y = destination_bottom;
+ vertex_buffer_slot->vertices[3].position.x = destination_left;
+ vertex_buffer_slot->vertices[3].position.y = destination_bottom;
- // Select shader
- WHBGfxShaderGroup *shader = colour_key ? &texture_colour_key_shader : &texture_shader;
+ for (unsigned int i = 0; i < 4; ++i)
+ {
+ vertex_buffer_slot->vertices[i].position.x /= destination_surface->width;
+ vertex_buffer_slot->vertices[i].position.x *= 2.0f;
+ vertex_buffer_slot->vertices[i].position.x -= 1.0f;
- // Bind it
- GX2SetFetchShader(&shader->fetchShader);
- GX2SetVertexShader(shader->vertexShader);
- GX2SetPixelShader(shader->pixelShader);
+ vertex_buffer_slot->vertices[i].position.y /= destination_surface->height;
+ vertex_buffer_slot->vertices[i].position.y *= -2.0f;
+ vertex_buffer_slot->vertices[i].position.y += 1.0f;
+ }
- // Bind misc. data
- GX2SetPixelSampler(&sampler, shader->pixelShader->samplerVars[0].location);
- GX2SetPixelTexture(&source_surface->texture, shader->pixelShader->samplerVars[0].location);
- GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
- GX2RSetAttributeBuffer(&vertex_buffer, 1, sizeof(Vertex), offsetof(Vertex, texture));
-
- // Draw
- GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
+ // Set texture coordinate buffer
+ vertex_buffer_slot->vertices[0].texture.x = rect->left / (float)source_surface->width;
+ vertex_buffer_slot->vertices[0].texture.y = rect->top / (float)source_surface->height;
+ vertex_buffer_slot->vertices[1].texture.x = rect->right / (float)source_surface->width;
+ vertex_buffer_slot->vertices[1].texture.y = rect->top / (float)source_surface->height;
+ vertex_buffer_slot->vertices[2].texture.x = rect->right / (float)source_surface->width;
+ vertex_buffer_slot->vertices[2].texture.y = rect->bottom / (float)source_surface->height;
+ vertex_buffer_slot->vertices[3].texture.x = rect->left / (float)source_surface->width;
+ vertex_buffer_slot->vertices[3].texture.y = rect->bottom / (float)source_surface->height;
+ }
}
void RenderBackend_ColourFill(RenderBackend_Surface *surface, const RenderBackend_Rect *rect, unsigned char red, unsigned char green, unsigned char blue)
{
+ static unsigned char last_red;
+ static unsigned char last_green;
+ static unsigned char last_blue;
+
if (surface == NULL)
return;
- // Make sure the buffers aren't currently being used before we modify them
- GX2DrawDone();
+ // Flush vertex data if a context-change is needed
+ if (last_render_mode != MODE_COLOUR_FILL || last_destination_surface != surface || last_red != red || last_green != green || last_blue != blue)
+ {
+ FlushVertexBuffer();
- Vertex *vertex_pointer = (Vertex*)GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ last_render_mode = MODE_COLOUR_FILL;
+ last_source_surface = NULL;
+ last_destination_surface = surface;
+ last_red = red;
+ last_green = green;
+ last_blue = blue;
- // Set vertex position buffer
- vertex_pointer[0].position.x = rect->left;
- vertex_pointer[0].position.y = rect->top;
- vertex_pointer[1].position.x = rect->right;
- vertex_pointer[1].position.y = rect->top;
- vertex_pointer[2].position.x = rect->right;
- vertex_pointer[2].position.y = rect->bottom;
- vertex_pointer[3].position.x = rect->left;
- vertex_pointer[3].position.y = rect->bottom;
+ // Draw to the selected texture, instead of the screen
+ GX2SetColorBuffer(&surface->colour_buffer, GX2_RENDER_TARGET_0);
+ GX2SetViewport(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height, 0.0f, 1.0f);
+ GX2SetScissor(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height);
- for (unsigned int i = 0; i < 4; ++i)
- {
- vertex_pointer[i].position.x /= surface->width;
- vertex_pointer[i].position.x *= 2.0f;
- vertex_pointer[i].position.x -= 1.0f;
+ // Set the colour-fill... colour
+ const float uniform_colours[4] = {red / 255.0f, green / 255.0f, blue / 255.0f, 1.0f};
+ GX2SetPixelUniformReg(colour_fill_shader.pixelShader->uniformVars[0].offset, 4, (uint32_t*)&uniform_colours);
- vertex_pointer[i].position.y /= surface->height;
- vertex_pointer[i].position.y *= -2.0f;
- vertex_pointer[i].position.y += 1.0f;
+ // Bind the colour-fill shader
+ GX2SetFetchShader(&colour_fill_shader.fetchShader);
+ GX2SetVertexShader(colour_fill_shader.vertexShader);
+ GX2SetPixelShader(colour_fill_shader.pixelShader);
+
+ // Bind misc. data
+ GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
}
- GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
+ VertexBufferSlot *vertex_buffer_slot = GetVertexBufferSlot(1);
- // Draw to the selected texture, instead of the screen
- GX2SetColorBuffer(&surface->colour_buffer, GX2_RENDER_TARGET_0);
- GX2SetViewport(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height, 0.0f, 1.0f);
- GX2SetScissor(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height);
+ if (vertex_buffer_slot != NULL)
+ {
+ // Set vertex position buffer
+ vertex_buffer_slot->vertices[0].position.x = rect->left;
+ vertex_buffer_slot->vertices[0].position.y = rect->top;
+ vertex_buffer_slot->vertices[1].position.x = rect->right;
+ vertex_buffer_slot->vertices[1].position.y = rect->top;
+ vertex_buffer_slot->vertices[2].position.x = rect->right;
+ vertex_buffer_slot->vertices[2].position.y = rect->bottom;
+ vertex_buffer_slot->vertices[3].position.x = rect->left;
+ vertex_buffer_slot->vertices[3].position.y = rect->bottom;
- // Set the colour-fill... colour
- const float uniform_colours[4] = {red / 255.0f, green / 255.0f, blue / 255.0f, 1.0f};
- GX2SetPixelUniformReg(colour_fill_shader.pixelShader->uniformVars[0].offset, 4, (uint32_t*)&uniform_colours);
+ for (unsigned int i = 0; i < 4; ++i)
+ {
+ vertex_buffer_slot->vertices[i].position.x /= surface->width;
+ vertex_buffer_slot->vertices[i].position.x *= 2.0f;
+ vertex_buffer_slot->vertices[i].position.x -= 1.0f;
- // Bind the colour-fill shader
- GX2SetFetchShader(&colour_fill_shader.fetchShader);
- GX2SetVertexShader(colour_fill_shader.vertexShader);
- GX2SetPixelShader(colour_fill_shader.pixelShader);
-
- // Bind misc. data
- GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
-
- // Draw
- GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
+ vertex_buffer_slot->vertices[i].position.y /= surface->height;
+ vertex_buffer_slot->vertices[i].position.y *= -2.0f;
+ vertex_buffer_slot->vertices[i].position.y += 1.0f;
+ }
+ }
}
RenderBackend_Glyph* RenderBackend_LoadGlyph(const unsigned char *pixels, unsigned int width, unsigned int height, int pitch)
@@ -692,6 +797,11 @@
void RenderBackend_PrepareToDrawGlyphs(RenderBackend_Surface *destination_surface, const unsigned char *colour_channels)
{
+ FlushVertexBuffer();
+ last_render_mode = MODE_BLANK;
+ last_source_surface = NULL;
+ last_destination_surface = NULL;
+
if (destination_surface == NULL)
return;