From 7d36568eeb7a1040f50b8c923552ac1b3f7c3a7b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 01:13:39 +0100 Subject: [PATCH 01/43] add base of rlsw.h --- src/external/rlsw.h | 1909 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1909 insertions(+) create mode 100644 src/external/rlsw.h diff --git a/src/external/rlsw.h b/src/external/rlsw.h new file mode 100644 index 000000000..55205d057 --- /dev/null +++ b/src/external/rlsw.h @@ -0,0 +1,1909 @@ +/** + * MIT License + * + * Copyright (c) 2025 Le Juez Victor + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + #ifndef RLSW_H + #define RLSW_H + + #include + #include + + #ifndef SW_MALLOC + # define SW_MALLOC(sz) malloc(sz) + #endif + + #ifndef SW_FREE + # define SW_FREE(ptr) free(ptr) + #endif + + #ifndef SW_MAX_PROJECTION_STACK_SIZE + # define SW_MAX_PROJECTION_STACK_SIZE 2 + #endif + + #ifndef SW_MAX_MODELVIEW_STACK_SIZE + # define SW_MAX_MODELVIEW_STACK_SIZE 8 + #endif + + #ifndef SW_MAX_TEXTURE_STACK_SIZE + # define SW_MAX_TEXTURE_STACK_SIZE 4 + #endif + + #ifndef SW_MAX_TEXTURES + # define SW_MAX_TEXTURES 128 + #endif + + #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES + # define SW_MAX_CLIPPED_POLYGON_VERTICES 12 + #endif + + #ifndef SW_CLIP_EPSILON + # define SW_CLIP_EPSILON 1e-4f + #endif + + typedef enum { + SW_PROJECTION, + SW_MODELVIEW, + SW_TEXTURE + } SWmatrix; + + typedef enum { + SW_VERTEX_ARRAY, + SW_TEXTURE_COORD_ARRAY, + SW_NORMAL_ARRAY, + SW_COLOR_ARRAY + } SWarray; + + typedef enum { + SW_POINTS, + SW_LINES, + SW_TRIANGLES, + SW_QUADS, + } SWfill; + + typedef enum { + SW_CULL_FRONT, + SW_CULL_BACK, + } SWcull; + + typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp + } SWpixelformat; + + typedef enum { + SW_NEAREST, + SW_LINEAR, + SW_NEAREST_MIPMAP_NEAREST, + SW_NEAREST_MIPMAP_LINEAR, + SW_LINEAR_MIPMAP_NEAREST, + SW_LINEAR_MIPMAP_LINEAR + } SWfilter; + + typedef enum { + SW_REPEAT, + SW_CLAMP_TO_EDGE, + SW_MIRRORED_REPEAT + } SWwrap; + + typedef enum { + SW_TEXTURE_MIN_FILTER, + SW_TEXTURE_MAG_FILTER, + SW_TEXTURE_WRAP_S, + SW_TEXTURE_WRAP_T + } SWtexparam; + + typedef enum { + SW_NO_ERROR, + SW_INVALID_ENUM, + SW_INVALID_VALUE, + SW_STACK_OVERFLOW, + SW_STACK_UNDERFLOW, + SW_INVALID_OPERATION, + SW_ERROR_OUT_OF_MEMORY + } SWerrcode; + + /* === Public API === */ + + void swInit(int w, int h); + void swClose(void); + + void* swGetColorBuffer(int* w, int* h); + + void swMatrixMode(SWmatrix mode); + void swPushMatrix(void); + void swPopMatrix(void); + void swLoadIdentity(void); + void swTranslatef(float x, float y, float z); + void swRotatef(float angle, float x, float y, float z); + void swScalef(float x, float y, float z); + void swMultMatrixf(const float* mat); + void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); + void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); + + void swViewport(int x, int y, int width, int height); + + void swClearColor(float r, float g, float b, float a); + void swClear(void); + + void swBegin(SWfill mode); + void swEnd(void); + + void swVertex2i(int x, int y); + void swVertex2f(float x, float y); + void swVertex2fv(const float* v); + void swVertex3i(int x, int y, int z); + void swVertex3f(float x, float y, float z); + void swVertex3fv(const float* v); + void swVertex4i(int x, int y, int z, int w); + void swVertex4f(float x, float y, float z, float w); + void swVertex4fv(const float* v); + + void swColor1ui(uint32_t color); + void swColor3ub(uint8_t r, uint8_t g, uint8_t b); + void swColor3ubv(const uint8_t* v); + void swColor3us(uint16_t r, uint16_t g, uint16_t b); + void swColor3usv(const uint16_t* v); + void swColor3ui(uint32_t r, uint32_t g, uint32_t b); + void swColor3uiv(const uint32_t* v); + void swColor3f(float r, float g, float b); + void swColor3fv(const float* v); + void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); + void swColor4ubv(const uint8_t* v); + void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); + void swColor4usv(const uint16_t* v); + void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); + void swColor4uiv(const uint32_t* v); + void swColor4f(float r, float g, float b, float a); + void swColor4fv(const float* v); + + void swTexCoord2f(float u, float v); + void swTexCoordfv(const float* v); + + void swNormal3f(float x, float y, float z); + void swNormal3fv(const float* v); + + void swBindArray(SWarray type, void *buffer); + void swDrawArrays(SWfill mode, int offset, int count); + + uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); + void swUnloadTexture(uint32_t id); + + void swTextureParameters(uint32_t id, int param, int value); + void swBindTexture(uint32_t id); + + #endif // RLSW_H + + + + #ifdef RLSW_IMPL + + #include + #include + + /* === Defines and Macros === */ + + #define SW_PI 3.14159265358979323846f + #define SW_DEG2RAD (SW_PI/180.0f) + #define SW_RAD2DEG (180.0f/SW_PI) + + /* === Internal Structs === */ + + typedef float sw_matrix_t[4*4]; + typedef uint16_t sw_half_t; + + typedef struct { + + float position[4]; // Position coordinates + float normal[3]; // Normal vector + float texcoord[2]; // Texture coordinates + float color[4]; // Color + + float homogeneous[4]; // Homogeneous coordinates + float screen[2]; // Screen coordinates + + } sw_vertex_t; + + typedef struct { + + const void* pixels; + int width; + int height; + int format; + + SWfilter minFilter; + SWfilter magFilter; + + SWwrap sWrap; + SWwrap tWrap; + + float tx; + float ty; + + } sw_texture_t; + + typedef struct { + uint8_t *color; // 32-bit RGBA color buffer + uint16_t *depth; // 16-bit fixed fract buffer + int width, height; + } sw_framebuffer_t; + + typedef struct { + + sw_framebuffer_t framebuffer; + uint8_t clearColor[4]; // Color used to clear the screen + uint16_t clearDepth; // Depth value used to clear the screen + + uint32_t currentTexture; + sw_matrix_t *currentMatrix; + + uint32_t blendFunction; + uint32_t depthFunction; + + int vpPos[2]; // Represents the top-left corner of the viewport + int vpDim[2]; // Represents the dimensions of the viewport (minus one) + int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) + int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + + struct { + float* positions; + float* texcoords; + float* normals; + uint8_t* colors; + } array; + + sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering + int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' + + SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + float pointSize; // Rasterized point size + float lineWidth; // Rasterized line width + + sw_matrix_t matProjection; // Projection matrix, user adjustable + sw_matrix_t matTexture; // Texture matrix, user adjustable + sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) + sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) + sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally + + sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations + sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations + sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations + uint32_t stackProjectionCounter; // Counter for matrix stack operations + uint32_t stackModelviewCounter; // Counter for matrix stack operations + uint32_t stackTextureCounter; // Counter for matrix stack operations + + SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) + bool modelMatrixUsed; // Flag indicating if the model matrix is used + + SWcull cullFace; // Faces to cull + SWerrcode errCode; // Last error code + + sw_texture_t* loadedTextures; + int loadedTextureCount; + + uint32_t* freeTextureIds; + int freeTextureIdCount; + + } sw_data_t; + + + /* === Global Data === */ + + static sw_data_t RLSW = { 0 }; + + + /* === Helper Functions === */ + + static inline void sw_matrix_id(sw_matrix_t dst) + { + dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; + dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; + dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; + dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; + } + + static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) + { + sw_matrix_t result; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float sum = 0.0; + for (int k = 0; k < 4; k++) { + sum += left[i * 4 + k] * right[k * 4 + j]; + } + result[i * 4 + j] = sum; + } + } + for (int i = 0; i < 16; i++) { + dst[i] = result[i]; + } + } + + static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) + { + float tmp[4] = { + mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], + mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], + mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] + mat[14] * v[3], + mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] + }; + + for (int i = 0; i < 4; i++) { + dst[i] = tmp[i]; + } + } + + static inline float sw_lerp(float a, float b, float t) + { + return a + t * (b - a); + } + + static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) + { + sw_vertex_t result; + for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); + } + return result; + } + + static inline uint32_t sw_cvt_hf_ui(uint16_t h) + { + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; + } + + static inline float sw_cvt_hf(sw_half_t y) + { + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; + } + + static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) + { + float gray = (float)((uint8_t*)pixels)[offset] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_red_16(float* color, const void* pixels, uint32_t offset) + { + float value = sw_cvt_hf(((sw_half_t*)pixels)[offset]); + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_t offset) + { + float value = ((float*)pixels)[offset]; + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) + { + float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; + float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = alpha; + } + + static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) + { + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7E0) >> 5) / 63; + color[2] = (float)(pixel & 0x1F) / 31; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) + { + const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgb_161616(float* color, const void* pixels, uint32_t offset) + { + const sw_half_t *pixel = (sw_half_t*)pixels + 3 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) + { + const float *pixel = (float*)pixels + 3 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = 1.0f; + } + + static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) + { + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7C0) >> 6) / 31; + color[2] = (float)((pixel & 0x3E) >> 1) / 31; + color[3] = (float)(pixel & 0x1); + } + + static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) + { + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF000) >> 12) / 15; + color[1] = (float)((pixel & 0xF00) >> 8) / 15; + color[2] = (float)((pixel & 0xF0) >> 4) / 15; + color[3] = (float)(pixel & 0xF) / 15; + } + + static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) + { + const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = (float)pixel[3] / 255; + } + + static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) + { + const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = sw_cvt_hf(pixel[3]); + } + + static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) + { + const float *pixel = (float*)pixels + 4 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = pixel[3]; + } + + static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) + { + switch (format) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_get_pixel_grayscale(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_get_pixel_grayscale_alpha(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_get_pixel_rgb_565(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_get_pixel_rgb_888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_get_pixel_rgba_5551(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_get_pixel_rgba_4444(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_get_pixel_rgba_8888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_get_pixel_red_32(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_get_pixel_rgb_323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_get_pixel_rgba_32323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_get_pixel_red_16(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_get_pixel_rgb_161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_get_pixel_rgba_16161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } + } + + static inline void sw_map_repeat(int* out, float in, int max) + { + // Upscale to nearest texture coordinates + // NOTE: We use '(int)(x+0.5)' although this is incorrect + // regarding the direction of rounding in case of negative values + // and also less accurate than roundf, but it remains so much more + // efficient that it is preferable for now to opt for this option. + + *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); + } + + static inline void sw_map_clamp_to_edge(int* out, float in, int max) + { + in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); + *out = (int)(in * (max - 1) + 0.5f); + } + + static inline void sw_map_mirrored_repeat(int* out, float in, int max) + { + in = fmodf(fabsf(in), 2); + if (in > 1.0f) in = 1.0f - (in - 1.0f); + *out = (int)(in * (max - 1) + 0.5f); + } + + static inline void sw_map(int* out, float in, int max, SWwrap mode) + { + switch (mode) { + case SW_REPEAT: + sw_map_repeat(out, in, max); + break; + case SW_CLAMP_TO_EDGE: + sw_map_clamp_to_edge(out, in, max); + break; + case SW_MIRRORED_REPEAT: + sw_map_mirrored_repeat(out, in, max); + break; + } + } + + static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) + { + int x, y; + sw_map(&x, u, tex->width, tex->sWrap); + sw_map(&y, v, tex->height, tex->tWrap); + sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); + } + + static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) + { + int x0, y0, x1, y1; + sw_map(&x0, u, tex->width, tex->sWrap); + sw_map(&y0, v, tex->height, tex->tWrap); + sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); + sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); + + float fx = u * (tex->width - 1) - x0; + float fy = v * (tex->height - 1) - y0; + + float c00[4], c10[4], c01[4], c11[4]; + sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); + sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); + sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); + sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); + + float c0[4], c1[4]; + for (int i = 0; i < 4; i++) { + float a = sw_lerp(c00[i], c10[i], fx); + float b = sw_lerp(c01[i], c11[i], fx); + color[i] = sw_lerp(a, b, fy); + } + } + + static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, + float xDu, float yDu, float xDv, float yDv) + { + // TODO: It seems there are some incorrect detections depending on the context + // This is probably due to the fact that the fractions are obtained + // at the wrong moment during rasterization. It would be worth reviewing + // this, although the scanline method complicates things. + + // Calculate the derivatives for each axis + float du = sqrtf(xDu * xDu + yDu * yDu); + float dv = sqrtf(xDv * xDv + yDv * yDv); + float L = (du > dv) ? du : dv; + + // Select the filter based on the size of the footprint + if (L > 1.0f) { + // Minification + if (tex->minFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->minFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } else { + // Magnification + if (tex->magFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->magFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } + } + + static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + + int inputCounter = *vertexCounter; + *vertexCounter = 0; + + const sw_vertex_t *prevVt = &input[inputCounter-1]; + char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + if (prevDot*currDot < 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + return *vertexCounter > 0; + } + + static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + for (int iAxis = 0; iAxis < 3; iAxis++) + { + if (*vertexCounter == 0) return false; + + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int inputCounter; + + const sw_vertex_t *prevVt; + char prevDot; + + // Clip against first plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot * currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + if (*vertexCounter == 0) return false; + + // Clip against opposite plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot*currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + } + + return *vertexCounter > 0; + } + + void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; + sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + } + + if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + + // Calculation of the reciprocal of W for normalization + // as well as perspective correct attributes + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + + // Division of XYZ coordinates by weight + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + + // Division of texture coordinates (perspective correct) + v->texcoord[0] *= v->homogeneous[3]; + v->texcoord[1] *= v->homogeneous[3]; + + // Transform to screen space + v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + } + } + } + + void sw_raster_scanline(const sw_texture_t* tex, const sw_vertex_t* start, const sw_vertex_t* end, float yDu, float yDv) + { + // Calculate the horizontal width and avoid division by zero + float dx = end->screen[0] - start->screen[0]; + if (fabsf(dx) < 1e-4f) return; + + // Convert and center the screen coordinates + int xStart = (int)(start->screen[0] + 0.5f); + int xEnd = (int)(end->screen[0] + 0.5f); + int y = (int)(start->screen[1] + 0.5f); + + // Calculate the initial interpolation parameter and its increment + float dt = 1.0f / dx; + float t = (xStart - start->screen[0]) * dt; + + // Calculate the horizontal gradients for UV coordinates + float xDu = (end->texcoord[0] - start->texcoord[0]) * dt; + float xDv = (end->texcoord[1] - start->texcoord[1]) * dt; + + // Pre-calculate the color differences for interpolation + float dcol[4]; + for (int i = 0; i < 4; i++) { + dcol[i] = end->color[i] - start->color[i]; + } + + // Pre-calculate the differences in Z and W (for depth testing and perspective correction) + float dz = end->homogeneous[2] - start->homogeneous[2]; + float dw = end->homogeneous[3] - start->homogeneous[3]; + + // Initialize the interpolated texture coordinates + float u = start->texcoord[0] + t * xDu; + float v = start->texcoord[1] + t * xDv; + + // Pre-calculate the starting pointer for the color framebuffer row + uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); + uint8_t* dst = row_ptr + xStart * 4; + + // Pre-calculate the pointer for the depth buffer row + uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; + uint16_t* dptr = depth_row; + + // Scanline rasterization loop + for (int x = xStart; x < xEnd; x++) { + // Interpolate Z and W for depth testing and perspective correction + float w = 1.0f / (start->homogeneous[3] + t * dw); + float z = start->homogeneous[2] + t * dz; + + // Depth testing with direct access to the depth buffer + // TODO: Implement different depth funcs? + float depth = (float)(*dptr) / UINT16_MAX; + if (z > depth) goto discard; + + // Update the depth buffer + *dptr = (uint16_t)(z * UINT16_MAX); + + // Sample the texture + float texColor[4]; + sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); + + // Interpolate the color and modulate by the texture color + for (int i = 0; i < 4; i++) { + float lerp = start->color[i] + t * dcol[i]; + float finalColor = texColor[i] * lerp; + // Inline clamp to keep the value between 0 and 1 + // NOTE: The need for clamp, the colors could be a sign of problem during interpolation (?) + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); + dst[i] = (uint8_t)(finalColor * 255.0f); + } + + // Increment the interpolation parameter, UVs, and pointers + discard: + t += dt; + u += xDu; + v += xDv; + dst += 4; + dptr++; + } + } + + void sw_raster_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_texture_t* tex) + { + // Swap vertices by increasing y + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } + if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } + + // Extracting coordinates from the sorted vertices + float x0 = v0->screen[0], y0 = v0->screen[1]; + float x1 = v1->screen[0], y1 = v1->screen[1]; + float x2 = v2->screen[0], y2 = v2->screen[1]; + + // Reject degenerate triangles + float height = y2 - y0; + if (height < 1e-4f) return; + + // Global calculation of vertical texture gradients for the triangle + float yDu = (v2->texcoord[0] - v0->texcoord[0]) / height; + float yDv = (v2->texcoord[1] - v0->texcoord[1]) / height; + + // Precompute the inverse of the triangle height and + // edge lengths with checks to avoid division by zero. + float inv_height = 1.0f / height; + float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; + float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; + + // Pre-calculation of slopes (dx/dy) + float dx02 = (x2 - x0) * inv_height; + float dx01 = (x1 - x0) * inv_y1y0; + float dx12 = (x2 - x1) * inv_y2y1; + + // Y bounds (vertical clipping) + int yTop = (int)(y0 + 0.5f); + int yMiddle = (int)(y1 + 0.5f); + int yBottom = (int)(y2 + 0.5f); + + // Initializing scanline variables + float xLeft = x0, xRight = x0; + sw_vertex_t start, end; + + // Scanline for the upper part of the triangle + for (int y = yTop; y < yMiddle; y++) { + float dy = (float)y - y0; + float t1 = dy * inv_height; + float t2 = dy * inv_y1y0; + + // Optimized interpolation + start = sw_lerp_vertex(v0, v2, t1); + end = sw_lerp_vertex(v0, v1, t2); + start.screen[0] = xLeft; + start.screen[1] = (float)y; + end.screen[0] = xRight; + end.screen[1] = (float)y; + + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } + sw_raster_scanline(tex, &start, &end, yDu, yDv); + + // Incremental update + xLeft += dx02; + xRight += dx01; + } + + // Scanline for the lower part of the triangle + xRight = x1; // Restart the right side from the second vertex + for (int y = yMiddle; y < yBottom; y++) { + float dy = (float)y - y0; + float t1 = dy * inv_height; + float t2 = (float)(y - y1) * inv_y2y1; + + // Optimized interpolation + start = sw_lerp_vertex(v0, v2, t1); + end = sw_lerp_vertex(v1, v2, t2); + start.screen[0] = xLeft; + start.screen[1] = (float)y; + end.screen[0] = xRight; + end.screen[1] = (float)y; + + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } + sw_raster_scanline(tex, &start, &end, yDu, yDv); + + // Incremental update + xLeft += dx02; + xRight += dx12; + } + } + + void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) + { + int vertexCounter = 3; + + sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; + polygon[0] = *v0; + polygon[1] = *v1; + polygon[2] = *v2; + + sw_project_and_clip_triangle(polygon, &vertexCounter); + + if (vertexCounter < 3) { + return; + } + + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + + static inline bool sw_is_texture_id_valid(uint32_t id) + { + bool valid = true; + + if (id == 0) valid = false; + else if (id >= SW_MAX_TEXTURES) valid = false; + else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + + return true; + } + + static inline bool sw_is_texture_filter_valid(int filter) + { + return (filter == SW_NEAREST || filter == SW_LINEAR); + } + + static inline bool sw_is_texture_wrap_valid(int wrap) + { + return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); + } + + /* === Public Implementation === */ + + void swInit(int w, int h) + { + swViewport(0, 0, w, h); + + RLSW.framebuffer.color = SW_MALLOC(4 * w * h); + RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + + RLSW.clearColor[0] = 0; + RLSW.clearColor[1] = 0; + RLSW.clearColor[2] = 0; + RLSW.clearColor[3] = 255; + RLSW.clearDepth = UINT16_MAX; + + RLSW.currentMatrixMode = SW_MODELVIEW; + RLSW.currentMatrix = &RLSW.matView; + + sw_matrix_id(RLSW.matProjection); + sw_matrix_id(RLSW.matTexture); + sw_matrix_id(RLSW.matModel); + sw_matrix_id(RLSW.matView); + + RLSW.vertexBuffer[0].color[0] = 1.0f; + RLSW.vertexBuffer[0].color[1] = 1.0f; + RLSW.vertexBuffer[0].color[2] = 1.0f; + RLSW.vertexBuffer[0].color[3] = 1.0f; + + RLSW.vertexBuffer[0].texcoord[0] = 0.0f; + RLSW.vertexBuffer[0].texcoord[1] = 0.0f; + + RLSW.vertexBuffer[0].normal[0] = 0.0f; + RLSW.vertexBuffer[0].normal[1] = 0.0f; + RLSW.vertexBuffer[0].normal[2] = 1.0f; + + static const float defTex[3*2*2] = + { + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + }; + + RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].width = 2; + RLSW.loadedTextures[0].height = 2; + RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + RLSW.loadedTextures[0].minFilter = SW_NEAREST; + RLSW.loadedTextures[0].magFilter = SW_NEAREST; + RLSW.loadedTextures[0].sWrap = SW_REPEAT; + RLSW.loadedTextures[0].tWrap = SW_REPEAT; + RLSW.loadedTextures[0].tx = 0.5f; + RLSW.loadedTextures[0].ty = 0.5f; + + RLSW.loadedTextureCount = 1; + } + + void swClose(void) + { + SW_FREE(RLSW.framebuffer.color); + SW_FREE(RLSW.framebuffer.depth); + + SW_FREE(RLSW.loadedTextures); + SW_FREE(RLSW.freeTextureIds); + } + + void* swGetColorBuffer(int* w, int* h) + { + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; + } + + void swMatrixMode(SWmatrix mode) + { + switch (mode) { + case SW_PROJECTION: + RLSW.currentMatrix = &RLSW.matProjection; + break; + case SW_MODELVIEW: + RLSW.currentMatrix = RLSW.modelMatrixUsed + ? &RLSW.matModel : &RLSW.matView; + break; + case SW_TEXTURE: + RLSW.currentMatrix = &RLSW.matTexture; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + RLSW.currentMatrixMode = mode; + } + + void swPushMatrix(void) + { + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; + } + RLSW.stackProjectionCounter++; + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + if (RLSW.modelMatrixUsed) { + for (int i = 0; i < 16; i++) { + RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; + } + RLSW.stackModelviewCounter++; + } else { + RLSW.currentMatrix = &RLSW.matModel; + RLSW.modelMatrixUsed = true; + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; + } + RLSW.stackTextureCounter++; + break; + + } + } + + void swPopMatrix(void) + { + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackProjectionCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; + } + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter == 0) { + if (!RLSW.modelMatrixUsed) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + sw_matrix_id(RLSW.matModel); + RLSW.currentMatrix = &RLSW.matView; + RLSW.modelMatrixUsed = false; + } else { + RLSW.stackModelviewCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matModel[i] = RLSW.stackModelview[RLSW.stackModelviewCounter][i]; + } + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackTextureCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; + } + break; + + } + } + + void swLoadIdentity(void) + { + sw_matrix_id(*RLSW.currentMatrix); + } + + void swTranslatef(float x, float y, float z) + { + sw_matrix_t mat; + sw_matrix_id(mat); + + mat[12] = x; + mat[13] = y; + mat[14] = z; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swRotatef(float angle, float x, float y, float z) + { + angle *= SW_DEG2RAD; + + sw_matrix_t mat; + sw_matrix_id(mat); + + float lengthSq = x*x + y*y + z*z; + + if (lengthSq != 1.0f && lengthSq != 0.0f) { + float invLenght = 1.0f / lengthSq; + x *= invLenght; + y *= invLenght; + z *= invLenght; + } + + float sinres = sinf(angle); + float cosres = cosf(angle); + float t = 1.0f - cosres; + + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; + mat[10] = z*z*t + cosres; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swScalef(float x, float y, float z) + { + sw_matrix_t mat; + + mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; + mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; + mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; + mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + } + + void swMultMatrixf(const float* mat) + { + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) + { + sw_matrix_t mat = { 0 }; + + float rl = right - left; + float tb = top - bottom; + float fn = zfar - znear; + + mat[0] = (znear * 2.0f) / rl; + mat[5] = (znear * 2.0f) / tb; + + mat[8] = (right + left) / rl; + mat[9] = (top + bottom) / tb; + mat[10] = -(zfar + znear) / fn; + mat[11] = -1.0f; + + mat[14] = -(zfar * znear * 2.0f) / fn; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) + { + sw_matrix_t mat = { 0 }; + + float rl = (right - left); + float tb = (top - bottom); + float fn = (zfar - znear); + + mat[0] = 2.0f / rl; + mat[5] = 2.0f / tb; + + mat[10] = -2.0f / fn; + mat[11] = 0.0f; + mat[12] = -(left + right) / rl; + mat[13] = -(top + bottom) / tb; + + mat[14] = -(zfar + znear) / fn; + mat[15] = 1.0f; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + } + + void swViewport(int x, int y, int width, int height) + { + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; + } + + void swClearColor(float r, float g, float b, float a) + { + RLSW.clearColor[0] = r * 255; + RLSW.clearColor[1] = g * 255; + RLSW.clearColor[2] = b * 255; + RLSW.clearColor[3] = a * 255; + } + + void swClear(void) + { + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + for (int i = 0; i < size; i++) { + ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); + RLSW.framebuffer.depth[i] = RLSW.clearDepth; + } + } + + void swBegin(SWfill mode) + { + if (mode < SW_POINTS || mode > SW_QUADS) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.vertexCounter = 0; + RLSW.fillMode = mode; + } + + void swEnd(void) + { + RLSW.vertexCounter = 0; + } + + void swVertex2i(int x, int y) + { + float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + swVertex4fv(v); + } + + void swVertex2f(float x, float y) + { + float v[4] = { x, y, 0.0f, 1.0f }; + swVertex4fv(v); + } + + void swVertex2fv(const float* v) + { + float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + swVertex4fv(v4); + } + + void swVertex3i(int x, int y, int z) + { + float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + swVertex4fv(v); + } + + void swVertex3f(float x, float y, float z) + { + float v[4] = { x, y, z, 1.0f }; + swVertex4fv(v); + } + + void swVertex3fv(const float* v) + { + float v4[4] = { v[0], v[1], v[2], 1.0f }; + swVertex4fv(v4); + } + + void swVertex4i(int x, int y, int z, int w) + { + float v[4] = { (float)x, (float)y, (float)z, (float)w }; + swVertex4fv(v); + } + + void swVertex4f(float x, float y, float z, float w) + { + float v[4] = { x, y, z, w }; + swVertex4fv(v); + } + + void swVertex4fv(const float* v) + { + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; + } + RLSW.vertexCounter++; + + int neededVertices = 0; + switch (RLSW.fillMode) { + case SW_POINTS: + neededVertices = 1; + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + neededVertices = 3; + break; + case SW_QUADS: + neededVertices = 4; + break; + } + + if (RLSW.vertexCounter == neededVertices) { + + // TODO: Optimize MVP calculation + sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); + sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + + switch (RLSW.fillMode) { + case SW_POINTS: + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + break; + case SW_QUADS: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + sw_render_triangle( + &RLSW.vertexBuffer[2], + &RLSW.vertexBuffer[3], + &RLSW.vertexBuffer[0] + ); + break; + } + + RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; + RLSW.vertexCounter = 0; + } + else { + RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; + } + } + + void swColor1ui(uint32_t color) + { + union { + uint32_t v; + uint8_t a[4]; + } c = { .v = color }; + + float cv[4]; + cv[0] = (float)c.a[0] / 255; + cv[1] = (float)c.a[1] / 255; + cv[2] = (float)c.a[2] / 255; + cv[3] = (float)c.a[3] / 255; + + swColor4fv(cv); + } + + void swColor3ub(uint8_t r, uint8_t g, uint8_t b) + { + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3ubv(const uint8_t* v) + { + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3us(uint16_t r, uint16_t g, uint16_t b) + { + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3usv(const uint16_t* v) + { + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3ui(uint32_t r, uint32_t g, uint32_t b) + { + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3uiv(const uint32_t* v) + { + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3f(float r, float g, float b) + { + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor3fv(const float* v) + { + float cv[4]; + cv[0] = v[0]; + cv[1] = v[1]; + cv[2] = v[2]; + cv[3] = 1.0f; + + swColor4fv(cv); + } + + void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) + { + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = (float)a / 255; + + swColor4fv(cv); + } + + void swColor4ubv(const uint8_t* v) + { + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = (float)v[3] / 255; + + swColor4fv(cv); + } + + void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) + { + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = (float)((uint8_t)(a >> 8)) / 255; + + swColor4fv(cv); + } + + void swColor4usv(const uint16_t* v) + { + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; + + swColor4fv(cv); + } + + void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = (float)((uint8_t)(a >> 24)) / 255; + + swColor4fv(cv); + } + + void swColor4uiv(const uint32_t* v) + { + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; + + swColor4fv(cv); + } + + void swColor4f(float r, float g, float b, float a) + { + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = a; + + swColor4fv(cv); + } + + void swColor4fv(const float* v) + { + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; + } + } + + void swTexCoord2f(float u, float v) + { + float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; + } + + void swTexCoordfv(const float* v) + { + float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; + } + + void swNormal3f(float x, float y, float z) + { + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; + } + + void swNormal3fv(const float* v) + { + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; + } + + void swBindArray(SWarray type, void *buffer) + { + switch (type) { + case SW_VERTEX_ARRAY: + RLSW.array.positions = buffer; + break; + case SW_TEXTURE_COORD_ARRAY: + RLSW.array.texcoords = buffer; + break; + case SW_NORMAL_ARRAY: + RLSW.array.normals = buffer; + break; + case SW_COLOR_ARRAY: + RLSW.array.colors = buffer; + break; + default: + break; + } + } + + void swDrawArrays(SWfill mode, int offset, int count) + { + if (RLSW.array.positions == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + swBegin(mode); + + for (int i = offset; i < count; i++) { + if (RLSW.array.texcoords) { + swTexCoordfv(RLSW.array.texcoords + 2 * i); + } + if (RLSW.array.normals) { + swNormal3fv(RLSW.array.normals + 3 * i); + } + if (RLSW.array.colors) { + swColor4ubv(RLSW.array.colors + 4 * i); + } + swVertex3fv(RLSW.array.positions + 3 * i); + } + + swEnd(); + } + + uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) + { + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_ERROR_OUT_OF_MEMORY; + return 0; + } + + sw_texture_t texture = { 0 }; + texture.pixels = data; + texture.width = width; + texture.height = height; + texture.format = format; + texture.minFilter = SW_NEAREST; + texture.magFilter = SW_NEAREST; + texture.sWrap = SW_REPEAT; + texture.tWrap = SW_REPEAT; + texture.tx = 1.0f / width; + texture.ty = 1.0f / height; + (void)mipmapCount; + + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + + RLSW.loadedTextures[id] = texture; + + return id; + } + + void swUnloadTexture(uint32_t id) + { + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + RLSW.loadedTextures[id].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; + } + + void swTextureParameters(uint32_t id, int param, int value) + { + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + switch (param) { + + case SW_TEXTURE_MIN_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->minFilter = value; + break; + + case SW_TEXTURE_MAG_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->magFilter = value; + break; + + case SW_TEXTURE_WRAP_S: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->sWrap = value; + break; + + case SW_TEXTURE_WRAP_T: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->tWrap = value; + break; + + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + + } + } + + void swBindTexture(uint32_t id) + { + if (id >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.currentTexture = id; + } + + #endif // RLSW_IMPL + \ No newline at end of file From 8aed39ff49e9f832cbc04cf15525a4ef0f78a61e Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 16:35:34 +0100 Subject: [PATCH 02/43] implement state support Also replace the triangle rasterization functions with macros that generate specific functions for each state of the rendering system. Also, add the OpenGL definitions in order to add a binding for rlgl. --- src/external/rlsw.h | 3943 ++++++++++++++++++++++--------------------- 1 file changed, 2058 insertions(+), 1885 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 55205d057..6af7fcf84 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -22,1888 +22,2061 @@ * SOFTWARE. */ - #ifndef RLSW_H - #define RLSW_H - - #include - #include - - #ifndef SW_MALLOC - # define SW_MALLOC(sz) malloc(sz) - #endif - - #ifndef SW_FREE - # define SW_FREE(ptr) free(ptr) - #endif - - #ifndef SW_MAX_PROJECTION_STACK_SIZE - # define SW_MAX_PROJECTION_STACK_SIZE 2 - #endif - - #ifndef SW_MAX_MODELVIEW_STACK_SIZE - # define SW_MAX_MODELVIEW_STACK_SIZE 8 - #endif - - #ifndef SW_MAX_TEXTURE_STACK_SIZE - # define SW_MAX_TEXTURE_STACK_SIZE 4 - #endif - - #ifndef SW_MAX_TEXTURES - # define SW_MAX_TEXTURES 128 - #endif - - #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES - # define SW_MAX_CLIPPED_POLYGON_VERTICES 12 - #endif - - #ifndef SW_CLIP_EPSILON - # define SW_CLIP_EPSILON 1e-4f - #endif - - typedef enum { - SW_PROJECTION, - SW_MODELVIEW, - SW_TEXTURE - } SWmatrix; - - typedef enum { - SW_VERTEX_ARRAY, - SW_TEXTURE_COORD_ARRAY, - SW_NORMAL_ARRAY, - SW_COLOR_ARRAY - } SWarray; - - typedef enum { - SW_POINTS, - SW_LINES, - SW_TRIANGLES, - SW_QUADS, - } SWfill; - - typedef enum { - SW_CULL_FRONT, - SW_CULL_BACK, - } SWcull; - - typedef enum { - SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) - SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) - SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) - SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp - } SWpixelformat; - - typedef enum { - SW_NEAREST, - SW_LINEAR, - SW_NEAREST_MIPMAP_NEAREST, - SW_NEAREST_MIPMAP_LINEAR, - SW_LINEAR_MIPMAP_NEAREST, - SW_LINEAR_MIPMAP_LINEAR - } SWfilter; - - typedef enum { - SW_REPEAT, - SW_CLAMP_TO_EDGE, - SW_MIRRORED_REPEAT - } SWwrap; - - typedef enum { - SW_TEXTURE_MIN_FILTER, - SW_TEXTURE_MAG_FILTER, - SW_TEXTURE_WRAP_S, - SW_TEXTURE_WRAP_T - } SWtexparam; - - typedef enum { - SW_NO_ERROR, - SW_INVALID_ENUM, - SW_INVALID_VALUE, - SW_STACK_OVERFLOW, - SW_STACK_UNDERFLOW, - SW_INVALID_OPERATION, - SW_ERROR_OUT_OF_MEMORY - } SWerrcode; - - /* === Public API === */ - - void swInit(int w, int h); - void swClose(void); - - void* swGetColorBuffer(int* w, int* h); - - void swMatrixMode(SWmatrix mode); - void swPushMatrix(void); - void swPopMatrix(void); - void swLoadIdentity(void); - void swTranslatef(float x, float y, float z); - void swRotatef(float angle, float x, float y, float z); - void swScalef(float x, float y, float z); - void swMultMatrixf(const float* mat); - void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); - void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); - - void swViewport(int x, int y, int width, int height); - - void swClearColor(float r, float g, float b, float a); - void swClear(void); - - void swBegin(SWfill mode); - void swEnd(void); - - void swVertex2i(int x, int y); - void swVertex2f(float x, float y); - void swVertex2fv(const float* v); - void swVertex3i(int x, int y, int z); - void swVertex3f(float x, float y, float z); - void swVertex3fv(const float* v); - void swVertex4i(int x, int y, int z, int w); - void swVertex4f(float x, float y, float z, float w); - void swVertex4fv(const float* v); - - void swColor1ui(uint32_t color); - void swColor3ub(uint8_t r, uint8_t g, uint8_t b); - void swColor3ubv(const uint8_t* v); - void swColor3us(uint16_t r, uint16_t g, uint16_t b); - void swColor3usv(const uint16_t* v); - void swColor3ui(uint32_t r, uint32_t g, uint32_t b); - void swColor3uiv(const uint32_t* v); - void swColor3f(float r, float g, float b); - void swColor3fv(const float* v); - void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); - void swColor4ubv(const uint8_t* v); - void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); - void swColor4usv(const uint16_t* v); - void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); - void swColor4uiv(const uint32_t* v); - void swColor4f(float r, float g, float b, float a); - void swColor4fv(const float* v); - - void swTexCoord2f(float u, float v); - void swTexCoordfv(const float* v); - - void swNormal3f(float x, float y, float z); - void swNormal3fv(const float* v); - - void swBindArray(SWarray type, void *buffer); - void swDrawArrays(SWfill mode, int offset, int count); - - uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); - void swUnloadTexture(uint32_t id); - - void swTextureParameters(uint32_t id, int param, int value); - void swBindTexture(uint32_t id); - - #endif // RLSW_H - - - - #ifdef RLSW_IMPL - - #include - #include - - /* === Defines and Macros === */ - - #define SW_PI 3.14159265358979323846f - #define SW_DEG2RAD (SW_PI/180.0f) - #define SW_RAD2DEG (180.0f/SW_PI) - - /* === Internal Structs === */ - - typedef float sw_matrix_t[4*4]; - typedef uint16_t sw_half_t; - - typedef struct { - - float position[4]; // Position coordinates - float normal[3]; // Normal vector - float texcoord[2]; // Texture coordinates - float color[4]; // Color - - float homogeneous[4]; // Homogeneous coordinates - float screen[2]; // Screen coordinates - - } sw_vertex_t; - - typedef struct { - - const void* pixels; - int width; - int height; - int format; - - SWfilter minFilter; - SWfilter magFilter; - - SWwrap sWrap; - SWwrap tWrap; - - float tx; - float ty; - - } sw_texture_t; - - typedef struct { - uint8_t *color; // 32-bit RGBA color buffer - uint16_t *depth; // 16-bit fixed fract buffer - int width, height; - } sw_framebuffer_t; - - typedef struct { - - sw_framebuffer_t framebuffer; - uint8_t clearColor[4]; // Color used to clear the screen - uint16_t clearDepth; // Depth value used to clear the screen - - uint32_t currentTexture; - sw_matrix_t *currentMatrix; - - uint32_t blendFunction; - uint32_t depthFunction; - - int vpPos[2]; // Represents the top-left corner of the viewport - int vpDim[2]; // Represents the dimensions of the viewport (minus one) - int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) - int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) - - struct { - float* positions; - float* texcoords; - float* normals; - uint8_t* colors; - } array; - - sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering - int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - - SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) - float pointSize; // Rasterized point size - float lineWidth; // Rasterized line width - - sw_matrix_t matProjection; // Projection matrix, user adjustable - sw_matrix_t matTexture; // Texture matrix, user adjustable - sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) - sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) - sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally - - sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations - sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations - sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations - uint32_t stackProjectionCounter; // Counter for matrix stack operations - uint32_t stackModelviewCounter; // Counter for matrix stack operations - uint32_t stackTextureCounter; // Counter for matrix stack operations - - SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) - bool modelMatrixUsed; // Flag indicating if the model matrix is used - - SWcull cullFace; // Faces to cull - SWerrcode errCode; // Last error code - - sw_texture_t* loadedTextures; - int loadedTextureCount; - - uint32_t* freeTextureIds; - int freeTextureIdCount; - - } sw_data_t; - - - /* === Global Data === */ - - static sw_data_t RLSW = { 0 }; - - - /* === Helper Functions === */ - - static inline void sw_matrix_id(sw_matrix_t dst) - { - dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; - dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; - dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; - dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; - } - - static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) - { - sw_matrix_t result; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - float sum = 0.0; - for (int k = 0; k < 4; k++) { - sum += left[i * 4 + k] * right[k * 4 + j]; - } - result[i * 4 + j] = sum; - } - } - for (int i = 0; i < 16; i++) { - dst[i] = result[i]; - } - } - - static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) - { - float tmp[4] = { - mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], - mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], - mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] + mat[14] * v[3], - mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] - }; - - for (int i = 0; i < 4; i++) { - dst[i] = tmp[i]; - } - } - - static inline float sw_lerp(float a, float b, float t) - { - return a + t * (b - a); - } - - static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) - { - sw_vertex_t result; - for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { - ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); - } - return result; - } - - static inline uint32_t sw_cvt_hf_ui(uint16_t h) - { - uint32_t s = (uint32_t)(h & 0x8000) << 16; - int32_t em = h & 0x7fff; - - // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) - int32_t r = (em + (112 << 10)) << 13; - - // denormal: flush to zero - r = (em < (1 << 10)) ? 0 : r; - - // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases - // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 - r += (em >= (31 << 10)) ? (112 << 23) : 0; - - return s | r; - } - - static inline float sw_cvt_hf(sw_half_t y) - { - union { float f; uint32_t i; } v = { - .i = sw_cvt_hf_ui(y) - }; - return v.f; - } - - static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) - { - float gray = (float)((uint8_t*)pixels)[offset] / 255; - - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_red_16(float* color, const void* pixels, uint32_t offset) - { - float value = sw_cvt_hf(((sw_half_t*)pixels)[offset]); - - color[0] = value; - color[1] = value; - color[2] = value; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_t offset) - { - float value = ((float*)pixels)[offset]; - - color[0] = value; - color[1] = value; - color[2] = value; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) - { - float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; - float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; - - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = alpha; - } - - static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF800) >> 11) / 31; - color[1] = (float)((pixel & 0x7E0) >> 5) / 63; - color[2] = (float)(pixel & 0x1F) / 31; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) - { - const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; - - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_161616(float* color, const void* pixels, uint32_t offset) - { - const sw_half_t *pixel = (sw_half_t*)pixels + 3 * offset; - - color[0] = sw_cvt_hf(pixel[0]); - color[1] = sw_cvt_hf(pixel[1]); - color[2] = sw_cvt_hf(pixel[2]); - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) - { - const float *pixel = (float*)pixels + 3 * offset; - - color[0] = pixel[0]; - color[1] = pixel[1]; - color[2] = pixel[2]; - color[3] = 1.0f; - } - - static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF800) >> 11) / 31; - color[1] = (float)((pixel & 0x7C0) >> 6) / 31; - color[2] = (float)((pixel & 0x3E) >> 1) / 31; - color[3] = (float)(pixel & 0x1); - } - - static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) - { - uint16_t pixel = ((uint16_t*)pixels)[offset]; - - color[0] = (float)((pixel & 0xF000) >> 12) / 15; - color[1] = (float)((pixel & 0xF00) >> 8) / 15; - color[2] = (float)((pixel & 0xF0) >> 4) / 15; - color[3] = (float)(pixel & 0xF) / 15; - } - - static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) - { - const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = (float)pixel[3] / 255; - } - - static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) - { - const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; - - color[0] = sw_cvt_hf(pixel[0]); - color[1] = sw_cvt_hf(pixel[1]); - color[2] = sw_cvt_hf(pixel[2]); - color[3] = sw_cvt_hf(pixel[3]); - } - - static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) - { - const float *pixel = (float*)pixels + 4 * offset; - - color[0] = pixel[0]; - color[1] = pixel[1]; - color[2] = pixel[2]; - color[3] = pixel[3]; - } - - static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) - { - switch (format) { - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: - sw_get_pixel_grayscale(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: - sw_get_pixel_grayscale_alpha(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: - sw_get_pixel_rgb_565(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: - sw_get_pixel_rgb_888(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: - sw_get_pixel_rgba_5551(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: - sw_get_pixel_rgba_4444(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: - sw_get_pixel_rgba_8888(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32: - sw_get_pixel_red_32(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: - sw_get_pixel_rgb_323232(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: - sw_get_pixel_rgba_32323232(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16: - sw_get_pixel_red_16(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: - sw_get_pixel_rgb_161616(color, pixels, offset); - break; - - case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: - sw_get_pixel_rgba_16161616(color, pixels, offset); - break; - - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: - case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: - case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: - case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: - case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: - case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: - break; - - } - } - - static inline void sw_map_repeat(int* out, float in, int max) - { - // Upscale to nearest texture coordinates - // NOTE: We use '(int)(x+0.5)' although this is incorrect - // regarding the direction of rounding in case of negative values - // and also less accurate than roundf, but it remains so much more - // efficient that it is preferable for now to opt for this option. - - *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); - } - - static inline void sw_map_clamp_to_edge(int* out, float in, int max) - { - in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); - *out = (int)(in * (max - 1) + 0.5f); - } - - static inline void sw_map_mirrored_repeat(int* out, float in, int max) - { - in = fmodf(fabsf(in), 2); - if (in > 1.0f) in = 1.0f - (in - 1.0f); - *out = (int)(in * (max - 1) + 0.5f); - } - - static inline void sw_map(int* out, float in, int max, SWwrap mode) - { - switch (mode) { - case SW_REPEAT: - sw_map_repeat(out, in, max); - break; - case SW_CLAMP_TO_EDGE: - sw_map_clamp_to_edge(out, in, max); - break; - case SW_MIRRORED_REPEAT: - sw_map_mirrored_repeat(out, in, max); - break; - } - } - - static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) - { - int x, y; - sw_map(&x, u, tex->width, tex->sWrap); - sw_map(&y, v, tex->height, tex->tWrap); - sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); - } - - static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) - { - int x0, y0, x1, y1; - sw_map(&x0, u, tex->width, tex->sWrap); - sw_map(&y0, v, tex->height, tex->tWrap); - sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); - sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); - - float fx = u * (tex->width - 1) - x0; - float fy = v * (tex->height - 1) - y0; - - float c00[4], c10[4], c01[4], c11[4]; - sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); - sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); - sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); - sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); - - float c0[4], c1[4]; - for (int i = 0; i < 4; i++) { - float a = sw_lerp(c00[i], c10[i], fx); - float b = sw_lerp(c01[i], c11[i], fx); - color[i] = sw_lerp(a, b, fy); - } - } - - static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, - float xDu, float yDu, float xDv, float yDv) - { - // TODO: It seems there are some incorrect detections depending on the context - // This is probably due to the fact that the fractions are obtained - // at the wrong moment during rasterization. It would be worth reviewing - // this, although the scanline method complicates things. - - // Calculate the derivatives for each axis - float du = sqrtf(xDu * xDu + yDu * yDu); - float dv = sqrtf(xDv * xDv + yDv * yDv); - float L = (du > dv) ? du : dv; - - // Select the filter based on the size of the footprint - if (L > 1.0f) { - // Minification - if (tex->minFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); - } else if (tex->minFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); - } - } else { - // Magnification - if (tex->magFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); - } else if (tex->magFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); - } - } - } - - static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - - int inputCounter = *vertexCounter; - *vertexCounter = 0; - - const sw_vertex_t *prevVt = &input[inputCounter-1]; - char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], - (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - return *vertexCounter > 0; - } - - static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - for (int iAxis = 0; iAxis < 3; iAxis++) - { - if (*vertexCounter == 0) return false; - - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - int inputCounter; - - const sw_vertex_t *prevVt; - char prevDot; - - // Clip against first plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - if (*vertexCounter == 0) return false; - - // Clip against opposite plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - } - - return *vertexCounter > 0; - } - - void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) - { - for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; - for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; - sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); - } - - if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { - for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; - - // Calculation of the reciprocal of W for normalization - // as well as perspective correct attributes - v->homogeneous[3] = 1.0f / v->homogeneous[3]; - - // Division of XYZ coordinates by weight - v->homogeneous[0] *= v->homogeneous[3]; - v->homogeneous[1] *= v->homogeneous[3]; - v->homogeneous[2] *= v->homogeneous[3]; - - // Division of texture coordinates (perspective correct) - v->texcoord[0] *= v->homogeneous[3]; - v->texcoord[1] *= v->homogeneous[3]; - - // Transform to screen space - v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; - } - } - } - - void sw_raster_scanline(const sw_texture_t* tex, const sw_vertex_t* start, const sw_vertex_t* end, float yDu, float yDv) - { - // Calculate the horizontal width and avoid division by zero - float dx = end->screen[0] - start->screen[0]; - if (fabsf(dx) < 1e-4f) return; - - // Convert and center the screen coordinates - int xStart = (int)(start->screen[0] + 0.5f); - int xEnd = (int)(end->screen[0] + 0.5f); - int y = (int)(start->screen[1] + 0.5f); - - // Calculate the initial interpolation parameter and its increment - float dt = 1.0f / dx; - float t = (xStart - start->screen[0]) * dt; - - // Calculate the horizontal gradients for UV coordinates - float xDu = (end->texcoord[0] - start->texcoord[0]) * dt; - float xDv = (end->texcoord[1] - start->texcoord[1]) * dt; - - // Pre-calculate the color differences for interpolation - float dcol[4]; - for (int i = 0; i < 4; i++) { - dcol[i] = end->color[i] - start->color[i]; - } - - // Pre-calculate the differences in Z and W (for depth testing and perspective correction) - float dz = end->homogeneous[2] - start->homogeneous[2]; - float dw = end->homogeneous[3] - start->homogeneous[3]; - - // Initialize the interpolated texture coordinates - float u = start->texcoord[0] + t * xDu; - float v = start->texcoord[1] + t * xDv; - - // Pre-calculate the starting pointer for the color framebuffer row - uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); - uint8_t* dst = row_ptr + xStart * 4; - - // Pre-calculate the pointer for the depth buffer row - uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; - uint16_t* dptr = depth_row; - - // Scanline rasterization loop - for (int x = xStart; x < xEnd; x++) { - // Interpolate Z and W for depth testing and perspective correction - float w = 1.0f / (start->homogeneous[3] + t * dw); - float z = start->homogeneous[2] + t * dz; - - // Depth testing with direct access to the depth buffer - // TODO: Implement different depth funcs? - float depth = (float)(*dptr) / UINT16_MAX; - if (z > depth) goto discard; - - // Update the depth buffer - *dptr = (uint16_t)(z * UINT16_MAX); - - // Sample the texture - float texColor[4]; - sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); - - // Interpolate the color and modulate by the texture color - for (int i = 0; i < 4; i++) { - float lerp = start->color[i] + t * dcol[i]; - float finalColor = texColor[i] * lerp; - // Inline clamp to keep the value between 0 and 1 - // NOTE: The need for clamp, the colors could be a sign of problem during interpolation (?) - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); - dst[i] = (uint8_t)(finalColor * 255.0f); - } - - // Increment the interpolation parameter, UVs, and pointers - discard: - t += dt; - u += xDu; - v += xDv; - dst += 4; - dptr++; - } - } - - void sw_raster_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, const sw_texture_t* tex) - { - // Swap vertices by increasing y - if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } - if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } - if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } - - // Extracting coordinates from the sorted vertices - float x0 = v0->screen[0], y0 = v0->screen[1]; - float x1 = v1->screen[0], y1 = v1->screen[1]; - float x2 = v2->screen[0], y2 = v2->screen[1]; - - // Reject degenerate triangles - float height = y2 - y0; - if (height < 1e-4f) return; - - // Global calculation of vertical texture gradients for the triangle - float yDu = (v2->texcoord[0] - v0->texcoord[0]) / height; - float yDv = (v2->texcoord[1] - v0->texcoord[1]) / height; - - // Precompute the inverse of the triangle height and - // edge lengths with checks to avoid division by zero. - float inv_height = 1.0f / height; - float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; - float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; - - // Pre-calculation of slopes (dx/dy) - float dx02 = (x2 - x0) * inv_height; - float dx01 = (x1 - x0) * inv_y1y0; - float dx12 = (x2 - x1) * inv_y2y1; - - // Y bounds (vertical clipping) - int yTop = (int)(y0 + 0.5f); - int yMiddle = (int)(y1 + 0.5f); - int yBottom = (int)(y2 + 0.5f); - - // Initializing scanline variables - float xLeft = x0, xRight = x0; - sw_vertex_t start, end; - - // Scanline for the upper part of the triangle - for (int y = yTop; y < yMiddle; y++) { - float dy = (float)y - y0; - float t1 = dy * inv_height; - float t2 = dy * inv_y1y0; - - // Optimized interpolation - start = sw_lerp_vertex(v0, v2, t1); - end = sw_lerp_vertex(v0, v1, t2); - start.screen[0] = xLeft; - start.screen[1] = (float)y; - end.screen[0] = xRight; - end.screen[1] = (float)y; - - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } - sw_raster_scanline(tex, &start, &end, yDu, yDv); - - // Incremental update - xLeft += dx02; - xRight += dx01; - } - - // Scanline for the lower part of the triangle - xRight = x1; // Restart the right side from the second vertex - for (int y = yMiddle; y < yBottom; y++) { - float dy = (float)y - y0; - float t1 = dy * inv_height; - float t2 = (float)(y - y1) * inv_y2y1; - - // Optimized interpolation - start = sw_lerp_vertex(v0, v2, t1); - end = sw_lerp_vertex(v1, v2, t2); - start.screen[0] = xLeft; - start.screen[1] = (float)y; - end.screen[0] = xRight; - end.screen[1] = (float)y; - - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } - sw_raster_scanline(tex, &start, &end, yDu, yDv); - - // Incremental update - xLeft += dx02; - xRight += dx12; - } - } - - void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) - { - int vertexCounter = 3; - - sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; - polygon[0] = *v0; - polygon[1] = *v1; - polygon[2] = *v2; - - sw_project_and_clip_triangle(polygon, &vertexCounter); - - if (vertexCounter < 3) { - return; - } - - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } - } - - static inline bool sw_is_texture_id_valid(uint32_t id) - { - bool valid = true; - - if (id == 0) valid = false; - else if (id >= SW_MAX_TEXTURES) valid = false; - else if (RLSW.loadedTextures[id].pixels == 0) valid = false; - - return true; - } - - static inline bool sw_is_texture_filter_valid(int filter) - { - return (filter == SW_NEAREST || filter == SW_LINEAR); - } - - static inline bool sw_is_texture_wrap_valid(int wrap) - { - return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); - } - - /* === Public Implementation === */ - - void swInit(int w, int h) - { - swViewport(0, 0, w, h); - - RLSW.framebuffer.color = SW_MALLOC(4 * w * h); - RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); - - RLSW.framebuffer.width = w; - RLSW.framebuffer.height = h; - - RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); - RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); - - RLSW.clearColor[0] = 0; - RLSW.clearColor[1] = 0; - RLSW.clearColor[2] = 0; - RLSW.clearColor[3] = 255; - RLSW.clearDepth = UINT16_MAX; - - RLSW.currentMatrixMode = SW_MODELVIEW; - RLSW.currentMatrix = &RLSW.matView; - - sw_matrix_id(RLSW.matProjection); - sw_matrix_id(RLSW.matTexture); - sw_matrix_id(RLSW.matModel); - sw_matrix_id(RLSW.matView); - - RLSW.vertexBuffer[0].color[0] = 1.0f; - RLSW.vertexBuffer[0].color[1] = 1.0f; - RLSW.vertexBuffer[0].color[2] = 1.0f; - RLSW.vertexBuffer[0].color[3] = 1.0f; - - RLSW.vertexBuffer[0].texcoord[0] = 0.0f; - RLSW.vertexBuffer[0].texcoord[1] = 0.0f; - - RLSW.vertexBuffer[0].normal[0] = 0.0f; - RLSW.vertexBuffer[0].normal[1] = 0.0f; - RLSW.vertexBuffer[0].normal[2] = 1.0f; - - static const float defTex[3*2*2] = - { - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - }; - - RLSW.loadedTextures[0].pixels = defTex; - RLSW.loadedTextures[0].width = 2; - RLSW.loadedTextures[0].height = 2; - RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; - RLSW.loadedTextures[0].minFilter = SW_NEAREST; - RLSW.loadedTextures[0].magFilter = SW_NEAREST; - RLSW.loadedTextures[0].sWrap = SW_REPEAT; - RLSW.loadedTextures[0].tWrap = SW_REPEAT; - RLSW.loadedTextures[0].tx = 0.5f; - RLSW.loadedTextures[0].ty = 0.5f; - - RLSW.loadedTextureCount = 1; - } - - void swClose(void) - { - SW_FREE(RLSW.framebuffer.color); - SW_FREE(RLSW.framebuffer.depth); - - SW_FREE(RLSW.loadedTextures); - SW_FREE(RLSW.freeTextureIds); - } - - void* swGetColorBuffer(int* w, int* h) - { - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; - - return RLSW.framebuffer.color; - } - - void swMatrixMode(SWmatrix mode) - { - switch (mode) { - case SW_PROJECTION: - RLSW.currentMatrix = &RLSW.matProjection; - break; - case SW_MODELVIEW: - RLSW.currentMatrix = RLSW.modelMatrixUsed - ? &RLSW.matModel : &RLSW.matView; - break; - case SW_TEXTURE: - RLSW.currentMatrix = &RLSW.matTexture; - break; - default: - RLSW.errCode = SW_INVALID_ENUM; - return; - } - - RLSW.currentMatrixMode = mode; - } - - void swPushMatrix(void) - { - switch (RLSW.currentMatrixMode) { - - case SW_PROJECTION: - if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - for (int i = 0; i < 16; i++) { - RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; - } - RLSW.stackProjectionCounter++; - break; - - case SW_MODELVIEW: - if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - if (RLSW.modelMatrixUsed) { - for (int i = 0; i < 16; i++) { - RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; - } - RLSW.stackModelviewCounter++; - } else { - RLSW.currentMatrix = &RLSW.matModel; - RLSW.modelMatrixUsed = true; - } - break; - - case SW_TEXTURE: - if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { - RLSW.errCode = SW_STACK_OVERFLOW; - return; - } - for (int i = 0; i < 16; i++) { - RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; - } - RLSW.stackTextureCounter++; - break; - - } - } - - void swPopMatrix(void) - { - switch (RLSW.currentMatrixMode) { - - case SW_PROJECTION: - if (RLSW.stackProjectionCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackProjectionCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; - } - break; - - case SW_MODELVIEW: - if (RLSW.stackModelviewCounter == 0) { - if (!RLSW.modelMatrixUsed) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - sw_matrix_id(RLSW.matModel); - RLSW.currentMatrix = &RLSW.matView; - RLSW.modelMatrixUsed = false; - } else { - RLSW.stackModelviewCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matModel[i] = RLSW.stackModelview[RLSW.stackModelviewCounter][i]; - } - } - break; - - case SW_TEXTURE: - if (RLSW.stackTextureCounter <= 0) { - RLSW.errCode = SW_STACK_UNDERFLOW; - return; - } - RLSW.stackTextureCounter--; - for (int i = 0; i < 16; i++) { - RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; - } - break; - - } - } - - void swLoadIdentity(void) - { - sw_matrix_id(*RLSW.currentMatrix); - } - - void swTranslatef(float x, float y, float z) - { - sw_matrix_t mat; - sw_matrix_id(mat); - - mat[12] = x; - mat[13] = y; - mat[14] = z; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swRotatef(float angle, float x, float y, float z) - { - angle *= SW_DEG2RAD; - - sw_matrix_t mat; - sw_matrix_id(mat); - - float lengthSq = x*x + y*y + z*z; - - if (lengthSq != 1.0f && lengthSq != 0.0f) { - float invLenght = 1.0f / lengthSq; - x *= invLenght; - y *= invLenght; - z *= invLenght; - } - - float sinres = sinf(angle); - float cosres = cosf(angle); - float t = 1.0f - cosres; - - mat[0] = x*x*t + cosres; - mat[1] = y*x*t + z*sinres; - mat[2] = z*x*t - y*sinres; - - mat[4] = x*y*t - z*sinres; - mat[5] = y*y*t + cosres; - mat[6] = z*y*t + x*sinres; - - mat[8] = x*z*t + y*sinres; - mat[9] = y*z*t - x*sinres; - mat[10] = z*z*t + cosres; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swScalef(float x, float y, float z) - { - sw_matrix_t mat; - - mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; - mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; - mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; - mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; - - sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); - } - - void swMultMatrixf(const float* mat) - { - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) - { - sw_matrix_t mat = { 0 }; - - float rl = right - left; - float tb = top - bottom; - float fn = zfar - znear; - - mat[0] = (znear * 2.0f) / rl; - mat[5] = (znear * 2.0f) / tb; - - mat[8] = (right + left) / rl; - mat[9] = (top + bottom) / tb; - mat[10] = -(zfar + znear) / fn; - mat[11] = -1.0f; - - mat[14] = -(zfar * znear * 2.0f) / fn; - - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) - { - sw_matrix_t mat = { 0 }; - - float rl = (right - left); - float tb = (top - bottom); - float fn = (zfar - znear); - - mat[0] = 2.0f / rl; - mat[5] = 2.0f / tb; - - mat[10] = -2.0f / fn; - mat[11] = 0.0f; - mat[12] = -(left + right) / rl; - mat[13] = -(top + bottom) / tb; - - mat[14] = -(zfar + znear) / fn; - mat[15] = 1.0f; - - sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); - } - - void swViewport(int x, int y, int width, int height) - { - if (x <= -width || y <= -height) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.vpPos[0] = x; - RLSW.vpPos[1] = y; - - RLSW.vpDim[0] = width - 1; - RLSW.vpDim[1] = height - 1; - - RLSW.vpMin[0] = (x < 0) ? 0 : x; - RLSW.vpMin[1] = (y < 0) ? 0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; - } - - void swClearColor(float r, float g, float b, float a) - { - RLSW.clearColor[0] = r * 255; - RLSW.clearColor[1] = g * 255; - RLSW.clearColor[2] = b * 255; - RLSW.clearColor[3] = a * 255; - } - - void swClear(void) - { - int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - - for (int i = 0; i < size; i++) { - ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); - RLSW.framebuffer.depth[i] = RLSW.clearDepth; - } - } - - void swBegin(SWfill mode) - { - if (mode < SW_POINTS || mode > SW_QUADS) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.vertexCounter = 0; - RLSW.fillMode = mode; - } - - void swEnd(void) - { - RLSW.vertexCounter = 0; - } - - void swVertex2i(int x, int y) - { - float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; - swVertex4fv(v); - } - - void swVertex2f(float x, float y) - { - float v[4] = { x, y, 0.0f, 1.0f }; - swVertex4fv(v); - } - - void swVertex2fv(const float* v) - { - float v4[4] = { v[0], v[1], 0.0f, 1.0f }; - swVertex4fv(v4); - } - - void swVertex3i(int x, int y, int z) - { - float v[4] = { (float)x, (float)y, (float)z, 1.0f }; - swVertex4fv(v); - } - - void swVertex3f(float x, float y, float z) - { - float v[4] = { x, y, z, 1.0f }; - swVertex4fv(v); - } - - void swVertex3fv(const float* v) - { - float v4[4] = { v[0], v[1], v[2], 1.0f }; - swVertex4fv(v4); - } - - void swVertex4i(int x, int y, int z, int w) - { - float v[4] = { (float)x, (float)y, (float)z, (float)w }; - swVertex4fv(v); - } - - void swVertex4f(float x, float y, float z, float w) - { - float v[4] = { x, y, z, w }; - swVertex4fv(v); - } - - void swVertex4fv(const float* v) - { - for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; - } - RLSW.vertexCounter++; - - int neededVertices = 0; - switch (RLSW.fillMode) { - case SW_POINTS: - neededVertices = 1; - break; - case SW_LINES: - neededVertices = 2; - break; - case SW_TRIANGLES: - neededVertices = 3; - break; - case SW_QUADS: - neededVertices = 4; - break; - } - - if (RLSW.vertexCounter == neededVertices) { - - // TODO: Optimize MVP calculation - sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); - sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); - - switch (RLSW.fillMode) { - case SW_POINTS: - break; - case SW_LINES: - neededVertices = 2; - break; - case SW_TRIANGLES: - sw_render_triangle( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - break; - case SW_QUADS: - sw_render_triangle( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - sw_render_triangle( - &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3], - &RLSW.vertexBuffer[0] - ); - break; - } - - RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; - RLSW.vertexCounter = 0; - } - else { - RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; - } - } - - void swColor1ui(uint32_t color) - { - union { - uint32_t v; - uint8_t a[4]; - } c = { .v = color }; - - float cv[4]; - cv[0] = (float)c.a[0] / 255; - cv[1] = (float)c.a[1] / 255; - cv[2] = (float)c.a[2] / 255; - cv[3] = (float)c.a[3] / 255; - - swColor4fv(cv); - } - - void swColor3ub(uint8_t r, uint8_t g, uint8_t b) - { - float cv[4]; - cv[0] = (float)r / 255; - cv[1] = (float)g / 255; - cv[2] = (float)b / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3ubv(const uint8_t* v) - { - float cv[4]; - cv[0] = (float)v[0] / 255; - cv[1] = (float)v[1] / 255; - cv[2] = (float)v[2] / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3us(uint16_t r, uint16_t g, uint16_t b) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3usv(const uint16_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3ui(uint32_t r, uint32_t g, uint32_t b) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3uiv(const uint32_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3f(float r, float g, float b) - { - float cv[4]; - cv[0] = r; - cv[1] = g; - cv[2] = b; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor3fv(const float* v) - { - float cv[4]; - cv[0] = v[0]; - cv[1] = v[1]; - cv[2] = v[2]; - cv[3] = 1.0f; - - swColor4fv(cv); - } - - void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) - { - float cv[4]; - cv[0] = (float)r / 255; - cv[1] = (float)g / 255; - cv[2] = (float)b / 255; - cv[3] = (float)a / 255; - - swColor4fv(cv); - } - - void swColor4ubv(const uint8_t* v) - { - float cv[4]; - cv[0] = (float)v[0] / 255; - cv[1] = (float)v[1] / 255; - cv[2] = (float)v[2] / 255; - cv[3] = (float)v[3] / 255; - - swColor4fv(cv); - } - - void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = (float)((uint8_t)(a >> 8)) / 255; - - swColor4fv(cv); - } - - void swColor4usv(const uint16_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; - - swColor4fv(cv); - } - - void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) - { - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = (float)((uint8_t)(a >> 24)) / 255; - - swColor4fv(cv); - } - - void swColor4uiv(const uint32_t* v) - { - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; - - swColor4fv(cv); - } - - void swColor4f(float r, float g, float b, float a) - { - float cv[4]; - cv[0] = r; - cv[1] = g; - cv[2] = b; - cv[3] = a; - - swColor4fv(cv); - } - - void swColor4fv(const float* v) - { - for (int i = 0; i < 4; i++) { - RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; - } - } - - void swTexCoord2f(float u, float v) - { - float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; - } - - void swTexCoordfv(const float* v) - { - float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; - float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; - - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; - RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; - } - - void swNormal3f(float x, float y, float z) - { - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; - } - - void swNormal3fv(const float* v) - { - RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; - RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; - } - - void swBindArray(SWarray type, void *buffer) - { - switch (type) { - case SW_VERTEX_ARRAY: - RLSW.array.positions = buffer; - break; - case SW_TEXTURE_COORD_ARRAY: - RLSW.array.texcoords = buffer; - break; - case SW_NORMAL_ARRAY: - RLSW.array.normals = buffer; - break; - case SW_COLOR_ARRAY: - RLSW.array.colors = buffer; - break; - default: - break; - } - } - - void swDrawArrays(SWfill mode, int offset, int count) - { - if (RLSW.array.positions == 0) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - swBegin(mode); - - for (int i = offset; i < count; i++) { - if (RLSW.array.texcoords) { - swTexCoordfv(RLSW.array.texcoords + 2 * i); - } - if (RLSW.array.normals) { - swNormal3fv(RLSW.array.normals + 3 * i); - } - if (RLSW.array.colors) { - swColor4ubv(RLSW.array.colors + 4 * i); - } - swVertex3fv(RLSW.array.positions + 3 * i); - } - - swEnd(); - } - - uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) - { - if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_ERROR_OUT_OF_MEMORY; - return 0; - } - - sw_texture_t texture = { 0 }; - texture.pixels = data; - texture.width = width; - texture.height = height; - texture.format = format; - texture.minFilter = SW_NEAREST; - texture.magFilter = SW_NEAREST; - texture.sWrap = SW_REPEAT; - texture.tWrap = SW_REPEAT; - texture.tx = 1.0f / width; - texture.ty = 1.0f / height; - (void)mipmapCount; - - uint32_t id = 0; - if (RLSW.freeTextureIdCount > 0) { - id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; - } - else { - id = RLSW.loadedTextureCount++; - } - - RLSW.loadedTextures[id] = texture; - - return id; - } - - void swUnloadTexture(uint32_t id) - { - if (!sw_is_texture_id_valid(id)) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - RLSW.loadedTextures[id].pixels = 0; - RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; - } - - void swTextureParameters(uint32_t id, int param, int value) - { - if (!sw_is_texture_id_valid(id)) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - sw_texture_t* texture = &RLSW.loadedTextures[id]; - - switch (param) { - - case SW_TEXTURE_MIN_FILTER: - if (!sw_is_texture_filter_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->minFilter = value; - break; - - case SW_TEXTURE_MAG_FILTER: - if (!sw_is_texture_filter_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->magFilter = value; - break; - - case SW_TEXTURE_WRAP_S: - if (!sw_is_texture_wrap_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->sWrap = value; - break; - - case SW_TEXTURE_WRAP_T: - if (!sw_is_texture_wrap_valid(value)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - texture->tWrap = value; - break; - - default: - RLSW.errCode = SW_INVALID_ENUM; - return; - - } - } - - void swBindTexture(uint32_t id) - { - if (id >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_INVALID_VALUE; - return; - } - - if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.currentTexture = id; - } - - #endif // RLSW_IMPL - \ No newline at end of file +#ifndef RLSW_H +#define RLSW_H + +#include +#include + + +/* === RLSW Definition And Macros === */ + +#ifndef SW_MALLOC +# define SW_MALLOC(sz) malloc(sz) +#endif + +#ifndef SW_FREE +# define SW_FREE(ptr) free(ptr) +#endif + +#ifndef SW_MAX_PROJECTION_STACK_SIZE +# define SW_MAX_PROJECTION_STACK_SIZE 2 +#endif + +#ifndef SW_MAX_MODELVIEW_STACK_SIZE +# define SW_MAX_MODELVIEW_STACK_SIZE 8 +#endif + +#ifndef SW_MAX_TEXTURE_STACK_SIZE +# define SW_MAX_TEXTURE_STACK_SIZE 4 +#endif + +#ifndef SW_MAX_TEXTURES +# define SW_MAX_TEXTURES 128 +#endif + +#ifndef SW_MAX_CLIPPED_POLYGON_VERTICES +# define SW_MAX_CLIPPED_POLYGON_VERTICES 12 +#endif + +#ifndef SW_CLIP_EPSILON +# define SW_CLIP_EPSILON 1e-4f +#endif + + +/* === OpenGL Definitions === */ + +#define GL_TEXTURE_2D 0x0DE1 +#define GL_DEPTH_TEST 0x0B71 +#define GL_CULL_FACE 0x0B44 + +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 + +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +//#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 + +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +//#define GL_LINE_LOOP 0x0002 +//#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +//#define GL_TRIANGLE_STRIP 0x0005 +//#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +//#define GL_QUAD_STRIP 0x0008 +//#define GL_POLYGON 0x0009 + +//#define GL_CW 0x0900 +//#define GL_CCW 0x0901 + +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 + +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) +#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) + +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 + +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + + +/* === RLSW Enums === */ + +typedef enum { + SW_TEXTURE_2D = GL_TEXTURE_2D, + SW_DEPTH_TEST = GL_DEPTH_TEST, + SW_CULL_FACE = GL_CULL_FACE +} SWstate; + +typedef enum { + SW_PROJECTION = GL_PROJECTION, + SW_MODELVIEW = GL_MODELVIEW, + SW_TEXTURE = GL_TEXTURE +} SWmatrix; + +typedef enum { + SW_VERTEX_ARRAY = GL_VERTEX_ARRAY, + SW_TEXTURE_COORD_ARRAY = GL_TEXTURE_COORD_ARRAY, + SW_NORMAL_ARRAY = GL_NORMAL_ARRAY, + SW_COLOR_ARRAY = GL_COLOR_ARRAY +} SWarray; + +typedef enum { + SW_POINTS = GL_POINTS, + SW_LINES = GL_LINES, + SW_TRIANGLES = GL_TRIANGLES, + SW_QUADS = GL_QUADS, +} SWfill; + +typedef enum { + SW_FRONT = GL_FRONT, + SW_BACK = GL_BACK, +} SWface; + +typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp +} SWpixelformat; + +typedef enum { + SW_NEAREST = GL_NEAREST, + SW_LINEAR = GL_LINEAR +} SWfilter; + +typedef enum { + SW_REPEAT = GL_REPEAT, + SW_CLAMP_TO_EDGE = GL_CLAMP_TO_EDGE, + SW_MIRRORED_REPEAT = GL_MIRRORED_REPEAT +} SWwrap; + +typedef enum { + SW_TEXTURE_MIN_FILTER = GL_TEXTURE_MIN_FILTER, + SW_TEXTURE_MAG_FILTER = GL_TEXTURE_MAG_FILTER, + SW_TEXTURE_WRAP_S = GL_TEXTURE_WRAP_S, + SW_TEXTURE_WRAP_T = GL_TEXTURE_WRAP_T +} SWtexparam; + +typedef enum { + SW_NO_ERROR = GL_NO_ERROR, + SW_INVALID_ENUM = GL_INVALID_ENUM, + SW_INVALID_VALUE = GL_INVALID_VALUE, + SW_STACK_OVERFLOW = GL_STACK_OVERFLOW, + SW_STACK_UNDERFLOW = GL_STACK_UNDERFLOW, + SW_INVALID_OPERATION = GL_INVALID_OPERATION, +} SWerrcode; + +/* === Public API === */ + +void swInit(int w, int h); +void swClose(void); + +void swEnable(SWstate state); +void swDisable(SWstate state); + +void* swGetColorBuffer(int* w, int* h); + +void swMatrixMode(SWmatrix mode); +void swPushMatrix(void); +void swPopMatrix(void); +void swLoadIdentity(void); +void swTranslatef(float x, float y, float z); +void swRotatef(float angle, float x, float y, float z); +void swScalef(float x, float y, float z); +void swMultMatrixf(const float* mat); +void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); +void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); + +void swViewport(int x, int y, int width, int height); + +void swClearColor(float r, float g, float b, float a); +void swClear(void); + +void swBegin(SWfill mode); +void swEnd(void); + +void swVertex2i(int x, int y); +void swVertex2f(float x, float y); +void swVertex2fv(const float* v); +void swVertex3i(int x, int y, int z); +void swVertex3f(float x, float y, float z); +void swVertex3fv(const float* v); +void swVertex4i(int x, int y, int z, int w); +void swVertex4f(float x, float y, float z, float w); +void swVertex4fv(const float* v); + +void swColor1ui(uint32_t color); +void swColor3ub(uint8_t r, uint8_t g, uint8_t b); +void swColor3ubv(const uint8_t* v); +void swColor3us(uint16_t r, uint16_t g, uint16_t b); +void swColor3usv(const uint16_t* v); +void swColor3ui(uint32_t r, uint32_t g, uint32_t b); +void swColor3uiv(const uint32_t* v); +void swColor3f(float r, float g, float b); +void swColor3fv(const float* v); +void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); +void swColor4ubv(const uint8_t* v); +void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); +void swColor4usv(const uint16_t* v); +void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); +void swColor4uiv(const uint32_t* v); +void swColor4f(float r, float g, float b, float a); +void swColor4fv(const float* v); + +void swTexCoord2f(float u, float v); +void swTexCoordfv(const float* v); + +void swNormal3f(float x, float y, float z); +void swNormal3fv(const float* v); + +void swBindArray(SWarray type, void *buffer); +void swDrawArrays(SWfill mode, int offset, int count); + +uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); +void swUnloadTexture(uint32_t id); + +void swTextureParameters(uint32_t id, int param, int value); +void swBindTexture(uint32_t id); + +#endif // RLSW_H + + +#ifdef RLSW_IMPL + +#include +#include + +/* === Defines and Macros === */ + +#define SW_PI 3.14159265358979323846f +#define SW_DEG2RAD (SW_PI/180.0f) +#define SW_RAD2DEG (180.0f/SW_PI) + +#define SW_STATE_TEXTURE_2D (1 << 0) +#define SW_STATE_DEPTH_TEST (1 << 1) +#define SW_STATE_CULL_FACE (1 << 2) + +/* === Internal Structs === */ + +typedef float sw_matrix_t[4*4]; +typedef uint16_t sw_half_t; + +typedef struct { + + float position[4]; // Position coordinates + float normal[3]; // Normal vector + float texcoord[2]; // Texture coordinates + float color[4]; // Color + + float homogeneous[4]; // Homogeneous coordinates + float screen[2]; // Screen coordinates + +} sw_vertex_t; + +typedef struct { + + const void* pixels; + int width; + int height; + int format; + + SWfilter minFilter; + SWfilter magFilter; + + SWwrap sWrap; + SWwrap tWrap; + + float tx; + float ty; + +} sw_texture_t; + +typedef struct { + uint8_t *color; // 32-bit RGBA color buffer + uint16_t *depth; // 16-bit fixed fract buffer + int width, height; +} sw_framebuffer_t; + +typedef struct { + + sw_framebuffer_t framebuffer; + uint8_t clearColor[4]; // Color used to clear the screen + uint16_t clearDepth; // Depth value used to clear the screen + + uint32_t currentTexture; + sw_matrix_t *currentMatrix; + + uint32_t blendFunction; + uint32_t depthFunction; + + int vpPos[2]; // Represents the top-left corner of the viewport + int vpDim[2]; // Represents the dimensions of the viewport (minus one) + int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) + int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + + struct { + float* positions; + float* texcoords; + float* normals; + uint8_t* colors; + } array; + + sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering + int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' + + SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + float pointSize; // Rasterized point size + float lineWidth; // Rasterized line width + + sw_matrix_t matProjection; // Projection matrix, user adjustable + sw_matrix_t matTexture; // Texture matrix, user adjustable + sw_matrix_t matModel; // Model matrix, user adjustable (the one used if we push in SW_MODELVIEW mode) + sw_matrix_t matView; // View matrix, user adjustable (the default one used in SW_MODELVIEW mode) + sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally + + sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations + sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations + sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations + uint32_t stackProjectionCounter; // Counter for matrix stack operations + uint32_t stackModelviewCounter; // Counter for matrix stack operations + uint32_t stackTextureCounter; // Counter for matrix stack operations + + SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) + bool modelMatrixUsed; // Flag indicating if the model matrix is used + + SWface cullFace; // Faces to cull + SWerrcode errCode; // Last error code + + sw_texture_t* loadedTextures; + int loadedTextureCount; + + uint32_t* freeTextureIds; + int freeTextureIdCount; + + uint32_t stateFlags; + +} sw_data_t; + + +/* === Global Data === */ + +static sw_data_t RLSW = { 0 }; + + +/* === Helper Functions === */ + +static inline void sw_matrix_id(sw_matrix_t dst) +{ + dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; + dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; + dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; + dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; +} + +static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) +{ + sw_matrix_t result; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float sum = 0.0; + for (int k = 0; k < 4; k++) { + sum += left[i * 4 + k] * right[k * 4 + j]; + } + result[i * 4 + j] = sum; + } + } + for (int i = 0; i < 16; i++) { + dst[i] = result[i]; + } +} + +static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_matrix_t mat) +{ + float tmp[4] = { + mat[0] * v[0] + mat[4] * v[1] + mat[8] * v[2] + mat[12] * v[3], + mat[1] * v[0] + mat[5] * v[1] + mat[9] * v[2] + mat[13] * v[3], + mat[2] * v[0] + mat[6] * v[1] + mat[10] * v[2] + mat[14] * v[3], + mat[3] * v[0] + mat[7] * v[1] + mat[11] * v[2] + mat[15] * v[3] + }; + + for (int i = 0; i < 4; i++) { + dst[i] = tmp[i]; + } +} + +static inline float sw_lerp(float a, float b, float t) +{ + return a + t * (b - a); +} + +static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) +{ + sw_vertex_t result; + for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); + } + return result; +} + +static inline uint32_t sw_cvt_hf_ui(uint16_t h) +{ + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; +} + +static inline float sw_cvt_hf(sw_half_t y) +{ + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; +} + +static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) +{ + float gray = (float)((uint8_t*)pixels)[offset] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_red_16(float* color, const void* pixels, uint32_t offset) +{ + float value = sw_cvt_hf(((sw_half_t*)pixels)[offset]); + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_t offset) +{ + float value = ((float*)pixels)[offset]; + + color[0] = value; + color[1] = value; + color[2] = value; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) +{ + float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; + float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; + + color[0] = gray; + color[1] = gray; + color[2] = gray; + color[3] = alpha; +} + +static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7E0) >> 5) / 63; + color[2] = (float)(pixel & 0x1F) / 31; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) +{ + const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_161616(float* color, const void* pixels, uint32_t offset) +{ + const sw_half_t *pixel = (sw_half_t*)pixels + 3 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgb_323232(float* color, const void* pixels, uint32_t offset) +{ + const float *pixel = (float*)pixels + 3 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = 1.0f; +} + +static inline void sw_get_pixel_rgba_5551(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF800) >> 11) / 31; + color[1] = (float)((pixel & 0x7C0) >> 6) / 31; + color[2] = (float)((pixel & 0x3E) >> 1) / 31; + color[3] = (float)(pixel & 0x1); +} + +static inline void sw_get_pixel_rgba_4444(float* color, const void* pixels, uint32_t offset) +{ + uint16_t pixel = ((uint16_t*)pixels)[offset]; + + color[0] = (float)((pixel & 0xF000) >> 12) / 15; + color[1] = (float)((pixel & 0xF00) >> 8) / 15; + color[2] = (float)((pixel & 0xF0) >> 4) / 15; + color[3] = (float)(pixel & 0xF) / 15; +} + +static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint32_t offset) +{ + const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; + + color[0] = (float)pixel[0] / 255; + color[1] = (float)pixel[1] / 255; + color[2] = (float)pixel[2] / 255; + color[3] = (float)pixel[3] / 255; +} + +static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) +{ + const sw_half_t *pixel = (sw_half_t*)pixels + 4 * offset; + + color[0] = sw_cvt_hf(pixel[0]); + color[1] = sw_cvt_hf(pixel[1]); + color[2] = sw_cvt_hf(pixel[2]); + color[3] = sw_cvt_hf(pixel[3]); +} + +static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, uint32_t offset) +{ + const float *pixel = (float*)pixels + 4 * offset; + + color[0] = pixel[0]; + color[1] = pixel[1]; + color[2] = pixel[2]; + color[3] = pixel[3]; +} + +static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) +{ + switch (format) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_get_pixel_grayscale(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_get_pixel_grayscale_alpha(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_get_pixel_rgb_565(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_get_pixel_rgb_888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_get_pixel_rgba_5551(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_get_pixel_rgba_4444(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_get_pixel_rgba_8888(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_get_pixel_red_32(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_get_pixel_rgb_323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_get_pixel_rgba_32323232(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_get_pixel_red_16(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_get_pixel_rgb_161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_get_pixel_rgba_16161616(color, pixels, offset); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } +} + +static inline void sw_map_repeat(int* out, float in, int max) +{ + // Upscale to nearest texture coordinates + // NOTE: We use '(int)(x+0.5)' although this is incorrect + // regarding the direction of rounding in case of negative values + // and also less accurate than roundf, but it remains so much more + // efficient that it is preferable for now to opt for this option. + + *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); +} + +static inline void sw_map_clamp_to_edge(int* out, float in, int max) +{ + in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); + *out = (int)(in * (max - 1) + 0.5f); +} + +static inline void sw_map_mirrored_repeat(int* out, float in, int max) +{ + in = fmodf(fabsf(in), 2); + if (in > 1.0f) in = 1.0f - (in - 1.0f); + *out = (int)(in * (max - 1) + 0.5f); +} + +static inline void sw_map(int* out, float in, int max, SWwrap mode) +{ + switch (mode) { + case SW_REPEAT: + sw_map_repeat(out, in, max); + break; + case SW_CLAMP_TO_EDGE: + sw_map_clamp_to_edge(out, in, max); + break; + case SW_MIRRORED_REPEAT: + sw_map_mirrored_repeat(out, in, max); + break; + } +} + +static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) +{ + int x, y; + sw_map(&x, u, tex->width, tex->sWrap); + sw_map(&y, v, tex->height, tex->tWrap); + sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); +} + +static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) +{ + int x0, y0, x1, y1; + sw_map(&x0, u, tex->width, tex->sWrap); + sw_map(&y0, v, tex->height, tex->tWrap); + sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); + sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); + + float fx = u * (tex->width - 1) - x0; + float fy = v * (tex->height - 1) - y0; + + float c00[4], c10[4], c01[4], c11[4]; + sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); + sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); + sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); + sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); + + float c0[4], c1[4]; + for (int i = 0; i < 4; i++) { + float a = sw_lerp(c00[i], c10[i], fx); + float b = sw_lerp(c01[i], c11[i], fx); + color[i] = sw_lerp(a, b, fy); + } +} + +static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, + float xDu, float yDu, float xDv, float yDv) +{ + // TODO: It seems there are some incorrect detections depending on the context + // This is probably due to the fact that the fractions are obtained + // at the wrong moment during rasterization. It would be worth reviewing + // this, although the scanline method complicates things. + + // Calculate the derivatives for each axis + float du = sqrtf(xDu * xDu + yDu * yDu); + float dv = sqrtf(xDv * xDv + yDv * yDv); + float L = (du > dv) ? du : dv; + + // Select the filter based on the size of the footprint + if (L > 1.0f) { + // Minification + if (tex->minFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->minFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } else { + // Magnification + if (tex->magFilter == SW_NEAREST) { + sw_sample_texture_nearest(color, tex, u, v); + } else if (tex->magFilter == SW_LINEAR) { + sw_sample_texture_bilinear(color, tex, u, v); + } + } +} + +static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + + int inputCounter = *vertexCounter; + *vertexCounter = 0; + + const sw_vertex_t *prevVt = &input[inputCounter-1]; + char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; + if (prevDot*currDot < 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + return *vertexCounter > 0; +} + +static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + for (int iAxis = 0; iAxis < 3; iAxis++) + { + if (*vertexCounter == 0) return false; + + sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int inputCounter; + + const sw_vertex_t *prevVt; + char prevDot; + + // Clip against first plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot * currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + + if (*vertexCounter == 0) return false; + + // Clip against opposite plane + + for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { + input[i] = polygon[i]; + } + inputCounter = *vertexCounter; + *vertexCounter = 0; + + prevVt = &input[inputCounter-1]; + prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + + for (int i = 0; i < inputCounter; i++) { + char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; + if (prevDot*currDot <= 0) { + polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); + } + if (currDot > 0) { + polygon[(*vertexCounter)++] = input[i]; + } + prevDot = currDot; + prevVt = &input[i]; + } + } + + return *vertexCounter > 0; +} + +void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; + sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + } + + if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + for (int i = 0; i < *vertexCounter; i++) { + sw_vertex_t *v = polygon + i; + + // Calculation of the reciprocal of W for normalization + // as well as perspective correct attributes + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + + // Division of XYZ coordinates by weight + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + + // Division of texture coordinates (perspective correct) + v->texcoord[0] *= v->homogeneous[3]; + v->texcoord[1] *= v->homogeneous[3]; + + // Transform to screen space + v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + } + } +} + +#define DEFINE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ + const sw_vertex_t* end, float yDu, float yDv) \ +{ \ + /* Calculate the horizontal width and avoid division by zero */ \ + float dx = end->screen[0] - start->screen[0]; \ + if (fabsf(dx) < 1e-4f) return; \ + \ + /* Convert and center the screen coordinates */ \ + int xStart = (int)(start->screen[0] + 0.5f); \ + int xEnd = (int)(end->screen[0] + 0.5f); \ + int y = (int)(start->screen[1] + 0.5f); \ + \ + /* Calculate the initial interpolation parameter and its increment */ \ + float dt = 1.0f / dx; \ + float t = (xStart - start->screen[0]) * dt; \ + \ + float xDu, xDv; \ + if (ENABLE_TEXTURE) { \ + /* Calculate the horizontal gradients for UV coordinates */ \ + xDu = (end->texcoord[0] - start->texcoord[0]) * dt; \ + xDv = (end->texcoord[1] - start->texcoord[1]) * dt; \ + } \ + \ + /* Pre-calculate the color differences for interpolation */ \ + float dcol[4]; \ + for (int i = 0; i < 4; i++) { \ + dcol[i] = end->color[i] - start->color[i]; \ + } \ + \ + /* Pre-calculate the differences in Z and W \ + (for depth testing and perspective correction) */ \ + float dz = end->homogeneous[2] - start->homogeneous[2]; \ + float dw = end->homogeneous[3] - start->homogeneous[3]; \ + \ + float u, v; \ + if (ENABLE_TEXTURE) { \ + /* Initialize the interpolated texture coordinates */ \ + u = start->texcoord[0] + t * xDu; \ + v = start->texcoord[1] + t * xDv; \ + } \ + \ + /* Pre-calculate the starting pointer for the color framebuffer row */ \ + uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ + uint8_t* dst = row_ptr + xStart * 4; \ + \ + /* Pre-calculate the pointer for the depth buffer row */ \ + uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ + uint16_t* dptr = depth_row; \ + \ + /* Scanline rasterization loop */ \ + for (int x = xStart; x < xEnd; x++) { \ + /* Interpolate Z and W for depth testing and perspective correction */ \ + float w = 1.0f / (start->homogeneous[3] + t * dw); \ + float z = start->homogeneous[2] + t * dz; \ + \ + if (ENABLE_DEPTH_TEST) { \ + /* Depth testing with direct access to the depth buffer */ \ + /* TODO: Implement different depth funcs? */ \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) goto discard; \ + } \ + \ + /* Update the depth buffer */ \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + if (ENABLE_TEXTURE) \ + { \ + /* Sample the texture */ \ + float texColor[4]; \ + sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + \ + /* Interpolate the color and modulate by the texture color */ \ + for (int i = 0; i < 4; i++) { \ + float lerp = start->color[i] + t * dcol[i]; \ + float finalColor = texColor[i] * lerp; \ + /* Inline clamp to keep the value between 0 and 1 */ \ + /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ + dst[i] = (uint8_t)(finalColor * 255.0f); \ + } \ + } \ + else \ + { \ + /* Interpolate the color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = start->color[i] + t * dcol[i]; \ + /* Inline clamp to keep the value between 0 and 1 */ \ + /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ + finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ + dst[i] = (uint8_t)(finalColor * 255.0f); \ + } \ + } \ + \ + /* Increment the interpolation parameter, UVs, and pointers */ \ + discard: \ + t += dt; \ + dst += 4; \ + dptr++; \ + if (ENABLE_TEXTURE) { \ + u += xDu; \ + v += xDv; \ + } \ + } \ +} + +#define DEFINE_RASTER_TRIANGLE(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ +void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ + const sw_texture_t* tex) \ +{ \ + /* Swap vertices by increasing y */ \ + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ + if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t* tmp = v1; v1 = v2; v2 = tmp; } \ + if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ + \ + /* Extracting coordinates from the sorted vertices */ \ + float x0 = v0->screen[0], y0 = v0->screen[1]; \ + float x1 = v1->screen[0], y1 = v1->screen[1]; \ + float x2 = v2->screen[0], y2 = v2->screen[1]; \ + \ + /* Reject degenerate triangles */ \ + float height = y2 - y0; \ + if (height < 1e-4f) return; \ + \ + /* Precompute the inverse of the triangle height and */ \ + /* edge lengths with checks to avoid division by zero. */ \ + float inv_height = 1.0f / height; \ + float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; \ + float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; \ + \ + /* Pre-calculation of slopes (dx/dy) */ \ + float dx02 = (x2 - x0) * inv_height; \ + float dx01 = (x1 - x0) * inv_y1y0; \ + float dx12 = (x2 - x1) * inv_y2y1; \ + \ + /* Y bounds (vertical clipping) */ \ + int yTop = (int)(y0 + 0.5f); \ + int yMiddle = (int)(y1 + 0.5f); \ + int yBottom = (int)(y2 + 0.5f); \ + \ + /* Global calculation of vertical texture gradients for the triangle */ \ + float yDu, yDv; \ + if (ENABLE_TEXTURE) { \ + yDu = (v2->texcoord[0] - v0->texcoord[0]) * inv_height; \ + yDv = (v2->texcoord[1] - v0->texcoord[1]) * inv_height; \ + } \ + \ + /* Initializing scanline variables */ \ + float xLeft = x0, xRight = x0; \ + sw_vertex_t start, end; \ + \ + /* Scanline for the upper part of the triangle */ \ + for (int y = yTop; y < yMiddle; y++) { \ + float dy = (float)y - y0; \ + float t1 = dy * inv_height; \ + float t2 = dy * inv_y1y0; \ + \ + /* Vertex interpolation */ \ + start = sw_lerp_vertex(v0, v2, t1); \ + end = sw_lerp_vertex(v0, v1, t2); \ + start.screen[0] = xLeft; \ + start.screen[1] = (float)y; \ + end.screen[0] = xRight; \ + end.screen[1] = (float)y; \ + \ + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + \ + /* Incremental update */ \ + xLeft += dx02; \ + xRight += dx01; \ + } \ + \ + /* Scanline for the lower part of the triangle */ \ + xRight = x1; /* Restart the right side from the second vertex */ \ + for (int y = yMiddle; y < yBottom; y++) { \ + float dy = (float)y - y0; \ + float t1 = dy * inv_height; \ + float t2 = (float)(y - y1) * inv_y2y1; \ + \ + /* Vertex interpolation */ \ + start = sw_lerp_vertex(v0, v2, t1); \ + end = sw_lerp_vertex(v1, v2, t2); \ + start.screen[0] = xLeft; \ + start.screen[1] = (float)y; \ + end.screen[0] = xRight; \ + end.screen[1] = (float)y; \ + \ + if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ + \ + /* Incremental update */ \ + xLeft += dx02; \ + xRight += dx12; \ + } \ +} + +DEFINE_RASTER_SCANLINE(sw_raster_scanline, false, false) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex, true, false) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_depth, false, true) +DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex_depth, true, true) + +DEFINE_RASTER_TRIANGLE(sw_raster_triangle, sw_raster_scanline, false) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex, sw_raster_scanline_tex, true) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_depth, sw_raster_scanline_depth, false) +DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex_depth, sw_raster_scanline_tex_depth, true) + +void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) +{ + int vertexCounter = 3; + + sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES]; + polygon[0] = *v0; + polygon[1] = *v1; + polygon[2] = *v2; + + sw_project_and_clip_triangle(polygon, &vertexCounter); + + if (vertexCounter < 3) { + return; + } + + if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_tex_depth( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_tex( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle_depth( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } + else { + for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { + sw_raster_triangle( + &polygon[0], &polygon[i + 1], &polygon[i + 2], + &RLSW.loadedTextures[RLSW.currentTexture] + ); + } + } +} + +static inline bool sw_is_texture_id_valid(uint32_t id) +{ + bool valid = true; + + if (id == 0) valid = false; + else if (id >= SW_MAX_TEXTURES) valid = false; + else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + + return true; +} + +static inline bool sw_is_texture_filter_valid(int filter) +{ + return (filter == SW_NEAREST || filter == SW_LINEAR); +} + +static inline bool sw_is_texture_wrap_valid(int wrap) +{ + return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); +} + +/* === Public Implementation === */ + +void swInit(int w, int h) +{ + swViewport(0, 0, w, h); + + RLSW.framebuffer.color = SW_MALLOC(4 * w * h); + RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + + RLSW.clearColor[0] = 0; + RLSW.clearColor[1] = 0; + RLSW.clearColor[2] = 0; + RLSW.clearColor[3] = 255; + RLSW.clearDepth = UINT16_MAX; + + RLSW.currentMatrixMode = SW_MODELVIEW; + RLSW.currentMatrix = &RLSW.matView; + + sw_matrix_id(RLSW.matProjection); + sw_matrix_id(RLSW.matTexture); + sw_matrix_id(RLSW.matModel); + sw_matrix_id(RLSW.matView); + + RLSW.vertexBuffer[0].color[0] = 1.0f; + RLSW.vertexBuffer[0].color[1] = 1.0f; + RLSW.vertexBuffer[0].color[2] = 1.0f; + RLSW.vertexBuffer[0].color[3] = 1.0f; + + RLSW.vertexBuffer[0].texcoord[0] = 0.0f; + RLSW.vertexBuffer[0].texcoord[1] = 0.0f; + + RLSW.vertexBuffer[0].normal[0] = 0.0f; + RLSW.vertexBuffer[0].normal[1] = 0.0f; + RLSW.vertexBuffer[0].normal[2] = 1.0f; + + static const float defTex[3*2*2] = + { + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + }; + + RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].width = 2; + RLSW.loadedTextures[0].height = 2; + RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + RLSW.loadedTextures[0].minFilter = SW_NEAREST; + RLSW.loadedTextures[0].magFilter = SW_NEAREST; + RLSW.loadedTextures[0].sWrap = SW_REPEAT; + RLSW.loadedTextures[0].tWrap = SW_REPEAT; + RLSW.loadedTextures[0].tx = 0.5f; + RLSW.loadedTextures[0].ty = 0.5f; + + RLSW.loadedTextureCount = 1; +} + +void swClose(void) +{ + SW_FREE(RLSW.framebuffer.color); + SW_FREE(RLSW.framebuffer.depth); + + SW_FREE(RLSW.loadedTextures); + SW_FREE(RLSW.freeTextureIds); +} + +void swEnable(SWstate state) +{ + switch (state) { + case SW_TEXTURE_2D: + RLSW.stateFlags |= SW_STATE_TEXTURE_2D; + break; + case SW_DEPTH_TEST: + RLSW.stateFlags |= SW_STATE_DEPTH_TEST; + break; + case SW_CULL_FACE: + RLSW.stateFlags |= SW_STATE_CULL_FACE; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void swDisable(SWstate state) +{ + switch (state) { + case SW_TEXTURE_2D: + RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; + break; + case SW_DEPTH_TEST: + RLSW.stateFlags &= ~SW_STATE_DEPTH_TEST; + break; + case SW_CULL_FACE: + RLSW.stateFlags &= ~SW_STATE_CULL_FACE; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void* swGetColorBuffer(int* w, int* h) +{ + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; +} + +void swMatrixMode(SWmatrix mode) +{ + switch (mode) { + case SW_PROJECTION: + RLSW.currentMatrix = &RLSW.matProjection; + break; + case SW_MODELVIEW: + RLSW.currentMatrix = RLSW.modelMatrixUsed + ? &RLSW.matModel : &RLSW.matView; + break; + case SW_TEXTURE: + RLSW.currentMatrix = &RLSW.matTexture; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + RLSW.currentMatrixMode = mode; +} + +void swPushMatrix(void) +{ + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackProjection[RLSW.stackProjectionCounter][i] = RLSW.matProjection[i]; + } + RLSW.stackProjectionCounter++; + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + if (RLSW.modelMatrixUsed) { + for (int i = 0; i < 16; i++) { + RLSW.stackModelview[RLSW.stackModelviewCounter][i] = RLSW.matModel[i]; + } + RLSW.stackModelviewCounter++; + } else { + RLSW.currentMatrix = &RLSW.matModel; + RLSW.modelMatrixUsed = true; + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { + RLSW.errCode = SW_STACK_OVERFLOW; + return; + } + for (int i = 0; i < 16; i++) { + RLSW.stackTexture[RLSW.stackTextureCounter][i] = RLSW.matTexture[i]; + } + RLSW.stackTextureCounter++; + break; + + } +} + +void swPopMatrix(void) +{ + switch (RLSW.currentMatrixMode) { + + case SW_PROJECTION: + if (RLSW.stackProjectionCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackProjectionCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matProjection[i] = RLSW.stackProjection[RLSW.stackProjectionCounter][i]; + } + break; + + case SW_MODELVIEW: + if (RLSW.stackModelviewCounter == 0) { + if (!RLSW.modelMatrixUsed) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + sw_matrix_id(RLSW.matModel); + RLSW.currentMatrix = &RLSW.matView; + RLSW.modelMatrixUsed = false; + } else { + RLSW.stackModelviewCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matModel[i] = RLSW.stackModelview[RLSW.stackModelviewCounter][i]; + } + } + break; + + case SW_TEXTURE: + if (RLSW.stackTextureCounter <= 0) { + RLSW.errCode = SW_STACK_UNDERFLOW; + return; + } + RLSW.stackTextureCounter--; + for (int i = 0; i < 16; i++) { + RLSW.matTexture[i] = RLSW.stackTexture[RLSW.stackTextureCounter][i]; + } + break; + + } +} + +void swLoadIdentity(void) +{ + sw_matrix_id(*RLSW.currentMatrix); +} + +void swTranslatef(float x, float y, float z) +{ + sw_matrix_t mat; + sw_matrix_id(mat); + + mat[12] = x; + mat[13] = y; + mat[14] = z; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swRotatef(float angle, float x, float y, float z) +{ + angle *= SW_DEG2RAD; + + sw_matrix_t mat; + sw_matrix_id(mat); + + float lengthSq = x*x + y*y + z*z; + + if (lengthSq != 1.0f && lengthSq != 0.0f) { + float invLenght = 1.0f / lengthSq; + x *= invLenght; + y *= invLenght; + z *= invLenght; + } + + float sinres = sinf(angle); + float cosres = cosf(angle); + float t = 1.0f - cosres; + + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; + mat[10] = z*z*t + cosres; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swScalef(float x, float y, float z) +{ + sw_matrix_t mat; + + mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; + mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; + mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; + mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; + + sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); +} + +void swMultMatrixf(const float* mat) +{ + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) +{ + sw_matrix_t mat = { 0 }; + + float rl = right - left; + float tb = top - bottom; + float fn = zfar - znear; + + mat[0] = (znear * 2.0f) / rl; + mat[5] = (znear * 2.0f) / tb; + + mat[8] = (right + left) / rl; + mat[9] = (top + bottom) / tb; + mat[10] = -(zfar + znear) / fn; + mat[11] = -1.0f; + + mat[14] = -(zfar * znear * 2.0f) / fn; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) +{ + sw_matrix_t mat = { 0 }; + + float rl = (right - left); + float tb = (top - bottom); + float fn = (zfar - znear); + + mat[0] = 2.0f / rl; + mat[5] = 2.0f / tb; + + mat[10] = -2.0f / fn; + mat[11] = 0.0f; + mat[12] = -(left + right) / rl; + mat[13] = -(top + bottom) / tb; + + mat[14] = -(zfar + znear) / fn; + mat[15] = 1.0f; + + sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); +} + +void swViewport(int x, int y, int width, int height) +{ + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; +} + +void swClearColor(float r, float g, float b, float a) +{ + RLSW.clearColor[0] = r * 255; + RLSW.clearColor[1] = g * 255; + RLSW.clearColor[2] = b * 255; + RLSW.clearColor[3] = a * 255; +} + +void swClear(void) +{ + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + for (int i = 0; i < size; i++) { + ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); + RLSW.framebuffer.depth[i] = RLSW.clearDepth; + } +} + +void swBegin(SWfill mode) +{ + if (mode < SW_POINTS || mode > SW_QUADS) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.vertexCounter = 0; + RLSW.fillMode = mode; +} + +void swEnd(void) +{ + RLSW.vertexCounter = 0; +} + +void swVertex2i(int x, int y) +{ + float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + swVertex4fv(v); +} + +void swVertex2f(float x, float y) +{ + float v[4] = { x, y, 0.0f, 1.0f }; + swVertex4fv(v); +} + +void swVertex2fv(const float* v) +{ + float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + swVertex4fv(v4); +} + +void swVertex3i(int x, int y, int z) +{ + float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + swVertex4fv(v); +} + +void swVertex3f(float x, float y, float z) +{ + float v[4] = { x, y, z, 1.0f }; + swVertex4fv(v); +} + +void swVertex3fv(const float* v) +{ + float v4[4] = { v[0], v[1], v[2], 1.0f }; + swVertex4fv(v4); +} + +void swVertex4i(int x, int y, int z, int w) +{ + float v[4] = { (float)x, (float)y, (float)z, (float)w }; + swVertex4fv(v); +} + +void swVertex4f(float x, float y, float z, float w) +{ + float v[4] = { x, y, z, w }; + swVertex4fv(v); +} + +void swVertex4fv(const float* v) +{ + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].position[i] = v[i]; + } + RLSW.vertexCounter++; + + int neededVertices = 0; + switch (RLSW.fillMode) { + case SW_POINTS: + neededVertices = 1; + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + neededVertices = 3; + break; + case SW_QUADS: + neededVertices = 4; + break; + } + + if (RLSW.vertexCounter == neededVertices) { + + // TODO: Optimize MVP calculation + sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); + sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + + switch (RLSW.fillMode) { + case SW_POINTS: + break; + case SW_LINES: + neededVertices = 2; + break; + case SW_TRIANGLES: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + break; + case SW_QUADS: + sw_render_triangle( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + sw_render_triangle( + &RLSW.vertexBuffer[2], + &RLSW.vertexBuffer[3], + &RLSW.vertexBuffer[0] + ); + break; + } + + RLSW.vertexBuffer[0] = RLSW.vertexBuffer[neededVertices - 1]; + RLSW.vertexCounter = 0; + } + else { + RLSW.vertexBuffer[RLSW.vertexCounter] = RLSW.vertexBuffer[RLSW.vertexCounter - 1]; + } +} + +void swColor1ui(uint32_t color) +{ + union { + uint32_t v; + uint8_t a[4]; + } c = { .v = color }; + + float cv[4]; + cv[0] = (float)c.a[0] / 255; + cv[1] = (float)c.a[1] / 255; + cv[2] = (float)c.a[2] / 255; + cv[3] = (float)c.a[3] / 255; + + swColor4fv(cv); +} + +void swColor3ub(uint8_t r, uint8_t g, uint8_t b) +{ + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3ubv(const uint8_t* v) +{ + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3us(uint16_t r, uint16_t g, uint16_t b) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3usv(const uint16_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3ui(uint32_t r, uint32_t g, uint32_t b) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3uiv(const uint32_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3f(float r, float g, float b) +{ + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor3fv(const float* v) +{ + float cv[4]; + cv[0] = v[0]; + cv[1] = v[1]; + cv[2] = v[2]; + cv[3] = 1.0f; + + swColor4fv(cv); +} + +void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) +{ + float cv[4]; + cv[0] = (float)r / 255; + cv[1] = (float)g / 255; + cv[2] = (float)b / 255; + cv[3] = (float)a / 255; + + swColor4fv(cv); +} + +void swColor4ubv(const uint8_t* v) +{ + float cv[4]; + cv[0] = (float)v[0] / 255; + cv[1] = (float)v[1] / 255; + cv[2] = (float)v[2] / 255; + cv[3] = (float)v[3] / 255; + + swColor4fv(cv); +} + +void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 8)) / 255; + cv[1] = (float)((uint8_t)(g >> 8)) / 255; + cv[2] = (float)((uint8_t)(b >> 8)) / 255; + cv[3] = (float)((uint8_t)(a >> 8)) / 255; + + swColor4fv(cv); +} + +void swColor4usv(const uint16_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; + + swColor4fv(cv); +} + +void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(r >> 24)) / 255; + cv[1] = (float)((uint8_t)(g >> 24)) / 255; + cv[2] = (float)((uint8_t)(b >> 24)) / 255; + cv[3] = (float)((uint8_t)(a >> 24)) / 255; + + swColor4fv(cv); +} + +void swColor4uiv(const uint32_t* v) +{ + float cv[4]; + cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; + cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; + cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; + cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; + + swColor4fv(cv); +} + +void swColor4f(float r, float g, float b, float a) +{ + float cv[4]; + cv[0] = r; + cv[1] = g; + cv[2] = b; + cv[3] = a; + + swColor4fv(cv); +} + +void swColor4fv(const float* v) +{ + for (int i = 0; i < 4; i++) { + RLSW.vertexBuffer[RLSW.vertexCounter].color[i] = v[i]; + } +} + +void swTexCoord2f(float u, float v) +{ + float s = RLSW.matTexture[0]*u + RLSW.matTexture[4]*v + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*u + RLSW.matTexture[5]*v + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; +} + +void swTexCoordfv(const float* v) +{ + float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; + float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; + + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[0] = s; + RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; +} + +void swNormal3f(float x, float y, float z) +{ + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = x; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = y; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = z; +} + +void swNormal3fv(const float* v) +{ + RLSW.vertexBuffer[RLSW.vertexCounter].normal[0] = v[0]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[1] = v[1]; + RLSW.vertexBuffer[RLSW.vertexCounter].normal[2] = v[2]; +} + +void swBindArray(SWarray type, void *buffer) +{ + switch (type) { + case SW_VERTEX_ARRAY: + RLSW.array.positions = buffer; + break; + case SW_TEXTURE_COORD_ARRAY: + RLSW.array.texcoords = buffer; + break; + case SW_NORMAL_ARRAY: + RLSW.array.normals = buffer; + break; + case SW_COLOR_ARRAY: + RLSW.array.colors = buffer; + break; + default: + break; + } +} + +void swDrawArrays(SWfill mode, int offset, int count) +{ + if (RLSW.array.positions == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + swBegin(mode); + + for (int i = offset; i < count; i++) { + if (RLSW.array.texcoords) { + swTexCoordfv(RLSW.array.texcoords + 2 * i); + } + if (RLSW.array.normals) { + swNormal3fv(RLSW.array.normals + 3 * i); + } + if (RLSW.array.colors) { + swColor4ubv(RLSW.array.colors + 4 * i); + } + swVertex3fv(RLSW.array.positions + 3 * i); + } + + swEnd(); +} + +uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) +{ + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow + return 0; + } + + sw_texture_t texture = { 0 }; + texture.pixels = data; + texture.width = width; + texture.height = height; + texture.format = format; + texture.minFilter = SW_NEAREST; + texture.magFilter = SW_NEAREST; + texture.sWrap = SW_REPEAT; + texture.tWrap = SW_REPEAT; + texture.tx = 1.0f / width; + texture.ty = 1.0f / height; + (void)mipmapCount; + + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + + RLSW.loadedTextures[id] = texture; + + return id; +} + +void swUnloadTexture(uint32_t id) +{ + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + RLSW.loadedTextures[id].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; +} + +void swTextureParameters(uint32_t id, int param, int value) +{ + if (!sw_is_texture_id_valid(id)) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + switch (param) { + + case SW_TEXTURE_MIN_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->minFilter = value; + break; + + case SW_TEXTURE_MAG_FILTER: + if (!sw_is_texture_filter_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->magFilter = value; + break; + + case SW_TEXTURE_WRAP_S: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->sWrap = value; + break; + + case SW_TEXTURE_WRAP_T: + if (!sw_is_texture_wrap_valid(value)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + texture->tWrap = value; + break; + + default: + RLSW.errCode = SW_INVALID_ENUM; + return; + + } +} + +void swBindTexture(uint32_t id) +{ + if (id >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_INVALID_VALUE; + return; + } + + if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } + + RLSW.currentTexture = id; +} + +#endif // RLSW_IMPL From f305490c33a382d35a732ab678d848b102600487 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 18:20:05 +0100 Subject: [PATCH 03/43] branchless float saturation --- src/external/rlsw.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 6af7fcf84..6b7a7376a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -444,6 +444,27 @@ static inline void sw_vec4_transform(float dst[4], const float v[4], const sw_ma } } +static inline float sw_saturate(float x) +{ + // After several comparisons, this saturation method + // seems to be the most optimized by GCC and Clang, + // and it does not produce any conditional branching. + + // However, it is possible that a clamp could be + // more efficient on certain platforms. + // Comparisons will need to be made. + + // SEE: https://godbolt.org/z/5qYznK5zj + + // Saturation from below: max(0, x) + float y = 0.5f * (x + fabsf(x)); + + // Saturation from above: min(1, y) + return y - 0.5f * ((y - 1.0f) + fabsf(y - 1.0f)); + + // return (x < 0.0f) ? 0.0f : ((x > 1.0f) ? 1.0f : x); +} + static inline float sw_lerp(float a, float b, float t) { return a + t * (b - a); @@ -700,8 +721,7 @@ static inline void sw_map_repeat(int* out, float in, int max) static inline void sw_map_clamp_to_edge(int* out, float in, int max) { - in = (in > 1.0f) ? 1.0f : ((in < 0.0f) ? 0.0f : in); - *out = (int)(in * (max - 1) + 0.5f); + *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); } static inline void sw_map_mirrored_repeat(int* out, float in, int max) @@ -991,12 +1011,9 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ - float lerp = start->color[i] + t * dcol[i]; \ - float finalColor = texColor[i] * lerp; \ - /* Inline clamp to keep the value between 0 and 1 */ \ - /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ - dst[i] = (uint8_t)(finalColor * 255.0f); \ + float finalColor = texColor[i]; \ + finalColor *= start->color[i] + t * dcol[i]; \ + dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ else \ @@ -1004,10 +1021,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Interpolate the color */ \ for (int i = 0; i < 4; i++) { \ float finalColor = start->color[i] + t * dcol[i]; \ - /* Inline clamp to keep the value between 0 and 1 */ \ - /* NOTE: The need for clamp the colors could be a sign of problem during interpolation (?) */ \ - finalColor = (finalColor < 0.0f) ? 0.0f : (finalColor > 1.0f ? 1.0f : finalColor); \ - dst[i] = (uint8_t)(finalColor * 255.0f); \ + dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ \ From b70f02fdf263632b7e9549044b6da58fe74476ba Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 18:32:36 +0100 Subject: [PATCH 04/43] apply perspective correction to colors --- src/external/rlsw.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 6b7a7376a..d8155636c 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -930,6 +930,12 @@ void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VER v->texcoord[0] *= v->homogeneous[3]; v->texcoord[1] *= v->homogeneous[3]; + // Division of colors (perspective correct) + v->color[0] *= v->homogeneous[3]; + v->color[1] *= v->homogeneous[3]; + v->color[2] *= v->homogeneous[3]; + v->color[3] *= v->homogeneous[3]; + // Transform to screen space v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; @@ -1012,7 +1018,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ float finalColor = texColor[i]; \ - finalColor *= start->color[i] + t * dcol[i]; \ + finalColor *= (start->color[i] + t * dcol[i]) * w; \ dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ @@ -1020,7 +1026,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Interpolate the color */ \ for (int i = 0; i < 4; i++) { \ - float finalColor = start->color[i] + t * dcol[i]; \ + float finalColor = (start->color[i] + t * dcol[i]) * w; \ dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ } \ } \ From dcd60be4779d4cd0cc9e56fe64b72d8b395b5160 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 21:23:31 +0100 Subject: [PATCH 05/43] impl line clipping and rasterization + tweak function names --- src/external/rlsw.h | 406 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 350 insertions(+), 56 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d8155636c..c4d10a9fb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -296,6 +296,12 @@ void swBindTexture(uint32_t id); #define SW_STATE_DEPTH_TEST (1 << 1) #define SW_STATE_CULL_FACE (1 << 2) +#define SW_CLIP_INSIDE (0x00) // 0000 +#define SW_CLIP_LEFT (0x01) // 0001 +#define SW_CLIP_RIGHT (0x02) // 0010 +#define SW_CLIP_BOTTOM (0x04) // 0100 +#define SW_CLIP_TOP (0x08) // 1000 + /* === Internal Structs === */ typedef float sw_matrix_t[4*4]; @@ -479,6 +485,9 @@ static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t return result; } + +/* === Pixel Format Conversion Part === */ + static inline uint32_t sw_cvt_hf_ui(uint16_t h) { uint32_t s = (uint32_t)(h & 0x8000) << 16; @@ -708,7 +717,10 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse } } -static inline void sw_map_repeat(int* out, float in, int max) + +/* === Texture Sampling Part === */ + +static inline void sw_texture_map_repeat(int* out, float in, int max) { // Upscale to nearest texture coordinates // NOTE: We use '(int)(x+0.5)' although this is incorrect @@ -719,48 +731,48 @@ static inline void sw_map_repeat(int* out, float in, int max) *out = abs((int)((in - (int)in) * (max - 1) + 0.5f)); } -static inline void sw_map_clamp_to_edge(int* out, float in, int max) +static inline void sw_texture_map_clamp_to_edge(int* out, float in, int max) { *out = (int)(sw_saturate(in) * (max - 1) + 0.5f); } -static inline void sw_map_mirrored_repeat(int* out, float in, int max) +static inline void sw_texture_map_mirrored_repeat(int* out, float in, int max) { in = fmodf(fabsf(in), 2); if (in > 1.0f) in = 1.0f - (in - 1.0f); *out = (int)(in * (max - 1) + 0.5f); } -static inline void sw_map(int* out, float in, int max, SWwrap mode) +static inline void sw_texture_map(int* out, float in, int max, SWwrap mode) { switch (mode) { case SW_REPEAT: - sw_map_repeat(out, in, max); + sw_texture_map_repeat(out, in, max); break; case SW_CLAMP_TO_EDGE: - sw_map_clamp_to_edge(out, in, max); + sw_texture_map_clamp_to_edge(out, in, max); break; case SW_MIRRORED_REPEAT: - sw_map_mirrored_repeat(out, in, max); + sw_texture_map_mirrored_repeat(out, in, max); break; } } -static inline void sw_sample_texture_nearest(float* color, const sw_texture_t* tex, float u, float v) +static inline void sw_texture_sample_nearest(float* color, const sw_texture_t* tex, float u, float v) { int x, y; - sw_map(&x, u, tex->width, tex->sWrap); - sw_map(&y, v, tex->height, tex->tWrap); + sw_texture_map(&x, u, tex->width, tex->sWrap); + sw_texture_map(&y, v, tex->height, tex->tWrap); sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); } -static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* tex, float u, float v) +static inline void sw_texture_sample_linear(float* color, const sw_texture_t* tex, float u, float v) { int x0, y0, x1, y1; - sw_map(&x0, u, tex->width, tex->sWrap); - sw_map(&y0, v, tex->height, tex->tWrap); - sw_map(&x1, u + tex->tx, tex->width, tex->sWrap); - sw_map(&y1, v + tex->ty, tex->height, tex->tWrap); + sw_texture_map(&x0, u, tex->width, tex->sWrap); + sw_texture_map(&y0, v, tex->height, tex->tWrap); + sw_texture_map(&x1, u + tex->tx, tex->width, tex->sWrap); + sw_texture_map(&y1, v + tex->ty, tex->height, tex->tWrap); float fx = u * (tex->width - 1) - x0; float fy = v * (tex->height - 1) - y0; @@ -779,7 +791,7 @@ static inline void sw_sample_texture_bilinear(float* color, const sw_texture_t* } } -static inline void sw_sample_texture(float* color, const sw_texture_t* tex, float u, float v, +static inline void sw_texture_sample(float* color, const sw_texture_t* tex, float u, float v, float xDu, float yDu, float xDv, float yDv) { // TODO: It seems there are some incorrect detections depending on the context @@ -796,21 +808,33 @@ static inline void sw_sample_texture(float* color, const sw_texture_t* tex, floa if (L > 1.0f) { // Minification if (tex->minFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); + sw_texture_sample_nearest(color, tex, u, v); } else if (tex->minFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); + sw_texture_sample_linear(color, tex, u, v); } } else { // Magnification if (tex->magFilter == SW_NEAREST) { - sw_sample_texture_nearest(color, tex, u, v); + sw_texture_sample_nearest(color, tex, u, v); } else if (tex->magFilter == SW_LINEAR) { - sw_sample_texture_bilinear(color, tex, u, v); + sw_texture_sample_linear(color, tex, u, v); } } } -static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) + +/* === Projection Helper Functions === */ + +static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) +{ + screen[0] = RLSW.vpPos[0] + (ndc[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; + screen[1] = RLSW.vpPos[1] + (ndc[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; +} + + +/* === Triangle Rendering Part === */ + +static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { @@ -839,7 +863,7 @@ static inline bool sw_clip_polygon_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_ return *vertexCounter > 0; } -static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { for (int iAxis = 0; iAxis < 3; iAxis++) { @@ -905,15 +929,14 @@ static inline bool sw_clip_polygon_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGO return *vertexCounter > 0; } -void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; - for (int j = 0; j < 4; j++) v->homogeneous[j] = v->position[j]; - sw_vec4_transform(v->homogeneous, v->homogeneous, RLSW.matMVP); + sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); } - if (sw_clip_polygon_w(polygon, vertexCounter) && sw_clip_polygon_xyz(polygon, vertexCounter)) { + if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; @@ -937,14 +960,13 @@ void sw_project_and_clip_triangle(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VER v->color[3] *= v->homogeneous[3]; // Transform to screen space - v->screen[0] = RLSW.vpPos[0] + (v->homogeneous[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - v->screen[1] = RLSW.vpPos[1] + (v->homogeneous[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + sw_project_ndc_to_screen(v->screen, v->homogeneous); } } } -#define DEFINE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ -void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ /* Calculate the horizontal width and avoid division by zero */ \ @@ -1013,7 +1035,7 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Sample the texture */ \ float texColor[4]; \ - sw_sample_texture(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ \ /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ @@ -1043,9 +1065,9 @@ void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ } -#define DEFINE_RASTER_TRIANGLE(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ -void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ - const sw_texture_t* tex) \ +#define DEFINE_TRIANGLE_RASTER(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ +static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2, \ + const sw_texture_t* tex) \ { \ /* Swap vertices by increasing y */ \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t* tmp = v0; v0 = v1; v1 = tmp; } \ @@ -1134,17 +1156,17 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* } \ } -DEFINE_RASTER_SCANLINE(sw_raster_scanline, false, false) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex, true, false) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_depth, false, true) -DEFINE_RASTER_SCANLINE(sw_raster_scanline_tex_depth, true, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, false, false) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex, true, false) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_depth, false, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex_depth, true, true) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle, sw_raster_scanline, false) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex, sw_raster_scanline_tex, true) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_depth, sw_raster_scanline_depth, false) -DEFINE_RASTER_TRIANGLE(sw_raster_triangle_tex_depth, sw_raster_scanline_tex_depth, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex, sw_triangle_raster_scanline_tex, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_depth, sw_triangle_raster_scanline_depth, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex_depth, sw_triangle_raster_scanline_tex_depth, true) -void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) +static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) { int vertexCounter = 3; @@ -1153,39 +1175,39 @@ void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_v polygon[1] = *v1; polygon[2] = *v2; - sw_project_and_clip_triangle(polygon, &vertexCounter); + sw_triangle_project_and_clip(polygon, &vertexCounter); if (vertexCounter < 3) { return; } if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_tex_depth( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_tex_depth( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_tex( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_tex( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle_depth( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster_depth( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); } } else { - for (int_fast8_t i = 0; i < vertexCounter - 2; i++) { - sw_raster_triangle( + for (int i = 0; i < vertexCounter - 2; i++) { + sw_triangle_raster( &polygon[0], &polygon[i + 1], &polygon[i + 2], &RLSW.loadedTextures[RLSW.currentTexture] ); @@ -1193,6 +1215,274 @@ void sw_render_triangle(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_v } } + +/* === Line Rendering Part === */ + +uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) +{ + uint8_t code = SW_CLIP_INSIDE; + if (screen[0] < xMin) code |= SW_CLIP_LEFT; + if (screen[0] > xMax) code |= SW_CLIP_RIGHT; + if (screen[1] < yMin) code |= SW_CLIP_TOP; + if (screen[1] > yMax) code |= SW_CLIP_BOTTOM; + return code; +} + +bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) +{ + int xMin = RLSW.vpMin[0]; + int yMin = RLSW.vpMin[1]; + int xMax = RLSW.vpMax[0]; + int yMax = RLSW.vpMax[1]; + + bool accept = false; + uint8_t code0, code1; + float m = 0; + + if (v1->screen[0] != v2->screen[0]) { + m = (v2->screen[1] - v1->screen[1]) / (v2->screen[0] - v1->screen[0]); + } + + for (;;) { + code0 = sw_line_clip_encode_2d(v1->screen, xMin, yMin, xMax, yMax); + code1 = sw_line_clip_encode_2d(v2->screen, xMin, yMin, xMax, yMax); + + // Accepted if both endpoints lie within rectangle + if ((code0 | code1) == 0) { + accept = true; + break; + } + + // Rejected if both endpoints are outside rectangle, in same region + if (code0 & code1) break; + + if (code0 == SW_CLIP_INSIDE) { + uint8_t ctmp = code0; code0 = code1; code1 = ctmp; + sw_vertex_t vtmp = *v1; *v1 = *v2; *v2 = vtmp; + } + + if (code0 & SW_CLIP_LEFT) { + v1->screen[1] += (RLSW.vpMin[0] - v1->screen[0])*m; + v1->screen[0] = (float)RLSW.vpMin[0]; + } + else if (code0 & SW_CLIP_RIGHT) { + v1->screen[1] += (RLSW.vpMax[0] - v1->screen[0])*m; + v1->screen[0] = (float)RLSW.vpMax[0]; + } + else if (code0 & SW_CLIP_BOTTOM) { + if (m) v1->screen[0] += (RLSW.vpMin[1] - v1->screen[1]) / m; + v1->screen[1] = (float)RLSW.vpMin[1]; + } + else if (code0 & SW_CLIP_TOP) { + if (m) v1->screen[0] += (RLSW.vpMax[1] - v1->screen[1]) / m; + v1->screen[1] = (float)RLSW.vpMax[1]; + } + } + + return accept; +} + +bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +{ + if (fabsf(p) < SW_CLIP_EPSILON) { + // Check if the line is entirely outside the window + if (q < -SW_CLIP_EPSILON) return 0; // Completely outside + return 1; // Completely inside or on the edges + } + + const float r = q / p; + + if (p < 0) { + if (r > *t2) return 0; + if (r > *t1) *t1 = r; + } else { + if (r < *t1) return 0; + if (r < *t2) *t2 = r; + } + + return 1; +} + +bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +{ + // TODO: Lerp all vertices here, not just homogeneous coordinates + + float t1 = 0, t2 = 1; + + float delta[4]; + for (int i = 0; i < 4; i++) { + delta[i] = v2->homogeneous[i] - v1->homogeneous[i]; + } + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[0], -delta[3] + delta[0], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[0], -delta[3] - delta[0], &t1, &t2)) return false; + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[1], -delta[3] + delta[1], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[1], -delta[3] - delta[1], &t1, &t2)) return false; + + if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[2], -delta[3] + delta[2], &t1, &t2)) return false; + if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[2], -delta[3] - delta[2], &t1, &t2)) return false; + + if (t2 < 1) { + for (int i = 0; i < 4; i++) { + v2->homogeneous[i] = v1->homogeneous[i] + t2 * delta[i]; + } + } + + if (t1 > 0) { + for (int i = 0; i < 4; i++) { + v1->homogeneous[i] = v1->homogeneous[i] + t1 * delta[i]; + } + } + + return true; +} + +bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) +{ + sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); + sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); + + if (v0->homogeneous[3] == 1.0f && v1->homogeneous[3] == 1.0f) { + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + if (!sw_line_clip_2d(v0, v1)) { + return false; + } + } + else { + if (!sw_line_clip_3d(v0, v1)) { + return false; + } + // Convert XYZ coordinates to NDC + v0->homogeneous[3] = 1.0f / v0->homogeneous[3]; + v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; + for (int i = 0; i < 3; i++) { + v0->homogeneous[i] *= v0->homogeneous[3]; + v1->homogeneous[i] *= v1->homogeneous[3]; + } + // Convert NDC coordinates to screen space + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + } + + return true; +} + +#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST) \ +void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ +{ \ + int x1 = (int)v0->screen[0]; \ + int y1 = (int)v0->screen[1]; \ + int x2 = (int)v1->screen[0]; \ + int y2 = (int)v1->screen[1]; \ + \ + float z1 = v0->homogeneous[2]; \ + float z2 = v1->homogeneous[2]; \ + \ + int shortLen = y2 - y1; \ + int longLen = x2 - x1; \ + bool yLonger = 0; \ + \ + if (abs(shortLen) > abs(longLen)) { \ + int tmp = shortLen; \ + shortLen = longLen; \ + longLen = tmp; \ + yLonger = 1; \ + } \ + \ + float invEndVal = 1.0f / longLen; \ + int endVal = longLen; \ + int sgnInc = 1; \ + \ + if (longLen < 0) { \ + longLen = -longLen; \ + sgnInc = -1; \ + } \ + \ + int decInc = (longLen == 0) ? 0 \ + : (shortLen << 16) / longLen; \ + \ + const int fb_width = RLSW.framebuffer.width; \ + const float z_diff = z2 - z1; \ + \ + uint8_t* color_buffer = RLSW.framebuffer.color; \ + uint16_t* depth_buffer = RLSW.framebuffer.depth; \ + \ + int j = 0; \ + if (yLonger) { \ + for (int i = 0; i != endVal; i += sgnInc, j += decInc) { \ + float t = (float)i * invEndVal; \ + \ + int x = x1 + (j >> 16); \ + int y = y1 + i; \ + float z = z1 + t * z_diff; \ + int pixel_index = y * fb_width + x; \ + \ + uint16_t* dptr = &depth_buffer[pixel_index]; \ + if (ENABLE_DEPTH_TEST) { \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) continue; \ + } \ + \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + int color_index = 4 * pixel_index; \ + uint8_t* cptr = &color_buffer[color_index]; \ + \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ + } \ + } \ + else { \ + for (int i = 0; i != endVal; i += sgnInc, j += decInc) { \ + float t = (float)i * invEndVal; \ + \ + int x = x1 + i; \ + int y = y1 + (j >> 16); \ + float z = z1 + t * z_diff; \ + int pixel_index = y * fb_width + x; \ + \ + uint16_t* dptr = &depth_buffer[pixel_index]; \ + if (ENABLE_DEPTH_TEST) { \ + float depth = (float)(*dptr) / UINT16_MAX; \ + if (z > depth) continue; \ + } \ + \ + *dptr = (uint16_t)(z * UINT16_MAX); \ + \ + int color_index = 4 * pixel_index; \ + uint8_t* cptr = &color_buffer[color_index]; \ + \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ + } \ + } \ +} + +DEFINE_LINE_RASTER(sw_line_raster, false) +DEFINE_LINE_RASTER(sw_line_raster_depth, true) + +static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) +{ + if (!sw_line_project_and_clip(v0, v1)) { + return; + } + + if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { + sw_line_raster_depth(v0, v1); + } + else { + sw_line_raster(v0, v1); + } +} + +/* === Some Validity Check Helper === */ + static inline bool sw_is_texture_id_valid(uint32_t id) { bool valid = true; @@ -1214,6 +1504,7 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } + /* === Public Implementation === */ void swInit(int w, int h) @@ -1694,22 +1985,25 @@ void swVertex4fv(const float* v) case SW_POINTS: break; case SW_LINES: - neededVertices = 2; + sw_line_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1] + ); break; case SW_TRIANGLES: - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[0], &RLSW.vertexBuffer[1], &RLSW.vertexBuffer[2] ); break; case SW_QUADS: - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[0], &RLSW.vertexBuffer[1], &RLSW.vertexBuffer[2] ); - sw_render_triangle( + sw_triangle_render( &RLSW.vertexBuffer[2], &RLSW.vertexBuffer[3], &RLSW.vertexBuffer[0] From 5f92a0fcd0213c83a6de294fcf9aba3e0d1162df Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 12 Mar 2025 21:42:29 +0100 Subject: [PATCH 06/43] impl face culling --- src/external/rlsw.h | 50 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c4d10a9fb..3e1340569 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -145,7 +145,7 @@ typedef enum { SW_LINES = GL_LINES, SW_TRIANGLES = GL_TRIANGLES, SW_QUADS = GL_QUADS, -} SWfill; +} SWdraw; typedef enum { SW_FRONT = GL_FRONT, @@ -232,7 +232,9 @@ void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(void); -void swBegin(SWfill mode); +void swCullFace(SWface face); + +void swBegin(SWdraw mode); void swEnd(void); void swVertex2i(int x, int y); @@ -270,7 +272,7 @@ void swNormal3f(float x, float y, float z); void swNormal3fv(const float* v); void swBindArray(SWarray type, void *buffer); -void swDrawArrays(SWfill mode, int offset, int count); +void swDrawArrays(SWdraw mode, int offset, int count); uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); void swUnloadTexture(uint32_t id); @@ -370,7 +372,7 @@ typedef struct { sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - SWfill fillMode; // Current polygon filling mode (e.g., lines, triangles) + SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) float pointSize; // Rasterized point size float lineWidth; // Rasterized line width @@ -936,6 +938,18 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); } + if (RLSW.stateFlags & SW_STATE_CULL_FACE) { + float x0 = polygon[0].homogeneous[0], y0 = polygon[0].homogeneous[1]; + float x1 = polygon[1].homogeneous[0], y1 = polygon[1].homogeneous[1]; + float x2 = polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; + + float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + if ((RLSW.cullFace == SW_BACK && sgnArea >= 0) || (RLSW.cullFace == SW_FRONT && sgnArea <= 0)) { + *vertexCounter = 0; + return; + } + } + if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = polygon + i; @@ -965,7 +979,7 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP } } -#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ @@ -1504,6 +1518,11 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } +static inline bool sw_is_face_valid(int face) +{ + return (face == SW_FRONT || face == SW_BACK); +} + /* === Public Implementation === */ @@ -1546,6 +1565,8 @@ void swInit(int w, int h) RLSW.vertexBuffer[0].normal[1] = 0.0f; RLSW.vertexBuffer[0].normal[2] = 1.0f; + RLSW.cullFace = SW_BACK; + static const float defTex[3*2*2] = { 1.0f, 1.0f, 1.0f, @@ -1889,14 +1910,23 @@ void swClear(void) } } -void swBegin(SWfill mode) +void swCullFace(SWface face) +{ + if (!sw_is_face_valid(face)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.cullFace = face; +} + +void swBegin(SWdraw mode) { if (mode < SW_POINTS || mode > SW_QUADS) { RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.vertexCounter = 0; - RLSW.fillMode = mode; + RLSW.drawMode = mode; } void swEnd(void) @@ -1960,7 +1990,7 @@ void swVertex4fv(const float* v) RLSW.vertexCounter++; int neededVertices = 0; - switch (RLSW.fillMode) { + switch (RLSW.drawMode) { case SW_POINTS: neededVertices = 1; break; @@ -1981,7 +2011,7 @@ void swVertex4fv(const float* v) sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); - switch (RLSW.fillMode) { + switch (RLSW.drawMode) { case SW_POINTS: break; case SW_LINES: @@ -2259,7 +2289,7 @@ void swBindArray(SWarray type, void *buffer) } } -void swDrawArrays(SWfill mode, int offset, int count) +void swDrawArrays(SWdraw mode, int offset, int count) { if (RLSW.array.positions == 0) { RLSW.errCode = SW_INVALID_OPERATION; From 804240966abf787c7686bbeb8771399fc0866f9d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 00:16:58 +0100 Subject: [PATCH 07/43] impl color blending --- src/external/rlsw.h | 472 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 392 insertions(+), 80 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 3e1340569..8f6fc94ca 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -69,6 +69,7 @@ #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 #define GL_MODELVIEW 0x1700 #define GL_PROJECTION 0x1701 @@ -97,6 +98,18 @@ #define GL_FRONT 0x0404 #define GL_BACK 0x0405 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 + #define GL_NEAREST 0x2600 #define GL_LINEAR 0x2601 @@ -124,7 +137,8 @@ typedef enum { SW_TEXTURE_2D = GL_TEXTURE_2D, SW_DEPTH_TEST = GL_DEPTH_TEST, - SW_CULL_FACE = GL_CULL_FACE + SW_CULL_FACE = GL_CULL_FACE, + SW_BLEND = GL_BLEND } SWstate; typedef enum { @@ -152,6 +166,20 @@ typedef enum { SW_BACK = GL_BACK, } SWface; +typedef enum { + SW_ZERO = GL_ZERO, + SW_ONE = GL_ONE, + SW_SRC_COLOR = GL_SRC_COLOR, + SW_ONE_MINUS_SRC_COLOR = GL_ONE_MINUS_SRC_COLOR, + SW_SRC_ALPHA = GL_SRC_ALPHA, + SW_ONE_MINUS_SRC_ALPHA = GL_ONE_MINUS_SRC_ALPHA, + SW_DST_ALPHA = GL_DST_ALPHA, + SW_ONE_MINUS_DST_ALPHA = GL_ONE_MINUS_DST_ALPHA, + SW_DST_COLOR = GL_DST_COLOR, + SW_ONE_MINUS_DST_COLOR = GL_ONE_MINUS_DST_COLOR, + SW_SRC_ALPHA_SATURATE = GL_SRC_ALPHA_SATURATE +} SWfactor; + typedef enum { SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) @@ -206,6 +234,7 @@ typedef enum { SW_INVALID_OPERATION = GL_INVALID_OPERATION, } SWerrcode; + /* === Public API === */ void swInit(int w, int h); @@ -232,6 +261,7 @@ void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(void); +void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); void swBegin(SWdraw mode); @@ -294,9 +324,12 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) +#define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) + #define SW_STATE_TEXTURE_2D (1 << 0) #define SW_STATE_DEPTH_TEST (1 << 1) #define SW_STATE_CULL_FACE (1 << 2) +#define SW_STATE_BLEND (1 << 3) #define SW_CLIP_INSIDE (0x00) // 0000 #define SW_CLIP_LEFT (0x01) // 0001 @@ -354,9 +387,6 @@ typedef struct { uint32_t currentTexture; sw_matrix_t *currentMatrix; - uint32_t blendFunction; - uint32_t depthFunction; - int vpPos[2]; // Represents the top-left corner of the viewport int vpDim[2]; // Represents the dimensions of the viewport (minus one) int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) @@ -392,6 +422,9 @@ typedef struct { SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) bool modelMatrixUsed; // Flag indicating if the model matrix is used + SWfactor srcFactor; + SWfactor dstFactor; + SWface cullFace; // Faces to cull SWerrcode errCode; // Last error code @@ -825,6 +858,144 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa } +/* === Color Blending Functions === */ + +static inline void sw_blend_colors(float dst[4], float src[4]) +{ + float src_factor[4] = { 0 }; + float dst_factor[4] = { 0 }; + + switch (RLSW.srcFactor) { + case SW_ZERO: + src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 0.0f; + break; + case SW_ONE: + src_factor[0] = src_factor[1] = src_factor[2] = src_factor[3] = 1.0f; + break; + case SW_SRC_COLOR: + src_factor[0] = src[0]; + src_factor[1] = src[1]; + src_factor[2] = src[2]; + src_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_COLOR: + src_factor[0] = 1.0f - src[0]; + src_factor[1] = 1.0f - src[1]; + src_factor[2] = 1.0f - src[2]; + src_factor[3] = 1.0f - src[3]; + break; + case SW_SRC_ALPHA: + src_factor[0] = src[3]; + src_factor[1] = src[3]; + src_factor[2] = src[3]; + src_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_ALPHA: + src_factor[0] = 1.0f - src[3]; + src_factor[1] = 1.0f - src[3]; + src_factor[2] = 1.0f - src[3]; + src_factor[3] = 1.0f - src[3]; + break; + case SW_DST_ALPHA: + src_factor[0] = dst[3]; + src_factor[1] = dst[3]; + src_factor[2] = dst[3]; + src_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_ALPHA: + src_factor[0] = 1.0f - dst[3]; + src_factor[1] = 1.0f - dst[3]; + src_factor[2] = 1.0f - dst[3]; + src_factor[3] = 1.0f - dst[3]; + break; + case SW_DST_COLOR: + src_factor[0] = dst[0]; + src_factor[1] = dst[1]; + src_factor[2] = dst[2]; + src_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_COLOR: + src_factor[0] = 1.0f - dst[0]; + src_factor[1] = 1.0f - dst[1]; + src_factor[2] = 1.0f - dst[2]; + src_factor[3] = 1.0f - dst[3]; + break; + case SW_SRC_ALPHA_SATURATE: + src_factor[0] = 1.0f; + src_factor[1] = 1.0f; + src_factor[2] = 1.0f; + src_factor[3] = fminf(src[3], 1.0f); + break; + } + + switch (RLSW.dstFactor) { + case SW_ZERO: + dst_factor[0] = dst_factor[1] = dst_factor[2] = dst_factor[3] = 0.0f; + break; + case SW_ONE: + dst_factor[0] = dst_factor[1] = dst_factor[2] = dst_factor[3] = 1.0f; + break; + case SW_SRC_COLOR: + dst_factor[0] = src[0]; + dst_factor[1] = src[1]; + dst_factor[2] = src[2]; + dst_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_COLOR: + dst_factor[0] = 1.0f - src[0]; + dst_factor[1] = 1.0f - src[1]; + dst_factor[2] = 1.0f - src[2]; + dst_factor[3] = 1.0f - src[3]; + break; + case SW_SRC_ALPHA: + dst_factor[0] = src[3]; + dst_factor[1] = src[3]; + dst_factor[2] = src[3]; + dst_factor[3] = src[3]; + break; + case SW_ONE_MINUS_SRC_ALPHA: + dst_factor[0] = 1.0f - src[3]; + dst_factor[1] = 1.0f - src[3]; + dst_factor[2] = 1.0f - src[3]; + dst_factor[3] = 1.0f - src[3]; + break; + case SW_DST_ALPHA: + dst_factor[0] = dst[3]; + dst_factor[1] = dst[3]; + dst_factor[2] = dst[3]; + dst_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_ALPHA: + dst_factor[0] = 1.0f - dst[3]; + dst_factor[1] = 1.0f - dst[3]; + dst_factor[2] = 1.0f - dst[3]; + dst_factor[3] = 1.0f - dst[3]; + break; + case SW_DST_COLOR: + dst_factor[0] = dst[0]; + dst_factor[1] = dst[1]; + dst_factor[2] = dst[2]; + dst_factor[3] = dst[3]; + break; + case SW_ONE_MINUS_DST_COLOR: + dst_factor[0] = 1.0f - dst[0]; + dst_factor[1] = 1.0f - dst[1]; + dst_factor[2] = 1.0f - dst[2]; + dst_factor[3] = 1.0f - dst[3]; + break; + case SW_SRC_ALPHA_SATURATE: + // NOTE: This case is only available for the source. + // Since the factors are validated before assignment, + // we should never reach this point. + break; + } + + for (int i = 0; i < 4; ++i) { + dst[i] = src_factor[i] * src[i] + dst_factor[i] * dst[i]; + } +} + + /* === Projection Helper Functions === */ static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) @@ -979,9 +1150,9 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP } } -#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST) \ +#define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ - const sw_vertex_t* end, float yDu, float yDv) \ + const sw_vertex_t* end, float yDu, float yDv) \ { \ /* Calculate the horizontal width and avoid division by zero */ \ float dx = end->screen[0] - start->screen[0]; \ @@ -1022,12 +1193,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - uint8_t* row_ptr = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ - uint8_t* dst = row_ptr + xStart * 4; \ + uint8_t* cptrRow = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ + uint8_t* cptr = cptrRow + xStart * 4; \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - uint16_t* depth_row = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ - uint16_t* dptr = depth_row; \ + uint16_t* dptrRow = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ + uint16_t* dptr = dptrRow; \ \ /* Scanline rasterization loop */ \ for (int x = xStart; x < xEnd; x++) { \ @@ -1045,32 +1216,55 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, /* Update the depth buffer */ \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ - if (ENABLE_TEXTURE) \ + if (ENABLE_COLOR_BLEND) \ { \ - /* Sample the texture */ \ - float texColor[4]; \ - sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + float dstColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; \ + float srcColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; \ + \ + if (ENABLE_TEXTURE) { \ + sw_texture_sample(srcColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + } \ \ - /* Interpolate the color and modulate by the texture color */ \ for (int i = 0; i < 4; i++) { \ - float finalColor = texColor[i]; \ - finalColor *= (start->color[i] + t * dcol[i]) * w; \ - dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + dstColor[i] = (float)cptr[i] / 255; \ + srcColor[i] *= (start->color[i] + t * dcol[i]) * w; \ + } \ + \ + sw_blend_colors(dstColor, srcColor); \ + \ + for (int i = 0; i < 4; i++) { \ + cptr[i] = (uint8_t)(sw_saturate(dstColor[i]) * 255); \ } \ } \ else \ { \ - /* Interpolate the color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = (start->color[i] + t * dcol[i]) * w; \ - dst[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + if (ENABLE_TEXTURE) \ + { \ + /* Sample the texture */ \ + float texColor[4]; \ + sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + \ + /* Interpolate the color and modulate by the texture color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = texColor[i]; \ + finalColor *= (start->color[i] + t * dcol[i]) * w; \ + cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + } \ + } \ + else \ + { \ + /* Interpolate the color */ \ + for (int i = 0; i < 4; i++) { \ + float finalColor = (start->color[i] + t * dcol[i]) * w; \ + cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ + } \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - dst += 4; \ + cptr += 4; \ dptr++; \ if (ENABLE_TEXTURE) { \ u += xDu; \ @@ -1170,15 +1364,23 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const } \ } -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, false, false) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex, true, false) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_depth, false, true) -DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_tex_depth, true, true) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, 0, 0, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX, 1, 0, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH, 0, 1, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_BLEND, 0, 0, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH, 1, 1, 0) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_BLEND, 1, 0, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH_BLEND, 0, 1, 1) +DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH_BLEND, 1, 1, 1) DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex, sw_triangle_raster_scanline_tex, true) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_depth, sw_triangle_raster_scanline_depth, false) -DEFINE_TRIANGLE_RASTER(sw_triangle_raster_tex_depth, sw_triangle_raster_scanline_tex_depth, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX, sw_triangle_raster_scanline_TEX, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH, sw_triangle_raster_scanline_DEPTH, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_BLEND, sw_triangle_raster_scanline_BLEND, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH, sw_triangle_raster_scanline_TEX_DEPTH, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND, sw_triangle_raster_scanline_TEX_BLEND, true) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND, sw_triangle_raster_scanline_DEPTH_BLEND, false) +DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_scanline_TEX_DEPTH_BLEND, true) static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* v1, const sw_vertex_t* v2) { @@ -1195,37 +1397,39 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* return; } - if ((RLSW.stateFlags & SW_STATE_TEXTURE_2D) && (RLSW.stateFlags & SW_STATE_DEPTH_TEST)) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_tex_depth( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } +# define TRIANGLE_RASTER(RASTER_FUNC) \ + { \ + for (int i = 0; i < vertexCounter - 2; i++) { \ + RASTER_FUNC( \ + &polygon[0], &polygon[i + 1], &polygon[i + 2], \ + &RLSW.loadedTextures[RLSW.currentTexture] \ + ); \ + } \ } - else if (RLSW.stateFlags & SW_STATE_TEXTURE_2D) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_tex( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } + + if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND) } - else if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster_depth( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + TRIANGLE_RASTER(sw_triangle_raster_BLEND) + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + TRIANGLE_RASTER(sw_triangle_raster_DEPTH) + } + else if (SW_STATE_CHECK(SW_STATE_TEXTURE_2D)) { + TRIANGLE_RASTER(sw_triangle_raster_TEX) } else { - for (int i = 0; i < vertexCounter - 2; i++) { - sw_triangle_raster( - &polygon[0], &polygon[i + 1], &polygon[i + 2], - &RLSW.loadedTextures[RLSW.currentTexture] - ); - } + TRIANGLE_RASTER(sw_triangle_raster) } } @@ -1383,7 +1587,7 @@ bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) return true; } -#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST) \ +#define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ int x1 = (int)v0->screen[0]; \ @@ -1417,11 +1621,11 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int decInc = (longLen == 0) ? 0 \ : (shortLen << 16) / longLen; \ \ - const int fb_width = RLSW.framebuffer.width; \ - const float z_diff = z2 - z1; \ + const int fbWidth = RLSW.framebuffer.width; \ + const float zDiff = z2 - z1; \ \ - uint8_t* color_buffer = RLSW.framebuffer.color; \ - uint16_t* depth_buffer = RLSW.framebuffer.depth; \ + uint8_t* colorBuffer = RLSW.framebuffer.color; \ + uint16_t* depthBuffer = RLSW.framebuffer.depth; \ \ int j = 0; \ if (yLonger) { \ @@ -1430,10 +1634,10 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ int x = x1 + (j >> 16); \ int y = y1 + i; \ - float z = z1 + t * z_diff; \ - int pixel_index = y * fb_width + x; \ + float z = z1 + t * zDiff; \ + int pixel_index = y * fbWidth + x; \ \ - uint16_t* dptr = &depth_buffer[pixel_index]; \ + uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ float depth = (float)(*dptr) / UINT16_MAX; \ if (z > depth) continue; \ @@ -1442,11 +1646,27 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ int color_index = 4 * pixel_index; \ - uint8_t* cptr = &color_buffer[color_index]; \ + uint8_t* cptr = &colorBuffer[color_index]; \ \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + float srcColor[4]; \ + for (int j = 0; j < 4; j++) { \ + dstColor[j] = (float)cptr[i] / 255; \ + srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ + } \ + sw_blend_colors(dstColor, srcColor); \ + for (int j = 0; j < 4; j++) { \ + cptr[j] = (uint8_t)(dstColor[j] * 255); \ + } \ + } \ + else \ + { \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ } \ } \ } \ @@ -1456,10 +1676,10 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ int x = x1 + i; \ int y = y1 + (j >> 16); \ - float z = z1 + t * z_diff; \ - int pixel_index = y * fb_width + x; \ + float z = z1 + t * zDiff; \ + int pixel_index = y * fbWidth + x; \ \ - uint16_t* dptr = &depth_buffer[pixel_index]; \ + uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ float depth = (float)(*dptr) / UINT16_MAX; \ if (z > depth) continue; \ @@ -1468,18 +1688,36 @@ void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ *dptr = (uint16_t)(z * UINT16_MAX); \ \ int color_index = 4 * pixel_index; \ - uint8_t* cptr = &color_buffer[color_index]; \ + uint8_t* cptr = &colorBuffer[color_index]; \ \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + float srcColor[4]; \ + for (int j = 0; j < 4; j++) { \ + dstColor[j] = (float)cptr[i] / 255; \ + srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ + } \ + sw_blend_colors(dstColor, srcColor); \ + for (int j = 0; j < 4; j++) { \ + cptr[j] = (uint8_t)(dstColor[j] * 255); \ + } \ + } \ + else \ + { \ + for (int j = 0; j < 4; j++) { \ + float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ + cptr[j] = (uint8_t)(finalColor * 255); \ + } \ } \ } \ } \ } -DEFINE_LINE_RASTER(sw_line_raster, false) -DEFINE_LINE_RASTER(sw_line_raster_depth, true) +DEFINE_LINE_RASTER(sw_line_raster, 0, 0) +DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0) +DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1) +DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1) static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) { @@ -1487,8 +1725,14 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) return; } - if (RLSW.stateFlags & SW_STATE_DEPTH_TEST) { - sw_line_raster_depth(v0, v1); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_raster_DEPTH(v0, v1); } else { sw_line_raster(v0, v1); @@ -1523,6 +1767,54 @@ static inline bool sw_is_face_valid(int face) return (face == SW_FRONT || face == SW_BACK); } +static inline bool sw_is_blend_src_factor_valid(int blend) +{ + bool result = false; + + switch (blend) { + case SW_ZERO: + case SW_ONE: + case SW_SRC_COLOR: + case SW_ONE_MINUS_SRC_COLOR: + case SW_SRC_ALPHA: + case SW_ONE_MINUS_SRC_ALPHA: + case SW_DST_ALPHA: + case SW_ONE_MINUS_DST_ALPHA: + case SW_DST_COLOR: + case SW_ONE_MINUS_DST_COLOR: + case SW_SRC_ALPHA_SATURATE: + result = true; + break; + default: + break; + } + + return result; +} + +static inline bool sw_is_blend_dst_factor_valid(int blend) +{ + bool result = false; + + switch (blend) { + case SW_ZERO: + case SW_ONE: + case SW_SRC_COLOR: + case SW_ONE_MINUS_SRC_COLOR: + case SW_SRC_ALPHA: + case SW_ONE_MINUS_SRC_ALPHA: + case SW_DST_ALPHA: + case SW_ONE_MINUS_DST_ALPHA: + case SW_DST_COLOR: + case SW_ONE_MINUS_DST_COLOR: + result = true; + break; + default: + break; + } + + return result; +} /* === Public Implementation === */ @@ -1565,6 +1857,9 @@ void swInit(int w, int h) RLSW.vertexBuffer[0].normal[1] = 0.0f; RLSW.vertexBuffer[0].normal[2] = 1.0f; + RLSW.srcFactor = SW_SRC_ALPHA; + RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; + RLSW.cullFace = SW_BACK; static const float defTex[3*2*2] = @@ -1610,6 +1905,9 @@ void swEnable(SWstate state) case SW_CULL_FACE: RLSW.stateFlags |= SW_STATE_CULL_FACE; break; + case SW_BLEND: + RLSW.stateFlags |= SW_STATE_BLEND; + break; default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -1628,6 +1926,9 @@ void swDisable(SWstate state) case SW_CULL_FACE: RLSW.stateFlags &= ~SW_STATE_CULL_FACE; break; + case SW_BLEND: + RLSW.stateFlags &= ~SW_STATE_BLEND; + break; default: RLSW.errCode = SW_INVALID_ENUM; break; @@ -1910,6 +2211,17 @@ void swClear(void) } } +void swBlendFunc(SWfactor sfactor, SWfactor dfactor) +{ + if (!sw_is_blend_src_factor_valid(sfactor) + || !sw_is_blend_dst_factor_valid(dfactor)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.srcFactor = sfactor; + RLSW.dstFactor = dfactor; +} + void swCullFace(SWface face) { if (!sw_is_face_valid(face)) { From ff1849dc97ee27646c41a3f0f9e488c166fce98a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 01:47:16 +0100 Subject: [PATCH 08/43] fixes and tweaks --- src/external/rlsw.h | 106 +++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 8f6fc94ca..90c1a7861 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -253,8 +253,8 @@ void swTranslatef(float x, float y, float z); void swRotatef(float angle, float x, float y, float z); void swScalef(float x, float y, float z); void swMultMatrixf(const float* mat); -void swFrustum(float left, float right, float bottom, float top, float znear, float zfar); -void swOrtho(float left, float right, float bottom, float top, float znear, float zfar); +void swFrustum(double left, double right, double bottom, double top, double znear, double zfar); +void swOrtho(double left, double right, double bottom, double top, double znear, double zfar); void swViewport(int x, int y, int width, int height); @@ -1115,7 +1115,7 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP float x2 = polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); - if ((RLSW.cullFace == SW_BACK && sgnArea >= 0) || (RLSW.cullFace == SW_FRONT && sgnArea <= 0)) { + if ((RLSW.cullFace == SW_FRONT && sgnArea >= 0) || (RLSW.cullFace == SW_BACK && sgnArea <= 0)) { *vertexCounter = 0; return; } @@ -2076,33 +2076,40 @@ void swRotatef(float angle, float x, float y, float z) { angle *= SW_DEG2RAD; - sw_matrix_t mat; - sw_matrix_id(mat); - float lengthSq = x*x + y*y + z*z; if (lengthSq != 1.0f && lengthSq != 0.0f) { - float invLenght = 1.0f / lengthSq; - x *= invLenght; - y *= invLenght; - z *= invLenght; + float invLength = 1.0f/sqrtf(lengthSq); + x *= invLength; + y *= invLength; + z *= invLength; } float sinres = sinf(angle); float cosres = cosf(angle); float t = 1.0f - cosres; - mat[0] = x*x*t + cosres; - mat[1] = y*x*t + z*sinres; - mat[2] = z*x*t - y*sinres; + sw_matrix_t mat; + + mat[0] = x*x*t + cosres; + mat[1] = y*x*t + z*sinres; + mat[2] = z*x*t - y*sinres; + mat[3] = 0.0f; - mat[4] = x*y*t - z*sinres; - mat[5] = y*y*t + cosres; - mat[6] = z*y*t + x*sinres; + mat[4] = x*y*t - z*sinres; + mat[5] = y*y*t + cosres; + mat[6] = z*y*t + x*sinres; + mat[7] = 0.0f; - mat[8] = x*z*t + y*sinres; - mat[9] = y*z*t - x*sinres; + mat[8] = x*z*t + y*sinres; + mat[9] = y*z*t - x*sinres; mat[10] = z*z*t + cosres; + mat[11] = 0.0f; + + mat[12] = 0.0f; + mat[13] = 0.0f; + mat[14] = 0.0f; + mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); } @@ -2124,44 +2131,63 @@ void swMultMatrixf(const float* mat) sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); } -void swFrustum(float left, float right, float bottom, float top, float znear, float zfar) +void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) { - sw_matrix_t mat = { 0 }; + sw_matrix_t mat; - float rl = right - left; - float tb = top - bottom; - float fn = zfar - znear; + double rl = right - left; + double tb = top - bottom; + double fn = zfar - znear; - mat[0] = (znear * 2.0f) / rl; - mat[5] = (znear * 2.0f) / tb; + mat[0] = (znear*2.0)/rl; + mat[1] = 0.0f; + mat[2] = 0.0f; + mat[3] = 0.0f; - mat[8] = (right + left) / rl; - mat[9] = (top + bottom) / tb; - mat[10] = -(zfar + znear) / fn; + mat[4] = 0.0f; + mat[5] = (znear*2.0)/tb; + mat[6] = 0.0f; + mat[7] = 0.0f; + + mat[8] = (right + left)/rl; + mat[9] = (top + bottom)/tb; + mat[10] = -(zfar + znear)/fn; mat[11] = -1.0f; - mat[14] = -(zfar * znear * 2.0f) / fn; + mat[12] = 0.0f; + mat[13] = 0.0f; + mat[14] = -(zfar*znear*2.0)/fn; + mat[15] = 0.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); } -void swOrtho(float left, float right, float bottom, float top, float znear, float zfar) +void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) { - sw_matrix_t mat = { 0 }; + sw_matrix_t mat; + + double rl = right - left; + double tb = top - bottom; + double fn = zfar - znear; - float rl = (right - left); - float tb = (top - bottom); - float fn = (zfar - znear); + mat[0] = 2.0f/rl; + mat[1] = 0.0f; + mat[2] = 0.0f; + mat[3] = 0.0f; - mat[0] = 2.0f / rl; - mat[5] = 2.0f / tb; + mat[4] = 0.0f; + mat[5] = 2.0f/tb; + mat[6] = 0.0f; + mat[7] = 0.0f; - mat[10] = -2.0f / fn; + mat[8] = 0.0f; + mat[9] = 0.0f; + mat[10] = -2.0f/fn; mat[11] = 0.0f; - mat[12] = -(left + right) / rl; - mat[13] = -(top + bottom) / tb; - mat[14] = -(zfar + znear) / fn; + mat[12] = -(left + right)/rl; + mat[13] = -(top + bottom)/tb; + mat[14] = -(zfar + znear)/fn; mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); From a1b2ecbded5b7ab1f7ec36f7be84f583f89afbfe Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 01:58:15 +0100 Subject: [PATCH 09/43] add clear buffer bitmasks --- src/external/rlsw.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 90c1a7861..04b2784d4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -71,6 +71,9 @@ #define GL_CULL_FACE 0x0B44 #define GL_BLEND 0x0BE2 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_DEPTH_BUFFER_BIT 0x00000100 + #define GL_MODELVIEW 0x1700 #define GL_PROJECTION 0x1701 #define GL_TEXTURE 0x1702 @@ -141,6 +144,11 @@ typedef enum { SW_BLEND = GL_BLEND } SWstate; +typedef enum { + SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT, + SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT +} SWbuffer; + typedef enum { SW_PROJECTION = GL_PROJECTION, SW_MODELVIEW = GL_MODELVIEW, @@ -259,7 +267,7 @@ void swOrtho(double left, double right, double bottom, double top, double znear, void swViewport(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); -void swClear(void); +void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); @@ -2227,13 +2235,31 @@ void swClearColor(float r, float g, float b, float a) RLSW.clearColor[3] = a * 255; } -void swClear(void) +void swClear(uint32_t bitmask) { int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - for (int i = 0; i < size; i++) { - ((uint32_t*)RLSW.framebuffer.color)[i] = *((uint32_t*)RLSW.clearColor); - RLSW.framebuffer.depth[i] = RLSW.clearDepth; + uint32_t* cptr = (uint32_t*)RLSW.framebuffer.color; + uint16_t* dptr = RLSW.framebuffer.depth; + + uint32_t c = *((uint32_t*)RLSW.clearColor); + uint16_t d = RLSW.clearDepth; + + if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { + for (int i = 0; i < size; i++) { + cptr[i] = c; + dptr[i] = d; + } + } + else if (bitmask & (SW_COLOR_BUFFER_BIT)) { + for (int i = 0; i < size; i++) { + cptr[i] = c; + } + } + else if (bitmask & SW_DEPTH_BUFFER_BIT) { + for (int i = 0; i < size; i++) { + dptr[i] = d; + } } } From 0b713cdfba89c0513888622539f47a0348005b42 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 03:30:41 +0100 Subject: [PATCH 10/43] small optimizations / tweaks --- src/external/rlsw.h | 89 ++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 04b2784d4..c6254841a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -324,6 +324,7 @@ void swBindTexture(uint32_t id); #ifdef RLSW_IMPL #include +#include #include /* === Defines and Macros === */ @@ -519,10 +520,10 @@ static inline float sw_lerp(float a, float b, float t) return a + t * (b - a); } -static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t) +static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_vertex_t* b, float t) { sw_vertex_t result; - for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) { + for (int i = 0; i < offsetof(sw_vertex_t, screen) / sizeof(float); i++) { ((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t); } return result; @@ -1031,7 +1032,7 @@ static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON for (int i = 0; i < inputCounter; i++) { char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); } if (currDot > 0) { @@ -1070,7 +1071,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG for (int i = 0; i < inputCounter; i++) { char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); } if (currDot > 0) { @@ -1096,7 +1097,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG for (int i = 0; i < inputCounter; i++) { char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / + polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); } if (currDot > 0) { @@ -1162,17 +1163,13 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \ const sw_vertex_t* end, float yDu, float yDv) \ { \ - /* Calculate the horizontal width and avoid division by zero */ \ - float dx = end->screen[0] - start->screen[0]; \ - if (fabsf(dx) < 1e-4f) return; \ - \ /* Convert and center the screen coordinates */ \ int xStart = (int)(start->screen[0] + 0.5f); \ int xEnd = (int)(end->screen[0] + 0.5f); \ int y = (int)(start->screen[1] + 0.5f); \ \ /* Calculate the initial interpolation parameter and its increment */ \ - float dt = 1.0f / dx; \ + float dt = 1.0f / (end->screen[0] - start->screen[0]); \ float t = (xStart - start->screen[0]) * dt; \ \ float xDu, xDv; \ @@ -1295,20 +1292,20 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const float x1 = v1->screen[0], y1 = v1->screen[1]; \ float x2 = v2->screen[0], y2 = v2->screen[1]; \ \ - /* Reject degenerate triangles */ \ - float height = y2 - y0; \ - if (height < 1e-4f) return; \ + /* Compute height differences */ \ + float h20 = y2 - y0; \ + float h10 = y1 - y0; \ + float h21 = y2 - y1; \ \ - /* Precompute the inverse of the triangle height and */ \ - /* edge lengths with checks to avoid division by zero. */ \ - float inv_height = 1.0f / height; \ - float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; \ - float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; \ + /* Precompute the inverse values without additional checks */ \ + float invH20 = (h20 > 1e-6f) ? 1.0f / h20 : 0.0f; \ + float invH10 = (h10 > 1e-6f) ? 1.0f / h10 : 0.0f; \ + float invH21 = (h21 > 1e-6f) ? 1.0f / h21 : 0.0f; \ \ /* Pre-calculation of slopes (dx/dy) */ \ - float dx02 = (x2 - x0) * inv_height; \ - float dx01 = (x1 - x0) * inv_y1y0; \ - float dx12 = (x2 - x1) * inv_y2y1; \ + float dx02 = (x2 - x0) * invH20; \ + float dx01 = (x1 - x0) * invH10; \ + float dx12 = (x2 - x1) * invH21; \ \ /* Y bounds (vertical clipping) */ \ int yTop = (int)(y0 + 0.5f); \ @@ -1318,8 +1315,8 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const /* Global calculation of vertical texture gradients for the triangle */ \ float yDu, yDv; \ if (ENABLE_TEXTURE) { \ - yDu = (v2->texcoord[0] - v0->texcoord[0]) * inv_height; \ - yDv = (v2->texcoord[1] - v0->texcoord[1]) * inv_height; \ + yDu = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \ + yDv = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \ } \ \ /* Initializing scanline variables */ \ @@ -1328,22 +1325,35 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const \ /* Scanline for the upper part of the triangle */ \ for (int y = yTop; y < yMiddle; y++) { \ + \ + /* Discard the lines that are degenerate */ \ + if (fabsf(xRight - xLeft) <= 1e-6f) { \ + goto discardTL; \ + } \ + \ + /* Calculation of interpolation factors */ \ float dy = (float)y - y0; \ - float t1 = dy * inv_height; \ - float t2 = dy * inv_y1y0; \ + float t1 = dy * invH20; \ + float t2 = dy * invH10; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex(v0, v2, t1); \ - end = sw_lerp_vertex(v0, v1, t2); \ + start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ + end = sw_lerp_vertex_PNTCH(v0, v1, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ end.screen[1] = (float)y; \ \ - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + if (xLeft > xRight) { \ + sw_vertex_t tmp = start; \ + start = end; \ + end = tmp; \ + } \ + \ FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ \ /* Incremental update */ \ + discardTL: \ xLeft += dx02; \ xRight += dx01; \ } \ @@ -1351,22 +1361,35 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const /* Scanline for the lower part of the triangle */ \ xRight = x1; /* Restart the right side from the second vertex */ \ for (int y = yMiddle; y < yBottom; y++) { \ + \ + /* Discard the lines that are degenerate */ \ + if (fabsf(xRight - xLeft) <= 1e-6f) { \ + goto discardBL; \ + } \ + \ + /* Calculation of interpolation factors */ \ float dy = (float)y - y0; \ - float t1 = dy * inv_height; \ - float t2 = (float)(y - y1) * inv_y2y1; \ + float t1 = dy * invH20; \ + float t2 = (float)(y - y1) * invH21; \ \ /* Vertex interpolation */ \ - start = sw_lerp_vertex(v0, v2, t1); \ - end = sw_lerp_vertex(v1, v2, t2); \ + start = sw_lerp_vertex_PNTCH(v0, v2, t1); \ + end = sw_lerp_vertex_PNTCH(v1, v2, t2); \ start.screen[0] = xLeft; \ start.screen[1] = (float)y; \ end.screen[0] = xRight; \ end.screen[1] = (float)y; \ \ - if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \ + if (xLeft > xRight) { \ + sw_vertex_t tmp = start; \ + start = end; \ + end = tmp; \ + } \ + \ FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \ \ /* Incremental update */ \ + discardBL: \ xLeft += dx02; \ xRight += dx12; \ } \ From 363f3afa60ce4bd721629d7d811dcac981cc5c3a Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 04:19:12 +0100 Subject: [PATCH 11/43] review ndc to screen projection --- src/external/rlsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c6254841a..83bf21f9a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1010,7 +1010,7 @@ static inline void sw_blend_colors(float dst[4], float src[4]) static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) { screen[0] = RLSW.vpPos[0] + (ndc[0] + 1.0f) * 0.5f * RLSW.vpDim[0]; - screen[1] = RLSW.vpPos[1] + (ndc[1] + 1.0f) * 0.5f * RLSW.vpDim[1]; + screen[1] = RLSW.vpPos[1] + (1.0f - ndc[1]) * 0.5f * RLSW.vpDim[1]; } From 6b8978cb3e0f84e6cc3090ad4b98cd1d00a41bff Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 20:53:18 +0100 Subject: [PATCH 12/43] avoid to recalculate MVP when its not needed + tweaks --- src/external/rlsw.h | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 83bf21f9a..5ef11bf58 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -430,6 +430,7 @@ typedef struct { SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) bool modelMatrixUsed; // Flag indicating if the model matrix is used + bool needToUpdateMVP; SWfactor srcFactor; SWfactor dstFactor; @@ -1467,7 +1468,7 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* /* === Line Rendering Part === */ -uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) +static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) { uint8_t code = SW_CLIP_INSIDE; if (screen[0] < xMin) code |= SW_CLIP_LEFT; @@ -1477,7 +1478,7 @@ uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xM return code; } -bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) { int xMin = RLSW.vpMin[0]; int yMin = RLSW.vpMin[1]; @@ -1531,7 +1532,7 @@ bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) return accept; } -bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) { if (fabsf(p) < SW_CLIP_EPSILON) { // Check if the line is entirely outside the window @@ -1552,7 +1553,7 @@ bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) return 1; } -bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) { // TODO: Lerp all vertices here, not just homogeneous coordinates @@ -1587,7 +1588,7 @@ bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) return true; } -bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) +static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) { sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); @@ -1619,7 +1620,7 @@ bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) } #define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ -void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ +static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ int x1 = (int)v0->screen[0]; \ int y1 = (int)v0->screen[1]; \ @@ -2089,6 +2090,8 @@ void swPopMatrix(void) void swLoadIdentity(void) { sw_matrix_id(*RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swTranslatef(float x, float y, float z) @@ -2101,6 +2104,8 @@ void swTranslatef(float x, float y, float z) mat[14] = z; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swRotatef(float angle, float x, float y, float z) @@ -2143,6 +2148,8 @@ void swRotatef(float angle, float x, float y, float z) mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swScalef(float x, float y, float z) @@ -2155,11 +2162,15 @@ void swScalef(float x, float y, float z) mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); + + RLSW.needToUpdateMVP = true; } void swMultMatrixf(const float* mat) { sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) @@ -2191,6 +2202,8 @@ void swFrustum(double left, double right, double bottom, double top, double znea mat[15] = 0.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) @@ -2222,6 +2235,8 @@ void swOrtho(double left, double right, double bottom, double top, double znear, mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); + + RLSW.needToUpdateMVP = true; } void swViewport(int x, int y, int width, int height) @@ -2394,9 +2409,11 @@ void swVertex4fv(const float* v) if (RLSW.vertexCounter == neededVertices) { - // TODO: Optimize MVP calculation - sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); - sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + if (RLSW.needToUpdateMVP) { + RLSW.needToUpdateMVP = false; + sw_matrix_mul(RLSW.matMVP, RLSW.matModel, RLSW.matView); + sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); + } switch (RLSW.drawMode) { case SW_POINTS: From 9be90fd5e6c089a278ac77dd99a9d47fd2c8ad97 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 21:54:18 +0100 Subject: [PATCH 13/43] review the loading and management of textures to be closer to the OpenGL API --- src/external/rlsw.h | 225 ++++++++++++++++++++++++++++++++------------ 1 file changed, 163 insertions(+), 62 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5ef11bf58..9f02ddd4a 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -134,6 +134,19 @@ #define GL_STACK_UNDERFLOW 0x0504 #define GL_OUT_OF_MEMORY 0x0505 +#define GL_ALPHA 0x1906 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 + +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 /* === RLSW Enums === */ @@ -189,31 +202,21 @@ typedef enum { } SWfactor; typedef enum { - SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) - SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) - SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) - SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp - SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) - SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) - SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) - SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) - SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp - SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp -} SWpixelformat; + SW_LUMINANCE = GL_LUMINANCE, + SW_LUMINANCE_ALPHA = GL_LUMINANCE_ALPHA, + SW_RGB = GL_RGB, + SW_RGBA = GL_RGBA, +} SWformat; + +typedef enum { + SW_UNSIGNED_BYTE = GL_UNSIGNED_BYTE, + SW_BYTE = GL_BYTE, + SW_UNSIGNED_SHORT = GL_UNSIGNED_SHORT, + SW_SHORT = GL_SHORT, + SW_UNSIGNED_INT = GL_UNSIGNED_INT, + SW_INT = GL_INT, + SW_FLOAT = GL_FLOAT +} SWtype; typedef enum { SW_NEAREST = GL_NEAREST, @@ -312,10 +315,11 @@ void swNormal3fv(const float* v); void swBindArray(SWarray type, void *buffer); void swDrawArrays(SWdraw mode, int offset, int count); -uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount); -void swUnloadTexture(uint32_t id); +void swGenTextures(int count, uint32_t* textures); +void swDeleteTextures(int count, uint32_t* textures); -void swTextureParameters(uint32_t id, int param, int value); +void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data); +void swTexParameteri(int param, int value); void swBindTexture(uint32_t id); #endif // RLSW_H @@ -348,6 +352,33 @@ void swBindTexture(uint32_t id); /* === Internal Structs === */ +typedef enum { + SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE = 1, // 8 bit per pixel (no alpha) + SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) + SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) + SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp + SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) + SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) + SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGB, // 4 bpp (no alpha) + SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA, // 4 bpp (1 bit alpha) + SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ETC1_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGB, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA, // 4 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA, // 8 bpp + SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA // 2 bpp +} sw_pixelformat_e; + typedef float sw_matrix_t[4*4]; typedef uint16_t sw_half_t; @@ -531,7 +562,54 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve } -/* === Pixel Format Conversion Part === */ +/* === Pixel Format Part === */ + +int sw_get_pixel_format(SWformat format, SWtype type) +{ + int channels = 0; + int bitsPerChannel = 8; // Default: 8 bits per channel + + // Determine the number of channels (format) + switch (format) { + case SW_LUMINANCE: channels = 1; break; + case SW_LUMINANCE_ALPHA: channels = 2; break; + case SW_RGB: channels = 3; break; + case SW_RGBA: channels = 4; break; + default: return -1; // Unknown format + } + + // Determine the depth of each channel (type) + switch (type) { + case SW_UNSIGNED_BYTE: bitsPerChannel = 8; break; + case SW_BYTE: bitsPerChannel = 8; break; + case SW_UNSIGNED_SHORT: bitsPerChannel = 16; break; + case SW_SHORT: bitsPerChannel = 16; break; + case SW_UNSIGNED_INT: bitsPerChannel = 32; break; + case SW_INT: bitsPerChannel = 32; break; + case SW_FLOAT: bitsPerChannel = 32; break; + default: return -1; // Unknown type + } + + // Map the format and type to the correct internal format + if (bitsPerChannel == 8) { + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE; + if (channels == 2) return SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8; + } + else if (bitsPerChannel == 16) { + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R16; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16; + } + else if (bitsPerChannel == 32) { + if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R32; + if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; + if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32; + } + + return -1; // Unsupported format +} static inline uint32_t sw_cvt_hf_ui(uint16_t h) { @@ -690,7 +768,7 @@ static inline void sw_get_pixel_rgba_32323232(float* color, const void* pixels, color[3] = pixel[3]; } -static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, SWpixelformat format) +static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offset, sw_pixelformat_e format) { switch (format) { @@ -1773,7 +1851,7 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) /* === Some Validity Check Helper === */ -static inline bool sw_is_texture_id_valid(uint32_t id) +static inline bool sw_is_texture_valid(uint32_t id) { bool valid = true; @@ -2718,53 +2796,76 @@ void swDrawArrays(SWdraw mode, int offset, int count) swEnd(); } -uint32_t swLoadTexture(const void *data, int width, int height, int format, int mipmapCount) +void swGenTextures(int count, uint32_t* textures) { - if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { - RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow - return 0; + if (count == 0 || textures == NULL) { + return; } - sw_texture_t texture = { 0 }; - texture.pixels = data; - texture.width = width; - texture.height = height; - texture.format = format; - texture.minFilter = SW_NEAREST; - texture.magFilter = SW_NEAREST; - texture.sWrap = SW_REPEAT; - texture.tWrap = SW_REPEAT; - texture.tx = 1.0f / width; - texture.ty = 1.0f / height; - (void)mipmapCount; - - uint32_t id = 0; - if (RLSW.freeTextureIdCount > 0) { - id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; - } - else { - id = RLSW.loadedTextureCount++; + for (int i = 0; i < count; i++) { + if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory, not really stack overflow + return; + } + uint32_t id = 0; + if (RLSW.freeTextureIdCount > 0) { + id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; + } + else { + id = RLSW.loadedTextureCount++; + } + RLSW.loadedTextures[id] = RLSW.loadedTextures[0]; + textures[i] = id; } +} - RLSW.loadedTextures[id] = texture; +void swDeleteTextures(int count, uint32_t* textures) +{ + if (count == 0 || textures == NULL) { + return; + } - return id; + for (int i = 0; i < count; i++) { + if (!sw_is_texture_valid(textures[i])) { + RLSW.errCode = SW_INVALID_VALUE; + continue; + } + RLSW.loadedTextures[textures[i]].pixels = 0; + RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i]; + } } -void swUnloadTexture(uint32_t id) +void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data) { - if (!sw_is_texture_id_valid(id)) { + uint32_t id = RLSW.currentTexture; + + if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } - RLSW.loadedTextures[id].pixels = 0; - RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = id; + int pixelFormat = sw_get_pixel_format(format, type); + + if (pixelFormat < 0) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + + sw_texture_t* texture = &RLSW.loadedTextures[id]; + + texture->pixels = data; + texture->width = width; + texture->height = height; + texture->format = pixelFormat; + texture->tx = 1.0f / width; + texture->ty = 1.0f / height; } -void swTextureParameters(uint32_t id, int param, int value) +void swTexParameteri(int param, int value) { - if (!sw_is_texture_id_valid(id)) { + uint32_t id = RLSW.currentTexture; + + if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } From d3fe48fa7de232db7379d89705ecf420f21495f3 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 22:54:05 +0100 Subject: [PATCH 14/43] texture sampling optimization --- src/external/rlsw.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 9f02ddd4a..c166583cb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -922,27 +922,26 @@ static inline void sw_texture_sample(float* color, const sw_texture_t* tex, floa // at the wrong moment during rasterization. It would be worth reviewing // this, although the scanline method complicates things. + // Previous method: There is no need to compute the square root + // because using the squared value, the comparison remains `L2 > 1.0f * 1.0f` + //float du = sqrtf(xDu * xDu + yDu * yDu); + //float dv = sqrtf(xDv * xDv + yDv * yDv); + //float L = (du > dv) ? du : dv; + // Calculate the derivatives for each axis - float du = sqrtf(xDu * xDu + yDu * yDu); - float dv = sqrtf(xDv * xDv + yDv * yDv); - float L = (du > dv) ? du : dv; + float du2 = xDu * xDu + yDu * yDu; + float dv2 = xDv * xDv + yDv * yDv; + float L2 = (du2 > dv2) ? du2 : dv2; - // Select the filter based on the size of the footprint - if (L > 1.0f) { - // Minification - if (tex->minFilter == SW_NEAREST) { - sw_texture_sample_nearest(color, tex, u, v); - } else if (tex->minFilter == SW_LINEAR) { - sw_texture_sample_linear(color, tex, u, v); - } - } else { - // Magnification - if (tex->magFilter == SW_NEAREST) { - sw_texture_sample_nearest(color, tex, u, v); - } else if (tex->magFilter == SW_LINEAR) { - sw_texture_sample_linear(color, tex, u, v); - } + bool useMinFilter = (L2 > 1.0f); + int filter = useMinFilter ? tex->minFilter : tex->magFilter; + + if (filter == SW_NEAREST) { + sw_texture_sample_nearest(color, tex, u, v); } + else /* SW_LINEAR */ { + sw_texture_sample_linear(color, tex, u, v); + } } @@ -1949,6 +1948,7 @@ void swInit(int w, int h) RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.matView; + RLSW.needToUpdateMVP = true; sw_matrix_id(RLSW.matProjection); sw_matrix_id(RLSW.matTexture); From 2666eedeb07020c276a18cfc44879a1de292f466 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Thu, 13 Mar 2025 23:13:34 +0100 Subject: [PATCH 15/43] review get pixel functions + review unorm/float conversion --- src/external/rlsw.h | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c166583cb..78c171261 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -337,6 +337,12 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) +#define SW_FLOAT_TO_UNORM8(x) ((uint8_t)((x) * UINT8_MAX)) +#define SW_FLOAT_TO_UNORM16(x) ((uint16_t)((x) * UINT16_MAX)) + +#define SW_UNORM8_TO_FLOAT(x) ((float)(x) * (1.0f / UINT8_MAX)) +#define SW_UNORM16_TO_FLOAT(x) ((float)(x) * (1.0f / UINT16_MAX)) + #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) #define SW_STATE_TEXTURE_2D (1 << 0) @@ -639,7 +645,7 @@ static inline float sw_cvt_hf(sw_half_t y) static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { - float gray = (float)((uint8_t*)pixels)[offset] / 255; + float gray = SW_UNORM8_TO_FLOAT(((uint8_t*)pixels)[offset]); color[0] = gray; color[1] = gray; @@ -669,13 +675,10 @@ static inline void sw_get_pixel_red_32(float* color, const void* pixels, uint32_ static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels, uint32_t offset) { - float gray = (float)((uint8_t*)pixels)[2 * offset] / 255; - float alpha = (float)((uint8_t*)pixels)[2 * offset + 1] / 255; + const uint8_t* pixelData = (const uint8_t*)pixels + 2 * offset; - color[0] = gray; - color[1] = gray; - color[2] = gray; - color[3] = alpha; + color[0] = color[1] = color[2] = SW_UNORM8_TO_FLOAT(pixelData[0]); + color[3] = SW_UNORM8_TO_FLOAT(pixelData[1]); } static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) @@ -690,11 +693,11 @@ static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32 static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32_t offset) { - const uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + const uint8_t* pixel = (const uint8_t*)pixels + 3 * offset; - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; + color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); + color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); + color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); color[3] = 1.0f; } @@ -742,10 +745,10 @@ static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint { const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - color[0] = (float)pixel[0] / 255; - color[1] = (float)pixel[1] / 255; - color[2] = (float)pixel[2] / 255; - color[3] = (float)pixel[3] / 255; + color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); + color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); + color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); + color[3] = SW_UNORM8_TO_FLOAT(pixel[3]); } static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) @@ -1292,12 +1295,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, if (ENABLE_DEPTH_TEST) { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? */ \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ if (ENABLE_COLOR_BLEND) \ { \ @@ -1748,11 +1751,11 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) continue; \ } \ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ int color_index = 4 * pixel_index; \ uint8_t* cptr = &colorBuffer[color_index]; \ @@ -1790,11 +1793,11 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ uint16_t* dptr = &depthBuffer[pixel_index]; \ if (ENABLE_DEPTH_TEST) { \ - float depth = (float)(*dptr) / UINT16_MAX; \ + float depth = SW_UNORM16_TO_FLOAT(*dptr); \ if (z > depth) continue; \ } \ \ - *dptr = (uint16_t)(z * UINT16_MAX); \ + *dptr = SW_FLOAT_TO_UNORM16(z); \ \ int color_index = 4 * pixel_index; \ uint8_t* cptr = &colorBuffer[color_index]; \ From cdf384794d157f518e537be3363743a5e7e2650d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 15 Mar 2025 04:13:37 +0100 Subject: [PATCH 16/43] add several buffer format support Several depth and color formats have been added for the framebuffer. 8-bit, 16-bit, and 24-bit formats are now available for depth. RGB 8-bit (332), RGB 16-bit (565), and RGB 24-bit (888) formats are now available for color. Alpha support is no longer present for the framebuffer at the moment, but it can easily be restored by adding the formats and reinterpolating the alpha in the areas that do not perform color blending. Additionally, this commit brings performance improvements. --- src/external/rlsw.h | 454 +++++++++++++++++++++++++++++++++----------- 1 file changed, 339 insertions(+), 115 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 78c171261..3b8d4b363 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -39,6 +39,14 @@ # define SW_FREE(ptr) free(ptr) #endif +#ifndef SW_COLOR_BUFFER_BITS +# define SW_COLOR_BUFFER_BITS 24 +#endif + +#ifndef SW_DEPTH_BUFFER_BITS +# define SW_DEPTH_BUFFER_BITS 16 +#endif + #ifndef SW_MAX_PROJECTION_STACK_SIZE # define SW_MAX_PROJECTION_STACK_SIZE 2 #endif @@ -337,11 +345,8 @@ void swBindTexture(uint32_t id); #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) -#define SW_FLOAT_TO_UNORM8(x) ((uint8_t)((x) * UINT8_MAX)) -#define SW_FLOAT_TO_UNORM16(x) ((uint16_t)((x) * UINT16_MAX)) - -#define SW_UNORM8_TO_FLOAT(x) ((float)(x) * (1.0f / UINT8_MAX)) -#define SW_UNORM16_TO_FLOAT(x) ((float)(x) * (1.0f / UINT16_MAX)) +#define SW_COLOR_PIXEL_SIZE (SW_COLOR_BUFFER_BITS / 8) +#define SW_DEPTH_PIXEL_SIZE (SW_DEPTH_BUFFER_BITS / 8) #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) @@ -419,16 +424,16 @@ typedef struct { } sw_texture_t; typedef struct { - uint8_t *color; // 32-bit RGBA color buffer - uint16_t *depth; // 16-bit fixed fract buffer + void *color; + void *depth; int width, height; } sw_framebuffer_t; typedef struct { sw_framebuffer_t framebuffer; - uint8_t clearColor[4]; // Color used to clear the screen - uint16_t clearDepth; // Depth value used to clear the screen + float clearColor[4]; // Color used to clear the screen + float clearDepth; // Depth value used to clear the screen uint32_t currentTexture; sw_matrix_t *currentMatrix; @@ -568,9 +573,224 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve } +/* === Framebuffer Part === */ + +static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) +{ + int size = w * h; + + *color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); + *depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + + // TODO: Handle memory allocation failure +} + +static inline void* sw_get_color_address(void* ptr, uint32_t offset) +{ + return (uint8_t*)ptr + offset * SW_COLOR_PIXEL_SIZE; +} + +static inline void sw_inc_color_address(void** ptr) +{ + *ptr = (void*)(((uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); +} + +static inline void* sw_get_depth_address(void* ptr, uint32_t offset) +{ + return (uint8_t*)ptr + offset * SW_DEPTH_PIXEL_SIZE; +} + +static inline void sw_inc_depth_address(void** ptr) +{ + *ptr = (void*)(((uint8_t*)*ptr) + SW_DEPTH_PIXEL_SIZE); +} + +static inline void sw_read_color(float dst[4], const void* src) +{ +#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 + uint8_t pixel = ((uint8_t*)src)[0]; + dst[0] = ((pixel >> 5) & 0x07) * (1.0f / 7.0f); + dst[1] = ((pixel >> 2) & 0x07) * (1.0f / 7.0f); + dst[2] = (pixel & 0x03) * (1.0f / 3.0f); + dst[3] = 1.0f; +#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 + uint16_t pixel = ((uint16_t*)src)[0]; + dst[0] = ((pixel >> 11) & 0x1F) * (1.0f / 31.0f); + dst[1] = ((pixel >> 5) & 0x3F) * (1.0f / 63.0f); + dst[2] = (pixel & 0x1F) * (1.0f / 31.0f); + dst[3] = 1.0f; +#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 + dst[0] = ((uint8_t*)src)[0] * (1.0f / 255.0f); + dst[1] = ((uint8_t*)src)[1] * (1.0f / 255.0f); + dst[2] = ((uint8_t*)src)[2] * (1.0f / 255.0f); + dst[3] = 1.0f; +#endif +} + +static inline void sw_write_color(void* dst, float color[3]) +{ +#if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + ((uint8_t*)dst)[0] = (r << 5) | (g << 2) | b; + +#elif (SW_COLOR_BUFFER_BITS == 16) // RGB - 565 + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + ((uint16_t*)dst)[0] = (r << 11) | (g << 5) | b; + +#elif (SW_COLOR_BUFFER_BITS == 24) // RGB - 888 + ((uint8_t*)dst)[0] = (uint8_t)(color[0] * UINT8_MAX); + ((uint8_t*)dst)[1] = (uint8_t)(color[1] * UINT8_MAX); + ((uint8_t*)dst)[2] = (uint8_t)(color[2] * UINT8_MAX); +#endif +} + +static inline float sw_read_depth(const void* src) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 16) + return (float)((uint16_t*)src)[0] * (1.0f / UINT16_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth24 = (((uint8_t*)src)[0] << 16) | + (((uint8_t*)src)[1] << 8) | + ((uint8_t*)src)[2]; + return depth24 / (float)0xFFFFFF; +#endif +} + +static inline void sw_write_depth(void* dst, float depth) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 16) + ((uint16_t*)dst)[0] = (uint16_t)(depth * UINT16_MAX); +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth24 = (uint32_t)(depth * 0xFFFFFF); + ((uint8_t*)dst)[0] = (depth24 >> 16) & 0xFF; + ((uint8_t*)dst)[1] = (depth24 >> 8) & 0xFF; + ((uint8_t*)dst)[2] = depth24 & 0xFF; +#endif +} + +static inline void sw_fill_color(void* ptr, int size, float color[4]) +{ +#if (SW_COLOR_BUFFER_BITS == 8) + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = (r << 5) | (g << 2) | b; + } +#elif (SW_COLOR_BUFFER_BITS == 16) + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t* p = (uint16_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = (r << 11) | (g << 5) | b; + } +#elif (SW_COLOR_BUFFER_BITS == 24) + uint8_t r = (uint8_t)(color[0] * 255); + uint8_t g = (uint8_t)(color[1] * 255); + uint8_t b = (uint8_t)(color[2] * 255); + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + *p++ = r; + *p++ = g; + *p++ = b; + } +#endif +} + +static inline void sw_fill_depth(void* ptr, int size, float value) +{ +#if (SW_DEPTH_BUFFER_BITS == 8) + uint8_t v = value * UINT8_MAX; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = v; + } +#elif (SW_DEPTH_BUFFER_BITS == 16) + uint16_t v = value * UINT16_MAX; + uint16_t* p = (uint16_t*)ptr; + for (int i = 0; i < size; i++) { + p[i] = v; + } +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t v = value * UINT32_MAX; + uint8_t* p = (uint8_t*)ptr; + for (int i = 0; i < size; i++) { + *p++ = (v >> 16) & 0xFF; + *p++ = (v >> 8) & 0xFF; + *p++ = v & 0xFF; + } +#endif +} + +static inline void sw_fill_color_and_depth(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +{ +#if (SW_COLOR_BUFFER_BITS == 8) + uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; + uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; + uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; + uint8_t* color_p = (uint8_t*)color_ptr; +#elif (SW_COLOR_BUFFER_BITS == 16) + uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; + uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; + uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; + uint16_t* color_p = (uint16_t*)color_ptr; +#elif (SW_COLOR_BUFFER_BITS == 24) + uint8_t r = (uint8_t)(color[0] * 255); + uint8_t g = (uint8_t)(color[1] * 255); + uint8_t b = (uint8_t)(color[2] * 255); + uint8_t* color_p = (uint8_t*)color_ptr; +#endif + +#if (SW_DEPTH_BUFFER_BITS == 8) + uint8_t depth_v = depth_value * UINT8_MAX; + uint8_t* depth_p = (uint8_t*)depth_ptr; +#elif (SW_DEPTH_BUFFER_BITS == 16) + uint16_t depth_v = depth_value * UINT16_MAX; + uint16_t* depth_p = (uint16_t*)depth_ptr; +#elif (SW_DEPTH_BUFFER_BITS == 24) + uint32_t depth_v = depth_value * UINT32_MAX; + uint8_t* depth_p = (uint8_t*)depth_ptr; +#endif + + for (int i = 0; i < size; i++) { + // Remplir le buffer de couleurs +#if (SW_COLOR_BUFFER_BITS == 8) + color_p[i] = (r << 5) | (g << 2) | b; +#elif (SW_COLOR_BUFFER_BITS == 16) + color_p[i] = (r << 11) | (g << 5) | b; +#elif (SW_COLOR_BUFFER_BITS == 24) + *color_p++ = r; + *color_p++ = g; + *color_p++ = b; +#endif + + // Remplir le buffer de profondeur +#if (SW_DEPTH_BUFFER_BITS == 8) + depth_p[i] = depth_v; +#elif (SW_DEPTH_BUFFER_BITS == 16) + depth_p[i] = depth_v; +#elif (SW_DEPTH_BUFFER_BITS == 24) + *depth_p++ = (depth_v >> 16) & 0xFF; + *depth_p++ = (depth_v >> 8) & 0xFF; + *depth_p++ = depth_v & 0xFF; +#endif + } +} + + /* === Pixel Format Part === */ -int sw_get_pixel_format(SWformat format, SWtype type) +static inline int sw_get_pixel_format(SWformat format, SWtype type) { int channels = 0; int bitsPerChannel = 8; // Default: 8 bits per channel @@ -645,7 +865,7 @@ static inline float sw_cvt_hf(sw_half_t y) static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { - float gray = SW_UNORM8_TO_FLOAT(((uint8_t*)pixels)[offset]); + float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); color[0] = gray; color[1] = gray; @@ -677,8 +897,8 @@ static inline void sw_get_pixel_grayscale_alpha(float* color, const void* pixels { const uint8_t* pixelData = (const uint8_t*)pixels + 2 * offset; - color[0] = color[1] = color[2] = SW_UNORM8_TO_FLOAT(pixelData[0]); - color[3] = SW_UNORM8_TO_FLOAT(pixelData[1]); + color[0] = color[1] = color[2] = (float)pixelData[0] * (1.0f / 255); + color[3] = (float)pixelData[1] * (1.0f / 255); } static inline void sw_get_pixel_rgb_565(float* color, const void* pixels, uint32_t offset) @@ -695,9 +915,9 @@ static inline void sw_get_pixel_rgb_888(float* color, const void* pixels, uint32 { const uint8_t* pixel = (const uint8_t*)pixels + 3 * offset; - color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); - color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); - color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); + color[0] = (float)pixel[0] * (1.0f / 255); + color[1] = (float)pixel[1] * (1.0f / 255); + color[2] = (float)pixel[2] * (1.0f / 255); color[3] = 1.0f; } @@ -745,10 +965,10 @@ static inline void sw_get_pixel_rgba_8888(float* color, const void* pixels, uint { const uint8_t *pixel = (uint8_t*)pixels + 4 * offset; - color[0] = SW_UNORM8_TO_FLOAT(pixel[0]); - color[1] = SW_UNORM8_TO_FLOAT(pixel[1]); - color[2] = SW_UNORM8_TO_FLOAT(pixel[2]); - color[3] = SW_UNORM8_TO_FLOAT(pixel[3]); + color[0] = (float)pixel[0] * (1.0f / 255); + color[1] = (float)pixel[1] * (1.0f / 255); + color[2] = (float)pixel[2] * (1.0f / 255); + color[3] = (float)pixel[3] * (1.0f / 255); } static inline void sw_get_pixel_rgba_16161616(float* color, const void* pixels, uint32_t offset) @@ -1279,12 +1499,14 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - uint8_t* cptrRow = (uint8_t*)((uint32_t*)RLSW.framebuffer.color + y * RLSW.framebuffer.width); \ - uint8_t* cptr = cptrRow + xStart * 4; \ + void* cptr = sw_get_color_address( \ + RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart \ + ); \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - uint16_t* dptrRow = RLSW.framebuffer.depth + y * RLSW.framebuffer.width + xStart; \ - uint16_t* dptr = dptrRow; \ + void* dptr = sw_get_depth_address( \ + RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart \ + ); \ \ /* Scanline rasterization loop */ \ for (int x = xStart; x < xEnd; x++) { \ @@ -1292,66 +1514,73 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, float w = 1.0f / (start->homogeneous[3] + t * dw); \ float z = start->homogeneous[2] + t * dz; \ \ - if (ENABLE_DEPTH_TEST) { \ + if (ENABLE_DEPTH_TEST) \ + { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? */ \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ - float dstColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; \ - float srcColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; \ + float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ \ - if (ENABLE_TEXTURE) { \ + float srcColor[4]; \ + if (ENABLE_TEXTURE) \ + { \ sw_texture_sample(srcColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + srcColor[0] *= (start->color[0] + t * dcol[0]) * w; \ + srcColor[1] *= (start->color[1] + t * dcol[1]) * w; \ + srcColor[2] *= (start->color[2] + t * dcol[2]) * w; \ + srcColor[3] *= (start->color[3] + t * dcol[3]) * w; \ } \ - \ - for (int i = 0; i < 4; i++) { \ - dstColor[i] = (float)cptr[i] / 255; \ - srcColor[i] *= (start->color[i] + t * dcol[i]) * w; \ + else \ + { \ + srcColor[0] = (start->color[0] + t * dcol[0]) * w; \ + srcColor[1] = (start->color[1] + t * dcol[1]) * w; \ + srcColor[2] = (start->color[2] + t * dcol[2]) * w; \ + srcColor[3] = (start->color[3] + t * dcol[3]) * w; \ } \ \ sw_blend_colors(dstColor, srcColor); \ \ - for (int i = 0; i < 4; i++) { \ - cptr[i] = (uint8_t)(sw_saturate(dstColor[i]) * 255); \ - } \ + dstColor[0] = sw_saturate(dstColor[0]); \ + dstColor[1] = sw_saturate(dstColor[1]); \ + dstColor[2] = sw_saturate(dstColor[2]); \ + \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ if (ENABLE_TEXTURE) \ { \ - /* Sample the texture */ \ - float texColor[4]; \ - sw_texture_sample(texColor, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ - \ - /* Interpolate the color and modulate by the texture color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = texColor[i]; \ - finalColor *= (start->color[i] + t * dcol[i]) * w; \ - cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ - } \ + float color[4]; \ + sw_texture_sample(color, tex, u * w, v * w, xDu, yDu, xDv, yDv); \ + color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ + color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ + color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * w); \ + sw_write_color(cptr, color); \ } \ else \ { \ - /* Interpolate the color */ \ - for (int i = 0; i < 4; i++) { \ - float finalColor = (start->color[i] + t * dcol[i]) * w; \ - cptr[i] = (uint8_t)(sw_saturate(finalColor) * 255.0f); \ - } \ + float color[3]; \ + color[0] = sw_saturate((start->color[0] + t * dcol[0]) * w); \ + color[1] = sw_saturate((start->color[1] + t * dcol[1]) * w); \ + color[2] = sw_saturate((start->color[2] + t * dcol[2]) * w); \ + sw_write_color(cptr, color); \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - cptr += 4; \ - dptr++; \ + sw_inc_color_address(&cptr); \ + sw_inc_depth_address(&dptr); \ if (ENABLE_TEXTURE) { \ u += xDu; \ v += xDv; \ @@ -1747,38 +1976,39 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int x = x1 + (j >> 16); \ int y = y1 + i; \ float z = z1 + t * zDiff; \ - int pixel_index = y * fbWidth + x; \ + int offset = y * fbWidth + x; \ \ - uint16_t* dptr = &depthBuffer[pixel_index]; \ + void* dptr = sw_get_depth_address(depthBuffer, offset); \ if (ENABLE_DEPTH_TEST) { \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ - int color_index = 4 * pixel_index; \ - uint8_t* cptr = &colorBuffer[color_index]; \ + void* cptr = sw_get_depth_address(colorBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ + \ float srcColor[4]; \ - for (int j = 0; j < 4; j++) { \ - dstColor[j] = (float)cptr[i] / 255; \ - srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ - } \ + srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + srcColor[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ + \ sw_blend_colors(dstColor, srcColor); \ - for (int j = 0; j < 4; j++) { \ - cptr[j] = (uint8_t)(dstColor[j] * 255); \ - } \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ - } \ + float color[3]; \ + color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + sw_write_color(cptr, color); \ } \ } \ } \ @@ -1789,38 +2019,39 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ int x = x1 + i; \ int y = y1 + (j >> 16); \ float z = z1 + t * zDiff; \ - int pixel_index = y * fbWidth + x; \ + int offset = y * fbWidth + x; \ \ - uint16_t* dptr = &depthBuffer[pixel_index]; \ + void* dptr = sw_get_depth_address(depthBuffer, offset); \ if (ENABLE_DEPTH_TEST) { \ - float depth = SW_UNORM16_TO_FLOAT(*dptr); \ + float depth = sw_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - *dptr = SW_FLOAT_TO_UNORM16(z); \ + sw_write_depth(dptr, z); \ \ - int color_index = 4 * pixel_index; \ - uint8_t* cptr = &colorBuffer[color_index]; \ + void* cptr = sw_get_depth_address(colorBuffer, offset); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ + sw_read_color(dstColor, cptr); \ + \ float srcColor[4]; \ - for (int j = 0; j < 4; j++) { \ - dstColor[j] = (float)cptr[i] / 255; \ - srcColor[j] = sw_lerp(v0->color[j], v1->color[j], t); \ - } \ + srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + srcColor[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + srcColor[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ + \ sw_blend_colors(dstColor, srcColor); \ - for (int j = 0; j < 4; j++) { \ - cptr[j] = (uint8_t)(dstColor[j] * 255); \ - } \ + sw_write_color(cptr, dstColor); \ } \ else \ { \ - for (int j = 0; j < 4; j++) { \ - float finalColor = sw_lerp(v0->color[j], v1->color[j], t); \ - cptr[j] = (uint8_t)(finalColor * 255); \ - } \ + float color[3]; \ + color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ + color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ + color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ + sw_write_color(cptr, color); \ } \ } \ } \ @@ -1934,8 +2165,11 @@ void swInit(int w, int h) { swViewport(0, 0, w, h); - RLSW.framebuffer.color = SW_MALLOC(4 * w * h); - RLSW.framebuffer.depth = SW_MALLOC(2 * w * h); + sw_load_framebuffer( + &RLSW.framebuffer.color, + &RLSW.framebuffer.depth, + w, h + ); RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; @@ -1943,11 +2177,11 @@ void swInit(int w, int h) RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); - RLSW.clearColor[0] = 0; - RLSW.clearColor[1] = 0; - RLSW.clearColor[2] = 0; - RLSW.clearColor[3] = 255; - RLSW.clearDepth = UINT16_MAX; + RLSW.clearColor[0] = 0.0f; + RLSW.clearColor[1] = 0.0f; + RLSW.clearColor[2] = 0.0f; + RLSW.clearColor[3] = 1.0f; + RLSW.clearDepth = 1.0f; RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.matView; @@ -2348,37 +2582,27 @@ void swViewport(int x, int y, int width, int height) void swClearColor(float r, float g, float b, float a) { - RLSW.clearColor[0] = r * 255; - RLSW.clearColor[1] = g * 255; - RLSW.clearColor[2] = b * 255; - RLSW.clearColor[3] = a * 255; + RLSW.clearColor[0] = r; + RLSW.clearColor[1] = g; + RLSW.clearColor[2] = b; + RLSW.clearColor[3] = a; } void swClear(uint32_t bitmask) { int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - uint32_t* cptr = (uint32_t*)RLSW.framebuffer.color; - uint16_t* dptr = RLSW.framebuffer.depth; - - uint32_t c = *((uint32_t*)RLSW.clearColor); - uint16_t d = RLSW.clearDepth; - if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - for (int i = 0; i < size; i++) { - cptr[i] = c; - dptr[i] = d; - } + sw_fill_color_and_depth( + RLSW.framebuffer.color, RLSW.framebuffer.depth, + size, RLSW.clearColor, RLSW.clearDepth + ); } else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - for (int i = 0; i < size; i++) { - cptr[i] = c; - } + sw_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); } else if (bitmask & SW_DEPTH_BUFFER_BIT) { - for (int i = 0; i < size; i++) { - dptr[i] = d; - } + sw_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); } } From 645b5e9c45091ee03a55040ab7aa896d2e866f0b Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 00:09:13 +0100 Subject: [PATCH 17/43] tweaks --- src/external/rlsw.h | 254 +++++++++++++++++++++++--------------------- 1 file changed, 132 insertions(+), 122 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 3b8d4b363..d30db2ac4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -259,10 +259,18 @@ typedef enum { void swInit(int w, int h); void swClose(void); +void* swGetColorBuffer(int* w, int* h); + void swEnable(SWstate state); void swDisable(SWstate state); -void* swGetColorBuffer(int* w, int* h); +void swViewport(int x, int y, int width, int height); + +void swClearColor(float r, float g, float b, float a); +void swClear(uint32_t bitmask); + +void swBlendFunc(SWfactor sfactor, SWfactor dfactor); +void swCullFace(SWface face); void swMatrixMode(SWmatrix mode); void swPushMatrix(void); @@ -275,14 +283,6 @@ void swMultMatrixf(const float* mat); void swFrustum(double left, double right, double bottom, double top, double znear, double zfar); void swOrtho(double left, double right, double bottom, double top, double znear, double zfar); -void swViewport(int x, int y, int width, int height); - -void swClearColor(float r, float g, float b, float a); -void swClear(uint32_t bitmask); - -void swBlendFunc(SWfactor sfactor, SWfactor dfactor); -void swCullFace(SWface face); - void swBegin(SWdraw mode); void swEnd(void); @@ -575,7 +575,7 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve /* === Framebuffer Part === */ -static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) +static inline void sw_framebuffer_load(void** color, void** depth, int w, int h) { int size = w * h; @@ -585,27 +585,27 @@ static inline void sw_load_framebuffer(void** color, void** depth, int w, int h) // TODO: Handle memory allocation failure } -static inline void* sw_get_color_address(void* ptr, uint32_t offset) +static inline void* sw_framebuffer_get_color_addr(void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_COLOR_PIXEL_SIZE; } -static inline void sw_inc_color_address(void** ptr) +static inline void sw_framebuffer_inc_color_addr(void** ptr) { *ptr = (void*)(((uint8_t*)*ptr) + SW_COLOR_PIXEL_SIZE); } -static inline void* sw_get_depth_address(void* ptr, uint32_t offset) +static inline void* sw_framebuffer_get_depth_addr(void* ptr, uint32_t offset) { return (uint8_t*)ptr + offset * SW_DEPTH_PIXEL_SIZE; } -static inline void sw_inc_depth_address(void** ptr) +static inline void sw_framebuffer_inc_depth_addr(void** ptr) { *ptr = (void*)(((uint8_t*)*ptr) + SW_DEPTH_PIXEL_SIZE); } -static inline void sw_read_color(float dst[4], const void* src) +static inline void sw_framebuffer_read_color(float dst[4], const void* src) { #if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t pixel = ((uint8_t*)src)[0]; @@ -627,7 +627,7 @@ static inline void sw_read_color(float dst[4], const void* src) #endif } -static inline void sw_write_color(void* dst, float color[3]) +static inline void sw_framebuffer_write_color(void* dst, float color[3]) { #if (SW_COLOR_BUFFER_BITS == 8) // RGB - 332 uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -648,7 +648,7 @@ static inline void sw_write_color(void* dst, float color[3]) #endif } -static inline float sw_read_depth(const void* src) +static inline float sw_framebuffer_read_depth(const void* src) { #if (SW_DEPTH_BUFFER_BITS == 8) return (float)((uint8_t*)src)[0] * (1.0f / UINT8_MAX); @@ -662,7 +662,7 @@ static inline float sw_read_depth(const void* src) #endif } -static inline void sw_write_depth(void* dst, float depth) +static inline void sw_framebuffer_write_depth(void* dst, float depth) { #if (SW_DEPTH_BUFFER_BITS == 8) ((uint8_t*)dst)[0] = (uint8_t)(depth * UINT8_MAX); @@ -676,7 +676,7 @@ static inline void sw_write_depth(void* dst, float depth) #endif } -static inline void sw_fill_color(void* ptr, int size, float color[4]) +static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4]) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -707,7 +707,7 @@ static inline void sw_fill_color(void* ptr, int size, float color[4]) #endif } -static inline void sw_fill_depth(void* ptr, int size, float value) +static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) { #if (SW_DEPTH_BUFFER_BITS == 8) uint8_t v = value * UINT8_MAX; @@ -732,7 +732,7 @@ static inline void sw_fill_depth(void* ptr, int size, float value) #endif } -static inline void sw_fill_color_and_depth(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +static inline void sw_framebuffer_fill(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; @@ -1499,12 +1499,12 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, } \ \ /* Pre-calculate the starting pointer for the color framebuffer row */ \ - void* cptr = sw_get_color_address( \ + void* cptr = sw_framebuffer_get_color_addr( \ RLSW.framebuffer.color, y * RLSW.framebuffer.width + xStart \ ); \ \ /* Pre-calculate the pointer for the depth buffer row */ \ - void* dptr = sw_get_depth_address( \ + void* dptr = sw_framebuffer_get_depth_addr( \ RLSW.framebuffer.depth, y * RLSW.framebuffer.width + xStart \ ); \ \ @@ -1518,17 +1518,17 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, { \ /* Depth testing with direct access to the depth buffer */ \ /* TODO: Implement different depth funcs? */ \ - float depth = sw_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) goto discard; \ } \ \ /* Update the depth buffer */ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ if (ENABLE_TEXTURE) \ @@ -1553,7 +1553,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, dstColor[1] = sw_saturate(dstColor[1]); \ dstColor[2] = sw_saturate(dstColor[2]); \ \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -1564,7 +1564,7 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, color[0] = sw_saturate(color[0] * (start->color[0] + t * dcol[0]) * w); \ color[1] = sw_saturate(color[1] * (start->color[1] + t * dcol[1]) * w); \ color[2] = sw_saturate(color[2] * (start->color[2] + t * dcol[2]) * w); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ else \ { \ @@ -1572,15 +1572,15 @@ static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, color[0] = sw_saturate((start->color[0] + t * dcol[0]) * w); \ color[1] = sw_saturate((start->color[1] + t * dcol[1]) * w); \ color[2] = sw_saturate((start->color[2] + t * dcol[2]) * w); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ t += dt; \ - sw_inc_color_address(&cptr); \ - sw_inc_depth_address(&dptr); \ + sw_framebuffer_inc_color_addr(&cptr); \ + sw_framebuffer_inc_depth_addr(&dptr); \ if (ENABLE_TEXTURE) { \ u += xDu; \ v += xDv; \ @@ -1978,20 +1978,25 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ float z = z1 + t * zDiff; \ int offset = y * fbWidth + x; \ \ - void* dptr = sw_get_depth_address(depthBuffer, offset); \ + void* dptr = sw_framebuffer_get_depth_addr( \ + depthBuffer, offset \ + ); \ + \ if (ENABLE_DEPTH_TEST) { \ - float depth = sw_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_get_depth_address(colorBuffer, offset); \ + void* cptr = sw_framebuffer_get_depth_addr( \ + colorBuffer, offset \ + ); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ @@ -2000,7 +2005,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ \ sw_blend_colors(dstColor, srcColor); \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -2008,7 +2013,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ } \ @@ -2021,20 +2026,25 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ float z = z1 + t * zDiff; \ int offset = y * fbWidth + x; \ \ - void* dptr = sw_get_depth_address(depthBuffer, offset); \ + void* dptr = sw_framebuffer_get_depth_addr( \ + depthBuffer, offset \ + ); \ + \ if (ENABLE_DEPTH_TEST) { \ - float depth = sw_read_depth(dptr); \ + float depth = sw_framebuffer_read_depth(dptr); \ if (z > depth) continue; \ } \ \ - sw_write_depth(dptr, z); \ + sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_get_depth_address(colorBuffer, offset); \ + void* cptr = sw_framebuffer_get_depth_addr( \ + colorBuffer, offset \ + ); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ - sw_read_color(dstColor, cptr); \ + sw_framebuffer_read_color(dstColor, cptr); \ \ float srcColor[4]; \ srcColor[0] = sw_lerp(v0->color[0], v1->color[0], t); \ @@ -2043,7 +2053,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ srcColor[3] = sw_lerp(v0->color[3], v1->color[3], t); \ \ sw_blend_colors(dstColor, srcColor); \ - sw_write_color(cptr, dstColor); \ + sw_framebuffer_write_color(cptr, dstColor); \ } \ else \ { \ @@ -2051,7 +2061,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ color[0] = sw_lerp(v0->color[0], v1->color[0], t); \ color[1] = sw_lerp(v0->color[1], v1->color[1], t); \ color[2] = sw_lerp(v0->color[2], v1->color[2], t); \ - sw_write_color(cptr, color); \ + sw_framebuffer_write_color(cptr, color); \ } \ } \ } \ @@ -2165,7 +2175,7 @@ void swInit(int w, int h) { swViewport(0, 0, w, h); - sw_load_framebuffer( + sw_framebuffer_load( &RLSW.framebuffer.color, &RLSW.framebuffer.depth, w, h @@ -2240,6 +2250,14 @@ void swClose(void) SW_FREE(RLSW.freeTextureIds); } +void* swGetColorBuffer(int* w, int* h) +{ + if (w) *w = RLSW.framebuffer.width; + if (h) *h = RLSW.framebuffer.height; + + return RLSW.framebuffer.color; +} + void swEnable(SWstate state) { switch (state) { @@ -2282,12 +2300,76 @@ void swDisable(SWstate state) } } -void* swGetColorBuffer(int* w, int* h) +void swViewport(int x, int y, int width, int height) { - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; + if (x <= -width || y <= -height) { + RLSW.errCode = SW_INVALID_OPERATION; + return; + } - return RLSW.framebuffer.color; + RLSW.vpPos[0] = x; + RLSW.vpPos[1] = y; + + RLSW.vpDim[0] = width - 1; + RLSW.vpDim[1] = height - 1; + + RLSW.vpMin[0] = (x < 0) ? 0 : x; + RLSW.vpMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; +} + +void swClearColor(float r, float g, float b, float a) +{ + RLSW.clearColor[0] = r; + RLSW.clearColor[1] = g; + RLSW.clearColor[2] = b; + RLSW.clearColor[3] = a; +} + +void swClear(uint32_t bitmask) +{ + int size = RLSW.framebuffer.width * RLSW.framebuffer.height; + + if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { + sw_framebuffer_fill( + RLSW.framebuffer.color, RLSW.framebuffer.depth, + size, RLSW.clearColor, RLSW.clearDepth + ); + } + else if (bitmask & (SW_COLOR_BUFFER_BIT)) { + sw_framebuffer_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); + } + else if (bitmask & SW_DEPTH_BUFFER_BIT) { + sw_framebuffer_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); + } +} + +void swBlendFunc(SWfactor sfactor, SWfactor dfactor) +{ + if (!sw_is_blend_src_factor_valid(sfactor) + || !sw_is_blend_dst_factor_valid(dfactor)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.srcFactor = sfactor; + RLSW.dstFactor = dfactor; +} + +void swCullFace(SWface face) +{ + if (!sw_is_face_valid(face)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.cullFace = face; } void swMatrixMode(SWmatrix mode) @@ -2554,78 +2636,6 @@ void swOrtho(double left, double right, double bottom, double top, double znear, RLSW.needToUpdateMVP = true; } -void swViewport(int x, int y, int width, int height) -{ - if (x <= -width || y <= -height) { - RLSW.errCode = SW_INVALID_OPERATION; - return; - } - - RLSW.vpPos[0] = x; - RLSW.vpPos[1] = y; - - RLSW.vpDim[0] = width - 1; - RLSW.vpDim[1] = height - 1; - - RLSW.vpMin[0] = (x < 0) ? 0 : x; - RLSW.vpMin[1] = (y < 0) ? 0 : y; - - int fbW = RLSW.framebuffer.width - 1; - int fbH = RLSW.framebuffer.height - 1; - - int vpMaxX = x + width; - int vpMaxY = y + height; - - RLSW.vpMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; - RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; -} - -void swClearColor(float r, float g, float b, float a) -{ - RLSW.clearColor[0] = r; - RLSW.clearColor[1] = g; - RLSW.clearColor[2] = b; - RLSW.clearColor[3] = a; -} - -void swClear(uint32_t bitmask) -{ - int size = RLSW.framebuffer.width * RLSW.framebuffer.height; - - if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { - sw_fill_color_and_depth( - RLSW.framebuffer.color, RLSW.framebuffer.depth, - size, RLSW.clearColor, RLSW.clearDepth - ); - } - else if (bitmask & (SW_COLOR_BUFFER_BIT)) { - sw_fill_color(RLSW.framebuffer.color, size, RLSW.clearColor); - } - else if (bitmask & SW_DEPTH_BUFFER_BIT) { - sw_fill_depth(RLSW.framebuffer.depth, size, RLSW.clearDepth); - } -} - -void swBlendFunc(SWfactor sfactor, SWfactor dfactor) -{ - if (!sw_is_blend_src_factor_valid(sfactor) - || !sw_is_blend_dst_factor_valid(dfactor)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.srcFactor = sfactor; - RLSW.dstFactor = dfactor; -} - -void swCullFace(SWface face) -{ - if (!sw_is_face_valid(face)) { - RLSW.errCode = SW_INVALID_ENUM; - return; - } - RLSW.cullFace = face; -} - void swBegin(SWdraw mode) { if (mode < SW_POINTS || mode > SW_QUADS) { From 3bc8f36f146f2dd901776a0c04df93239e75abfd Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 00:50:34 +0100 Subject: [PATCH 18/43] impl line width --- src/external/rlsw.h | 99 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d30db2ac4..4b18027b0 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -272,6 +272,8 @@ void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); +void swLineWidth(float width); + void swMatrixMode(SWmatrix mode); void swPushMatrix(void); void swPopMatrix(void); @@ -1989,7 +1991,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_depth_addr( \ + void* cptr = sw_framebuffer_get_color_addr( \ colorBuffer, offset \ ); \ \ @@ -2037,7 +2039,7 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ \ sw_framebuffer_write_depth(dptr, z); \ \ - void* cptr = sw_framebuffer_get_depth_addr( \ + void* cptr = sw_framebuffer_get_color_addr( \ colorBuffer, offset \ ); \ \ @@ -2067,28 +2069,94 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ } \ } +#define DEFINE_LINE_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ +void FUNC_NAME(const sw_vertex_t* v1, const sw_vertex_t* v2) \ +{ \ + sw_vertex_t tv1, tv2; \ + \ + int x1 = (int)v1->screen[0]; \ + int y1 = (int)v1->screen[1]; \ + int x2 = (int)v2->screen[0]; \ + int y2 = (int)v2->screen[1]; \ + \ + int dx = x2 - x1; \ + int dy = y2 - y1; \ + \ + RASTER_FUNC(v1, v2); \ + \ + if (dx != 0 && abs(dy / dx) < 1) { \ + int wy = (int)((RLSW.lineWidth - 1.0f) * abs(dx) / sqrtf(dx * dx + dy * dy)); \ + wy >>= 1; /* Division by 2 via bit shift */ \ + for (int i = 1; i <= wy; i++) { \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[1] -= i; \ + tv2.screen[1] -= i; \ + RASTER_FUNC(&tv1, &tv2); \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[1] += i; \ + tv2.screen[1] += i; \ + RASTER_FUNC(&tv1, &tv2); \ + } \ + } \ + else if (dy != 0) { \ + int wx = (int)((RLSW.lineWidth - 1.0f) * abs(dy) / sqrtf(dx * dx + dy * dy)); \ + wx >>= 1; /* Division by 2 via bit shift */ \ + for (int i = 1; i <= wx; i++) { \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[0] -= i; \ + tv2.screen[0] -= i; \ + RASTER_FUNC(&tv1, &tv2); \ + tv1 = *v1, tv2 = *v2; \ + tv1.screen[0] += i; \ + tv2.screen[0] += i; \ + RASTER_FUNC(&tv1, &tv2); \ + } \ + } \ +} + DEFINE_LINE_RASTER(sw_line_raster, 0, 0) DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0) DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1) DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster, sw_line_raster) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH, sw_line_raster_DEPTH) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_BLEND, sw_line_raster_BLEND) +DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_BLEND) + static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) { if (!sw_line_project_and_clip(v0, v1)) { return; } - if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_line_raster_DEPTH_BLEND(v0, v1); - } - else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_line_raster_BLEND(v0, v1); - } - else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_line_raster_DEPTH(v0, v1); + if (RLSW.lineWidth >= 2.0f) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_thick_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_thick_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_thick_raster_DEPTH(v0, v1); + } + else { + sw_line_thick_raster(v0, v1); + } } else { - sw_line_raster(v0, v1); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_line_raster_DEPTH_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_line_raster_BLEND(v0, v1); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_line_raster_DEPTH(v0, v1); + } + else { + sw_line_raster(v0, v1); + } } } @@ -2173,8 +2241,6 @@ static inline bool sw_is_blend_dst_factor_valid(int blend) void swInit(int w, int h) { - swViewport(0, 0, w, h); - sw_framebuffer_load( &RLSW.framebuffer.color, &RLSW.framebuffer.depth, @@ -2184,6 +2250,8 @@ void swInit(int w, int h) RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; + swViewport(0, 0, w, h); + RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); @@ -2372,6 +2440,11 @@ void swCullFace(SWface face) RLSW.cullFace = face; } +void swLineWidth(float width) +{ + RLSW.lineWidth = roundf(width); +} + void swMatrixMode(SWmatrix mode) { switch (mode) { From e15c0d963964b472ee5a34e4cca0e60ce55c7128 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 02:17:35 +0100 Subject: [PATCH 19/43] impl points + point size --- src/external/rlsw.h | 179 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 178 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 4b18027b0..574989451 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -272,6 +272,7 @@ void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); void swCullFace(SWface face); +void swPointSize(float size); void swLineWidth(float width); void swMatrixMode(SWmatrix mode); @@ -456,7 +457,7 @@ typedef struct { int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) - float pointSize; // Rasterized point size + float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width sw_matrix_t matProjection; // Projection matrix, user adjustable @@ -2160,6 +2161,174 @@ static inline void sw_line_render(sw_vertex_t* v0, sw_vertex_t* v1) } } + +/* === Point Rendering Part === */ + +static inline bool sw_point_project_and_clip(sw_vertex_t* v) +{ + sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); + + if (v->homogeneous[3] != 1.0f) { + for (int_fast8_t i = 0; i < 3; i++) { + if (v->homogeneous[i] < -v->homogeneous[3] || v->homogeneous[i] > v->homogeneous[3]) { + return false; + } + } + v->homogeneous[3] = 1.0f / v->homogeneous[3]; + v->homogeneous[0] *= v->homogeneous[3]; + v->homogeneous[1] *= v->homogeneous[3]; + v->homogeneous[2] *= v->homogeneous[3]; + } + + sw_project_ndc_to_screen(v->screen, v->homogeneous); + + return v->screen[0] - RLSW.pointRadius >= RLSW.vpMin[0] + && v->screen[1] - RLSW.pointRadius >= RLSW.vpMin[1] + && v->screen[0] + RLSW.pointRadius <= RLSW.vpMax[0] + && v->screen[1] + RLSW.pointRadius <= RLSW.vpMax[1]; +} + +#define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ +static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ +{ \ + if (CHECK_BOUNDS) \ + { \ + if (x < 0 || x >= RLSW.framebuffer.width) { \ + return; \ + } \ + if (y < 0 || y >= RLSW.framebuffer.height) { \ + return; \ + } \ + } \ + \ + int offset = y * RLSW.framebuffer.width + x; \ + \ + void* dptr = sw_framebuffer_get_depth_addr( \ + RLSW.framebuffer.depth, offset \ + ); \ + \ + if (ENABLE_DEPTH_TEST) \ + { \ + float depth = sw_framebuffer_read_depth(dptr); \ + if (z > depth) return; \ + } \ + \ + sw_framebuffer_write_depth(dptr, z); \ + \ + void* cptr = sw_framebuffer_get_color_addr( \ + RLSW.framebuffer.color, offset \ + ); \ + \ + if (ENABLE_COLOR_BLEND) \ + { \ + float dstColor[4]; \ + sw_framebuffer_read_color(dstColor, cptr); \ + \ + sw_blend_colors(dstColor, color); \ + sw_framebuffer_write_color(cptr, dstColor); \ + } \ + else \ + { \ + sw_framebuffer_write_color(cptr, color); \ + } \ +} + +#define DEFINE_POINT_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ +static inline void FUNC_NAME(sw_vertex_t* v) \ +{ \ + int cx = v->screen[0]; \ + int cy = v->screen[1]; \ + float cz = v->homogeneous[2]; \ + int radius = RLSW.pointRadius; \ + float* color = v->color; \ + \ + int x = 0; \ + int y = radius; \ + int d = 3 - 2 * radius; \ + \ + while (x <= y) { \ + for (int i = -x; i <= x; i++) { \ + RASTER_FUNC(cx + i, cy + y, cz, color); \ + RASTER_FUNC(cx + i, cy - y, cz, color); \ + } \ + for (int i = -y; i <= y; i++) { \ + RASTER_FUNC(cx + i, cy + x, cz, color); \ + RASTER_FUNC(cx + i, cy - x, cz, color); \ + } \ + if (d > 0) { \ + y--; \ + d = d + 4 * (x - y) + 10; \ + } else { \ + d = d + 4 * x + 6; \ + } \ + x++; \ + } \ +} + +DEFINE_POINT_RASTER(sw_point_raster, 0, 0, 0) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH, 1, 0, 0) +DEFINE_POINT_RASTER(sw_point_raster_BLEND, 0, 1, 0) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND, 1, 1, 0) + +DEFINE_POINT_RASTER(sw_point_raster_CHECK, 0, 0, 1) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1) +DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1) + +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK) + +static inline void sw_point_render(sw_vertex_t* v) +{ + if (!sw_point_project_and_clip(v)) { + return; + } + + if (RLSW.pointRadius >= 1.0f) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_thick_raster_DEPTH(v); + } + else { + sw_point_thick_raster(v); + } + } + else { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_raster_DEPTH_BLEND( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_raster_BLEND( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_raster_DEPTH( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + else { + sw_point_raster( + v->screen[0], v->screen[1], + v->homogeneous[2], v->color + ); + } + } +} + + /* === Some Validity Check Helper === */ static inline bool sw_is_texture_valid(uint32_t id) @@ -2440,6 +2609,11 @@ void swCullFace(SWface face) RLSW.cullFace = face; } +void swPointSize(float size) +{ + RLSW.pointRadius = floorf(size * 0.5f); +} + void swLineWidth(float width) { RLSW.lineWidth = roundf(width); @@ -2805,6 +2979,9 @@ void swVertex4fv(const float* v) switch (RLSW.drawMode) { case SW_POINTS: + sw_point_render( + &RLSW.vertexBuffer[0] + ); break; case SW_LINES: sw_line_render( From 793d56c70157fa74eb454035ed9fa507d7062ff7 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sun, 16 Mar 2025 03:42:33 +0100 Subject: [PATCH 20/43] fix and improve polygon clipping functions --- src/external/rlsw.h | 237 +++++++++++++++++++++++--------------------- 1 file changed, 122 insertions(+), 115 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 574989451..983ec91cf 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1320,144 +1320,151 @@ static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) /* === Triangle Rendering Part === */ -static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) -{ - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - - int inputCounter = *vertexCounter; - *vertexCounter = 0; - - const sw_vertex_t *prevVt = &input[inputCounter-1]; - char prevDot = (prevVt->homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1; - if (prevDot*currDot < 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], - (SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3])); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; +#define DEFINE_CLIP_FUNC(name, FUNC_IS_INSIDE, FUNC_COMPUTE_T) \ +static inline int sw_clip_##name( \ + sw_vertex_t output[SW_MAX_CLIPPED_POLYGON_VERTICES], \ + const sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES], \ + int n) \ +{ \ + const sw_vertex_t *prev = &input[n - 1]; \ + int prevInside = FUNC_IS_INSIDE(prev->homogeneous); \ + int outputCount = 0; \ + \ + for (int i = 0; i < n; i++) { \ + const sw_vertex_t *curr = &input[i]; \ + int currInside = FUNC_IS_INSIDE(curr->homogeneous); \ + \ + /* If transition between interior/exterior, calculate intersection point */ \ + if (prevInside != currInside) { \ + float t = FUNC_COMPUTE_T(prev->homogeneous, curr->homogeneous); \ + output[outputCount++] = sw_lerp_vertex_PNTCH(prev, curr, t); \ + } \ + \ + /* If current vertex inside, add it */ \ + if (currInside) { \ + output[outputCount++] = *curr; \ + } \ + \ + prev = curr; \ + prevInside = currInside; \ + } \ + \ + return outputCount; \ +} + +#define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON) +#define IS_INSIDE_PLANE_X_POS(h) ((h)[0] <= (h)[3]) +#define IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] <= (h)[3]) +#define IS_INSIDE_PLANE_Y_POS(h) ((h)[1] <= (h)[3]) +#define IS_INSIDE_PLANE_Y_NEG(h) (-(h)[1] <= (h)[3]) +#define IS_INSIDE_PLANE_Z_POS(h) ((h)[2] <= (h)[3]) +#define IS_INSIDE_PLANE_Z_NEG(h) (-(h)[2] <= (h)[3]) + +#define COMPUTE_T_PLANE_W(hPrev, hCurr) ((SW_CLIP_EPSILON - (hPrev)[3]) / ((hCurr)[3] - (hPrev)[3])) +#define COMPUTE_T_PLANE_X_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[0]) / (((hPrev)[3] - (hPrev)[0]) - ((hCurr)[3] - (hCurr)[0]))) +#define COMPUTE_T_PLANE_X_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[0]) / (((hPrev)[3] + (hPrev)[0]) - ((hCurr)[3] + (hCurr)[0]))) +#define COMPUTE_T_PLANE_Y_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[1]) / (((hPrev)[3] - (hPrev)[1]) - ((hCurr)[3] - (hCurr)[1]))) +#define COMPUTE_T_PLANE_Y_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[1]) / (((hPrev)[3] + (hPrev)[1]) - ((hCurr)[3] + (hCurr)[1]))) +#define COMPUTE_T_PLANE_Z_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[2]) / (((hPrev)[3] - (hPrev)[2]) - ((hCurr)[3] - (hCurr)[2]))) +#define COMPUTE_T_PLANE_Z_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[2]) / (((hPrev)[3] + (hPrev)[2]) - ((hCurr)[3] + (hCurr)[2]))) + +DEFINE_CLIP_FUNC(w, IS_INSIDE_PLANE_W, COMPUTE_T_PLANE_W) +DEFINE_CLIP_FUNC(x_pos, IS_INSIDE_PLANE_X_POS, COMPUTE_T_PLANE_X_POS) +DEFINE_CLIP_FUNC(x_neg, IS_INSIDE_PLANE_X_NEG, COMPUTE_T_PLANE_X_NEG) +DEFINE_CLIP_FUNC(y_pos, IS_INSIDE_PLANE_Y_POS, COMPUTE_T_PLANE_Y_POS) +DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG) +DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS) +DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG) + +static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) +{ + sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; + int n = *vertexCounter; + + #define CLIP_AGAINST_PLANE(FUNC_CLIP) \ + { \ + n = FUNC_CLIP(tmp, polygon, n); \ + if (n == 0) return false; \ + for (int i = 0; i < n; i++) { \ + polygon[i] = tmp[i]; \ + } \ } - return *vertexCounter > 0; -} - -static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) -{ - for (int iAxis = 0; iAxis < 3; iAxis++) - { - if (*vertexCounter == 0) return false; - - sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES]; - int inputCounter; - - const sw_vertex_t *prevVt; - char prevDot; - - // Clip against first plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; - - for (int i = 0; i < inputCounter; i++) { - char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot * currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - - if (*vertexCounter == 0) return false; - - // Clip against opposite plane - - for (int i = 0; i < SW_MAX_CLIPPED_POLYGON_VERTICES; i++) { - input[i] = polygon[i]; - } - inputCounter = *vertexCounter; - *vertexCounter = 0; - - prevVt = &input[inputCounter-1]; - prevDot = (-prevVt->homogeneous[iAxis] <= prevVt->homogeneous[3]) ? 1 : -1; + CLIP_AGAINST_PLANE(sw_clip_w); + CLIP_AGAINST_PLANE(sw_clip_x_pos); + CLIP_AGAINST_PLANE(sw_clip_x_neg); + CLIP_AGAINST_PLANE(sw_clip_y_pos); + CLIP_AGAINST_PLANE(sw_clip_y_neg); + CLIP_AGAINST_PLANE(sw_clip_z_pos); + CLIP_AGAINST_PLANE(sw_clip_z_neg); - for (int i = 0; i < inputCounter; i++) { - char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1; - if (prevDot*currDot <= 0) { - polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) / - ((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis]))); - } - if (currDot > 0) { - polygon[(*vertexCounter)++] = input[i]; - } - prevDot = currDot; - prevVt = &input[i]; - } - } + *vertexCounter = n; - return *vertexCounter > 0; + return n > 0; } static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { + // Step 1: MVP projection for all vertices for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; - sw_vec4_transform(v->homogeneous, v->position, RLSW.matMVP); + sw_vec4_transform(polygon[i].homogeneous, polygon[i].position, RLSW.matMVP); } + // Step 2: Face culling - discard triangles facing away if (RLSW.stateFlags & SW_STATE_CULL_FACE) { - float x0 = polygon[0].homogeneous[0], y0 = polygon[0].homogeneous[1]; - float x1 = polygon[1].homogeneous[0], y1 = polygon[1].homogeneous[1]; - float x2 = polygon[2].homogeneous[0], y2 = polygon[2].homogeneous[1]; - float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); - if ((RLSW.cullFace == SW_FRONT && sgnArea >= 0) || (RLSW.cullFace == SW_BACK && sgnArea <= 0)) { + // NOTE: Face culling is done before clipping to avoid unnecessary computations. + // However, culling requires NDC coordinates, while clipping must be done + // in homogeneous space to correctly interpolate newly generated vertices. + // This means we need to compute 1/W twice: + // - Once before clipping for face culling. + // - Again after clipping for the new vertices. + + const float invW0 = 1.0f / polygon[0].homogeneous[3]; + const float invW1 = 1.0f / polygon[1].homogeneous[3]; + const float invW2 = 1.0f / polygon[2].homogeneous[3]; + + // Compute the signed 2D area (cross product in Z) + const float x0 = polygon[0].homogeneous[0] * invW0, y0 = polygon[0].homogeneous[1] * invW0; + const float x1 = polygon[1].homogeneous[0] * invW1, y1 = polygon[1].homogeneous[1] * invW1; + const float x2 = polygon[2].homogeneous[0] * invW2, y2 = polygon[2].homogeneous[1] * invW2; + const float sgnArea = (x1 - x0) * (y2 - y0) - (x2 - x0) * (y1 - y0); + + // Discard the triangle if it faces the culled direction + if ((RLSW.cullFace == SW_FRONT) ? (sgnArea >= 0) : (sgnArea <= 0)) { *vertexCounter = 0; return; } } + + // Step 3: Clipping and perspective projection + if (sw_triangle_clip(polygon, vertexCounter) && *vertexCounter >= 3) { - if (sw_triangle_clip_w(polygon, vertexCounter) && sw_triangle_clip_xyz(polygon, vertexCounter)) { + // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { - sw_vertex_t *v = polygon + i; + sw_vertex_t *v = &polygon[i]; // Use &polygon[i] instead of polygon + i // Calculation of the reciprocal of W for normalization - // as well as perspective correct attributes - v->homogeneous[3] = 1.0f / v->homogeneous[3]; + // as well as perspective-correct attributes + const float invW = 1.0f / v->homogeneous[3]; + v->homogeneous[3] = invW; // Division of XYZ coordinates by weight - v->homogeneous[0] *= v->homogeneous[3]; - v->homogeneous[1] *= v->homogeneous[3]; - v->homogeneous[2] *= v->homogeneous[3]; - - // Division of texture coordinates (perspective correct) - v->texcoord[0] *= v->homogeneous[3]; - v->texcoord[1] *= v->homogeneous[3]; - - // Division of colors (perspective correct) - v->color[0] *= v->homogeneous[3]; - v->color[1] *= v->homogeneous[3]; - v->color[2] *= v->homogeneous[3]; - v->color[3] *= v->homogeneous[3]; - - // Transform to screen space + v->homogeneous[0] *= invW; + v->homogeneous[1] *= invW; + v->homogeneous[2] *= invW; + + // Division of texture coordinates (perspective-correct) + v->texcoord[0] *= invW; + v->texcoord[1] *= invW; + + // Division of colors (perspective-correct) + v->color[0] *= invW; + v->color[1] *= invW; + v->color[2] *= invW; + v->color[3] *= invW; + + // Transformation to screen space sw_project_ndc_to_screen(v->screen, v->homogeneous); } } From fc52b7710ca68a27bcb6c31d8f3a6cad5f1932fc Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 01:28:41 +0100 Subject: [PATCH 21/43] impl polygone modes --- src/external/rlsw.h | 174 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 135 insertions(+), 39 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 983ec91cf..5fd7a5327 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -103,6 +103,10 @@ //#define GL_QUAD_STRIP 0x0008 //#define GL_POLYGON 0x0009 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 + //#define GL_CW 0x0900 //#define GL_CCW 0x0901 @@ -187,9 +191,15 @@ typedef enum { SW_POINTS = GL_POINTS, SW_LINES = GL_LINES, SW_TRIANGLES = GL_TRIANGLES, - SW_QUADS = GL_QUADS, + SW_QUADS = GL_QUADS } SWdraw; +typedef enum { + SW_POINT = GL_POINT, + SW_LINE = GL_LINE, + SW_FILL = GL_FILL +} SWpoly; + typedef enum { SW_FRONT = GL_FRONT, SW_BACK = GL_BACK, @@ -270,6 +280,7 @@ void swClearColor(float r, float g, float b, float a); void swClear(uint32_t bitmask); void swBlendFunc(SWfactor sfactor, SWfactor dfactor); +void swPolygonMode(SWpoly mode); void swCullFace(SWface face); void swPointSize(float size); @@ -456,7 +467,8 @@ typedef struct { sw_vertex_t vertexBuffer[4]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' - SWdraw drawMode; // Current polygon filling mode (e.g., lines, triangles) + SWdraw drawMode; // Current primitive mode (e.g., lines, triangles) + SWpoly polyMode; // Current polygon filling mode (e.g., lines, triangles) float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width @@ -2336,6 +2348,66 @@ static inline void sw_point_render(sw_vertex_t* v) } +/* === Polygon Modes Rendering Part === */ + +static inline void sw_poly_point_render(void) +{ + for (int i = 0; i < RLSW.vertexCounter; i++) { + sw_point_render(RLSW.vertexBuffer); + } +} + +static inline void sw_poly_line_render(void) +{ + const sw_vertex_t* vertices = RLSW.vertexBuffer; + int cm1 = RLSW.vertexCounter - 1; + sw_vertex_t v0, v1; + + for (int i = 0; i < cm1; i++) { + v0 = vertices[i], v1 = vertices[i + 1]; + sw_line_render(&v0, &v1); + } + + v0 = vertices[cm1], v1 = vertices[0]; + sw_line_render(&v0, &v1); +} + +static inline void sw_poly_fill_render(void) +{ + switch (RLSW.drawMode) { + case SW_POINTS: + sw_point_render( + &RLSW.vertexBuffer[0] + ); + break; + case SW_LINES: + sw_line_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1] + ); + break; + case SW_TRIANGLES: + sw_triangle_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + break; + case SW_QUADS: + sw_triangle_render( + &RLSW.vertexBuffer[0], + &RLSW.vertexBuffer[1], + &RLSW.vertexBuffer[2] + ); + sw_triangle_render( + &RLSW.vertexBuffer[2], + &RLSW.vertexBuffer[3], + &RLSW.vertexBuffer[0] + ); + break; + } +} + /* === Some Validity Check Helper === */ static inline bool sw_is_texture_valid(uint32_t id) @@ -2359,6 +2431,41 @@ static inline bool sw_is_texture_wrap_valid(int wrap) return (wrap == SW_REPEAT || wrap == SW_CLAMP_TO_EDGE || SW_MIRRORED_REPEAT); } +static inline bool sw_is_draw_mode_valid(int mode) +{ + bool result = false; + + switch (mode) { + case SW_POINTS: + case SW_LINES: + case SW_TRIANGLES: + case SW_QUADS: + result = true; + break; + default: + break; + } + + return result; +} + +static inline bool sw_is_poly_mode_valid(int mode) +{ + bool result = false; + + switch (mode) { + case SW_POINT: + case SW_LINE: + case SW_FILL: + result = true; + break; + default: + break; + } + + return result; +} + static inline bool sw_is_face_valid(int face) { return (face == SW_FRONT || face == SW_BACK); @@ -2461,6 +2568,7 @@ void swInit(int w, int h) RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; + RLSW.polyMode = SW_FILL; RLSW.cullFace = SW_BACK; static const float defTex[3*2*2] = @@ -2607,6 +2715,15 @@ void swBlendFunc(SWfactor sfactor, SWfactor dfactor) RLSW.dstFactor = dfactor; } +void swPolygonMode(SWpoly mode) +{ + if (!sw_is_poly_mode_valid(mode)) { + RLSW.errCode = SW_INVALID_ENUM; + return; + } + RLSW.polyMode = mode; +} + void swCullFace(SWface face) { if (!sw_is_face_valid(face)) { @@ -2892,7 +3009,7 @@ void swOrtho(double left, double right, double bottom, double top, double znear, void swBegin(SWdraw mode) { - if (mode < SW_POINTS || mode > SW_QUADS) { + if (!sw_is_draw_mode_valid(mode)) { RLSW.errCode = SW_INVALID_ENUM; return; } @@ -2907,49 +3024,49 @@ void swEnd(void) void swVertex2i(int x, int y) { - float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; + const float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; swVertex4fv(v); } void swVertex2f(float x, float y) { - float v[4] = { x, y, 0.0f, 1.0f }; + const float v[4] = { x, y, 0.0f, 1.0f }; swVertex4fv(v); } void swVertex2fv(const float* v) { - float v4[4] = { v[0], v[1], 0.0f, 1.0f }; + const float v4[4] = { v[0], v[1], 0.0f, 1.0f }; swVertex4fv(v4); } void swVertex3i(int x, int y, int z) { - float v[4] = { (float)x, (float)y, (float)z, 1.0f }; + const float v[4] = { (float)x, (float)y, (float)z, 1.0f }; swVertex4fv(v); } void swVertex3f(float x, float y, float z) { - float v[4] = { x, y, z, 1.0f }; + const float v[4] = { x, y, z, 1.0f }; swVertex4fv(v); } void swVertex3fv(const float* v) { - float v4[4] = { v[0], v[1], v[2], 1.0f }; + const float v4[4] = { v[0], v[1], v[2], 1.0f }; swVertex4fv(v4); } void swVertex4i(int x, int y, int z, int w) { - float v[4] = { (float)x, (float)y, (float)z, (float)w }; + const float v[4] = { (float)x, (float)y, (float)z, (float)w }; swVertex4fv(v); } void swVertex4f(float x, float y, float z, float w) { - float v[4] = { x, y, z, w }; + const float v[4] = { x, y, z, w }; swVertex4fv(v); } @@ -2984,36 +3101,15 @@ void swVertex4fv(const float* v) sw_matrix_mul(RLSW.matMVP, RLSW.matMVP, RLSW.matProjection); } - switch (RLSW.drawMode) { - case SW_POINTS: - sw_point_render( - &RLSW.vertexBuffer[0] - ); + switch (RLSW.polyMode) { + case SW_FILL: + sw_poly_fill_render(); break; - case SW_LINES: - sw_line_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1] - ); + case SW_LINE: + sw_poly_line_render(); break; - case SW_TRIANGLES: - sw_triangle_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - break; - case SW_QUADS: - sw_triangle_render( - &RLSW.vertexBuffer[0], - &RLSW.vertexBuffer[1], - &RLSW.vertexBuffer[2] - ); - sw_triangle_render( - &RLSW.vertexBuffer[2], - &RLSW.vertexBuffer[3], - &RLSW.vertexBuffer[0] - ); + case SW_POINT: + sw_poly_point_render(); break; } From 06e49a048a2901253180ad275926f55eebe06c24 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 01:41:24 +0100 Subject: [PATCH 22/43] add some not planned functions - `glDepthMask` - `glColorMask` --- src/external/rlsw.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5fd7a5327..5103ee55d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -160,6 +160,13 @@ #define GL_UNSIGNED_INT 0x1405 #define GL_FLOAT 0x1406 + +/* === Not Implemented === */ + +#define glDepthMask(x) ((void)(x)) +#define glColorMask(x) ((void)(x)) + + /* === RLSW Enums === */ typedef enum { From fe600a5ea1bcf580cd42fd26949c57d0afaa323c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 02:04:59 +0100 Subject: [PATCH 23/43] framebuffer resizing + handle init failure --- src/external/rlsw.h | 99 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5103ee55d..dd79de64d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -35,6 +35,10 @@ # define SW_MALLOC(sz) malloc(sz) #endif +#ifndef SW_REALLOC +# define SW_REALLOC(ptr, newSz) realloc(ptr, newSz) +#endif + #ifndef SW_FREE # define SW_FREE(ptr) free(ptr) #endif @@ -273,10 +277,11 @@ typedef enum { /* === Public API === */ -void swInit(int w, int h); +bool swInit(int w, int h); void swClose(void); void* swGetColorBuffer(int* w, int* h); +bool swResizeFramebuffer(int w, int h); void swEnable(SWstate state); void swDisable(SWstate state); @@ -447,7 +452,9 @@ typedef struct { typedef struct { void *color; void *depth; - int width, height; + int width; + int height; + int allocSz; } sw_framebuffer_t; typedef struct { @@ -510,12 +517,12 @@ typedef struct { uint32_t stateFlags; -} sw_data_t; +} sw_context_t; /* === Global Data === */ -static sw_data_t RLSW = { 0 }; +static sw_context_t RLSW = { 0 }; /* === Helper Functions === */ @@ -597,14 +604,47 @@ static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_ve /* === Framebuffer Part === */ -static inline void sw_framebuffer_load(void** color, void** depth, int w, int h) +static inline bool sw_framebuffer_load(int w, int h) { int size = w * h; - *color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); - *depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + RLSW.framebuffer.color = SW_MALLOC(SW_COLOR_PIXEL_SIZE * size); + if (RLSW.framebuffer.color == NULL) return false; + + RLSW.framebuffer.depth = SW_MALLOC(SW_DEPTH_PIXEL_SIZE * size); + if (RLSW.framebuffer.depth == NULL) return false; - // TODO: Handle memory allocation failure + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + RLSW.framebuffer.allocSz = w * h; + + return true; +} + +static inline bool sw_framebuffer_resize(int w, int h) +{ + int newSize = w * h; + + if (newSize <= RLSW.framebuffer.allocSz) { + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + return true; + } + + void* newColor = SW_REALLOC(RLSW.framebuffer.color, newSize); + if (newColor == NULL) return false; + + void* newDepth = SW_REALLOC(RLSW.framebuffer.depth, newSize); + if (newDepth == NULL) return false; + + RLSW.framebuffer.color = newColor; + RLSW.framebuffer.depth = newDepth; + + RLSW.framebuffer.width = w; + RLSW.framebuffer.height = h; + RLSW.framebuffer.allocSz = newSize; + + return true; } static inline void* sw_framebuffer_get_color_addr(void* ptr, uint32_t offset) @@ -2529,21 +2569,19 @@ static inline bool sw_is_blend_dst_factor_valid(int blend) /* === Public Implementation === */ -void swInit(int w, int h) +bool swInit(int w, int h) { - sw_framebuffer_load( - &RLSW.framebuffer.color, - &RLSW.framebuffer.depth, - w, h - ); - - RLSW.framebuffer.width = w; - RLSW.framebuffer.height = h; + if (!sw_framebuffer_load(w, h)) { + swClose(); return false; + } swViewport(0, 0, w, h); RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); + if (RLSW.loadedTextures == NULL) { swClose(); return false; } + RLSW.freeTextureIds = SW_MALLOC(SW_MAX_TEXTURES); + if (RLSW.loadedTextures == NULL) { swClose(); return false; } RLSW.clearColor[0] = 0.0f; RLSW.clearColor[1] = 0.0f; @@ -2598,15 +2636,29 @@ void swInit(int w, int h) RLSW.loadedTextures[0].ty = 0.5f; RLSW.loadedTextureCount = 1; + + return true; } void swClose(void) { - SW_FREE(RLSW.framebuffer.color); - SW_FREE(RLSW.framebuffer.depth); + if (RLSW.framebuffer.color != NULL) { + SW_FREE(RLSW.framebuffer.color); + } - SW_FREE(RLSW.loadedTextures); - SW_FREE(RLSW.freeTextureIds); + if (RLSW.framebuffer.depth != NULL) { + SW_FREE(RLSW.framebuffer.depth); + } + + if (RLSW.loadedTextures != NULL) { + SW_FREE(RLSW.loadedTextures); + } + + if (RLSW.freeTextureIds != NULL) { + SW_FREE(RLSW.freeTextureIds); + } + + RLSW = (sw_context_t) { 0 }; } void* swGetColorBuffer(int* w, int* h) @@ -2617,6 +2669,11 @@ void* swGetColorBuffer(int* w, int* h) return RLSW.framebuffer.color; } +bool swResizeFramebuffer(int w, int h) +{ + return sw_framebuffer_resize(w, h); +} + void swEnable(SWstate state) { switch (state) { From 268a410a130ac6003c52605c79e5af244d4178b2 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 02:19:28 +0100 Subject: [PATCH 24/43] add quick notes about line clipping algorithms used --- src/external/rlsw.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index dd79de64d..7dc6173f7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1856,6 +1856,7 @@ static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, in return code; } +// Cohen-Sutherland algorithm, faster but for 2D only static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) { int xMin = RLSW.vpMin[0]; @@ -1931,6 +1932,7 @@ static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) return 1; } +// Liang-Barsky algorithm variant, more robust but slightly slower static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) { // TODO: Lerp all vertices here, not just homogeneous coordinates From d3b6fa4785b10b6b8fd6a458b5a61e1efcff9af3 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Wed, 19 Mar 2025 04:21:27 +0100 Subject: [PATCH 25/43] start to impl scissor test + review line clipping The support for the scissor test has been implemented for clearing as well as for triangle clipping. The implementation for lines and points is still missing. I also removed the 2D clipping of lines that used the Cohen-Sutherland algorithm, opting instead to always use the Liang-Barsky algorithm in all cases. This simplifies the implementation, and the 2D version would have caused issues when interpolating vertices in the future if we want to implement additional features. --- src/external/rlsw.h | 437 ++++++++++++++++++++++++++++---------------- 1 file changed, 278 insertions(+), 159 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 7dc6173f7..5f683a3b9 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -28,6 +28,9 @@ #include #include +// TODO: Review the use of viewport dimensions stored with -1 +// It seems there are issues with the NDC -> screen projection +// Also, consider testing and reviewing, if necessary, scissor clipping as well as line clipping /* === RLSW Definition And Macros === */ @@ -78,6 +81,7 @@ /* === OpenGL Definitions === */ +#define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 @@ -174,6 +178,7 @@ /* === RLSW Enums === */ typedef enum { + SW_SCISSOR_TEST = GL_SCISSOR_TEST, SW_TEXTURE_2D = GL_TEXTURE_2D, SW_DEPTH_TEST = GL_DEPTH_TEST, SW_CULL_FACE = GL_CULL_FACE, @@ -287,6 +292,7 @@ void swEnable(SWstate state); void swDisable(SWstate state); void swViewport(int x, int y, int width, int height); +void swScissor(int x, int y, int width, int height); void swClearColor(float r, float g, float b, float a); void swClear(uint32_t bitmask); @@ -376,10 +382,11 @@ void swBindTexture(uint32_t id); #define SW_STATE_CHECK(flags) ((RLSW.stateFlags & (flags)) == (flags)) -#define SW_STATE_TEXTURE_2D (1 << 0) -#define SW_STATE_DEPTH_TEST (1 << 1) -#define SW_STATE_CULL_FACE (1 << 2) -#define SW_STATE_BLEND (1 << 3) +#define SW_STATE_SCISSOR_TEST (1 << 0) +#define SW_STATE_TEXTURE_2D (1 << 1) +#define SW_STATE_DEPTH_TEST (1 << 2) +#define SW_STATE_CULL_FACE (1 << 3) +#define SW_STATE_BLEND (1 << 4) #define SW_CLIP_INSIDE (0x00) // 0000 #define SW_CLIP_LEFT (0x01) // 0001 @@ -471,6 +478,13 @@ typedef struct { int vpMin[2]; // Represents the minimum renderable point of the viewport (top-left) int vpMax[2]; // Represents the maximum renderable point of the viewport (bottom-right) + int scPos[2]; // Represents the top-left corner of the scissor rect + int scDim[2]; // Represents the dimensions of the scissor rect (minus one) + int scMin[2]; // Represents the minimum renderable point of the scissor rect (top-left) + int scMax[2]; // Represents the maximum renderable point of the scissor rect (bottom-right) + float scHMax[2]; + float scHMin[2]; + struct { float* positions; float* texcoords; @@ -745,26 +759,59 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = (r << 5) | (g << 2) | b; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = (r << 5) | (g << 2) | b; + } + } + } else { + for (int i = 0; i < size; i++) { + p[i] = (r << 5) | (g << 2) | b; + } } #elif (SW_COLOR_BUFFER_BITS == 16) uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; uint16_t* p = (uint16_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = (r << 11) | (g << 5) | b; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = (r << 11) | (g << 5) | b; + } + } + } else { + for (int i = 0; i < size; i++) { + p[i] = (r << 11) | (g << 5) | b; + } } #elif (SW_COLOR_BUFFER_BITS == 24) uint8_t r = (uint8_t)(color[0] * 255); uint8_t g = (uint8_t)(color[1] * 255); uint8_t b = (uint8_t)(color[2] * 255); uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - *p++ = r; - *p++ = g; - *p++ = b; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + int offset = (y * RLSW.framebuffer.width + x) * 3; + p[offset + 0] = r; + p[offset + 1] = g; + p[offset + 2] = b; + } + } + } else { + for (int i = 0; i < size; i++) { + *p++ = r; + *p++ = g; + *p++ = b; + } } #endif } @@ -774,78 +821,140 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) #if (SW_DEPTH_BUFFER_BITS == 8) uint8_t v = value * UINT8_MAX; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = v; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = v; + } + } + } + else { + for (int i = 0; i < size; i++) { + p[i] = v; + } } #elif (SW_DEPTH_BUFFER_BITS == 16) uint16_t v = value * UINT16_MAX; uint16_t* p = (uint16_t*)ptr; - for (int i = 0; i < size; i++) { - p[i] = v; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + p[y * RLSW.framebuffer.width + x] = v; + } + } + } + else { + for (int i = 0; i < size; i++) { + p[i] = v; + } } #elif (SW_DEPTH_BUFFER_BITS == 24) uint32_t v = value * UINT32_MAX; uint8_t* p = (uint8_t*)ptr; - for (int i = 0; i < size; i++) { - *p++ = (v >> 16) & 0xFF; - *p++ = (v >> 8) & 0xFF; - *p++ = v & 0xFF; + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int offset = y * RLSW.framebuffer.width + x; + p[3 * offset + 0] = (v >> 16) & 0xFF; + p[3 * offset + 1] = (v >> 8) & 0xFF; + p[3 * offset + 2] = v & 0xFF; + } + } + } + else { + for (int i = 0; i < size; i++) { + *p++ = (v >> 16) & 0xFF; + *p++ = (v >> 8) & 0xFF; + *p++ = v & 0xFF; + } } #endif } -static inline void sw_framebuffer_fill(void* color_ptr, void* depth_ptr, int size, float color[4], float depth_value) +static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, float color[4], float depth_value) { #if (SW_COLOR_BUFFER_BITS == 8) uint8_t r = ((uint8_t)(color[0] * UINT8_MAX) >> 5) & 0x07; uint8_t g = ((uint8_t)(color[1] * UINT8_MAX) >> 5) & 0x07; uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; - uint8_t* color_p = (uint8_t*)color_ptr; + uint8_t* cptr = (uint8_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 16) uint8_t r = (uint8_t)(color[0] * 31.0f + 0.5f) & 0x1F; uint8_t g = (uint8_t)(color[1] * 63.0f + 0.5f) & 0x3F; uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; - uint16_t* color_p = (uint16_t*)color_ptr; + uint16_t* cptr = (uint16_t*)colorPtr; #elif (SW_COLOR_BUFFER_BITS == 24) uint8_t r = (uint8_t)(color[0] * 255); uint8_t g = (uint8_t)(color[1] * 255); uint8_t b = (uint8_t)(color[2] * 255); - uint8_t* color_p = (uint8_t*)color_ptr; + uint8_t* cptr = (uint8_t*)colorPtr; #endif #if (SW_DEPTH_BUFFER_BITS == 8) - uint8_t depth_v = depth_value * UINT8_MAX; - uint8_t* depth_p = (uint8_t*)depth_ptr; + uint8_t d = depth_value * UINT8_MAX; + uint8_t* dptr = (uint8_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 16) - uint16_t depth_v = depth_value * UINT16_MAX; - uint16_t* depth_p = (uint16_t*)depth_ptr; + uint16_t d = depth_value * UINT16_MAX; + uint16_t* dptr = (uint16_t*)depthPtr; #elif (SW_DEPTH_BUFFER_BITS == 24) - uint32_t depth_v = depth_value * UINT32_MAX; - uint8_t* depth_p = (uint8_t*)depth_ptr; + uint32_t d = depth_value * UINT32_MAX; + uint8_t* dptr = (uint8_t*)depthPtr; #endif - for (int i = 0; i < size; i++) { - // Remplir le buffer de couleurs -#if (SW_COLOR_BUFFER_BITS == 8) - color_p[i] = (r << 5) | (g << 2) | b; -#elif (SW_COLOR_BUFFER_BITS == 16) - color_p[i] = (r << 11) | (g << 5) | b; -#elif (SW_COLOR_BUFFER_BITS == 24) - *color_p++ = r; - *color_p++ = g; - *color_p++ = b; -#endif + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + int w = RLSW.scPos[0] + RLSW.scDim[0]; + int h = RLSW.scPos[1] + RLSW.scDim[1]; + for (int y = RLSW.scPos[1]; y < h; y++) { + for (int x = RLSW.scPos[0]; x < w; x++) { + int offset = y * RLSW.framebuffer.width + x; +# if (SW_COLOR_BUFFER_BITS == 8) + cptr[offset] = (r << 5) | (g << 2) | b; +# elif (SW_COLOR_BUFFER_BITS == 16) + cptr[offset] = (r << 11) | (g << 5) | b; +# elif (SW_COLOR_BUFFER_BITS == 24) + cptr[3 * offset + 0] = r; + cptr[3 * offset + 1] = g; + cptr[3 * offset + 2] = b; +# endif +# if (SW_DEPTH_BUFFER_BITS == 8) + dptr[offset] = d; +# elif (SW_DEPTH_BUFFER_BITS == 16) + dptr[offset] = d; +# elif (SW_DEPTH_BUFFER_BITS == 24) + dptr[3 * offset + 0] = (d >> 16) & 0xFF; + dptr[3 * offset + 1] = (d >> 8) & 0xFF; + dptr[3 * offset + 2] = d & 0xFF; +# endif + } + } + return; + } - // Remplir le buffer de profondeur -#if (SW_DEPTH_BUFFER_BITS == 8) - depth_p[i] = depth_v; -#elif (SW_DEPTH_BUFFER_BITS == 16) - depth_p[i] = depth_v; -#elif (SW_DEPTH_BUFFER_BITS == 24) - *depth_p++ = (depth_v >> 16) & 0xFF; - *depth_p++ = (depth_v >> 8) & 0xFF; - *depth_p++ = depth_v & 0xFF; -#endif + for (int i = 0; i < size; i++) { +# if (SW_COLOR_BUFFER_BITS == 8) + cptr[i] = (r << 5) | (g << 2) | b; +# elif (SW_COLOR_BUFFER_BITS == 16) + cptr[i] = (r << 11) | (g << 5) | b; +# elif (SW_COLOR_BUFFER_BITS == 24) + *cptr++ = r; + *cptr++ = g; + *cptr++ = b; +# endif +# if (SW_DEPTH_BUFFER_BITS == 8) + dptr[i] = d; +# elif (SW_DEPTH_BUFFER_BITS == 16) + dptr[i] = d; +# elif (SW_DEPTH_BUFFER_BITS == 24) + *dptr++ = (d >> 16) & 0xFF; + *dptr++ = (d >> 8) & 0xFF; + *dptr++ = d & 0xFF; +# endif } } @@ -1411,6 +1520,8 @@ static inline int sw_clip_##name( return outputCount; \ } +// Frustum clip functions + #define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON) #define IS_INSIDE_PLANE_X_POS(h) ((h)[0] <= (h)[3]) #define IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] <= (h)[3]) @@ -1435,6 +1546,25 @@ DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG) DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS) DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG) +// Scissor clip functions + +#define COMPUTE_T_SCISSOR_X_MIN(hPrev, hCurr) (((RLSW.scHMin[0]) * (hPrev)[3] - (hPrev)[0]) / (((hCurr)[0] - (RLSW.scHMin[0]) * (hCurr)[3]) - ((hPrev)[0] - (RLSW.scHMin[0]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_X_MAX(hPrev, hCurr) (((RLSW.scHMax[0]) * (hPrev)[3] - (hPrev)[0]) / (((hCurr)[0] - (RLSW.scHMax[0]) * (hCurr)[3]) - ((hPrev)[0] - (RLSW.scHMax[0]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_Y_MIN(hPrev, hCurr) (((RLSW.scHMin[1]) * (hPrev)[3] - (hPrev)[1]) / (((hCurr)[1] - (RLSW.scHMin[1]) * (hCurr)[3]) - ((hPrev)[1] - (RLSW.scHMin[1]) * (hPrev)[3]))) +#define COMPUTE_T_SCISSOR_Y_MAX(hPrev, hCurr) (((RLSW.scHMax[1]) * (hPrev)[3] - (hPrev)[1]) / (((hCurr)[1] - (RLSW.scHMax[1]) * (hCurr)[3]) - ((hPrev)[1] - (RLSW.scHMax[1]) * (hPrev)[3]))) + +#define IS_INSIDE_SCISSOR_X_MIN(h) ((h)[0] >= (RLSW.scHMin[0]) * (h)[3]) +#define IS_INSIDE_SCISSOR_X_MAX(h) ((h)[0] <= (RLSW.scHMax[0]) * (h)[3]) +#define IS_INSIDE_SCISSOR_Y_MIN(h) ((h)[1] >= (RLSW.scHMin[1]) * (h)[3]) +#define IS_INSIDE_SCISSOR_Y_MAX(h) ((h)[1] <= (RLSW.scHMax[1]) * (h)[3]) + +DEFINE_CLIP_FUNC(scissor_x_min, IS_INSIDE_SCISSOR_X_MIN, COMPUTE_T_SCISSOR_X_MIN) +DEFINE_CLIP_FUNC(scissor_x_max, IS_INSIDE_SCISSOR_X_MAX, COMPUTE_T_SCISSOR_X_MAX) +DEFINE_CLIP_FUNC(scissor_y_min, IS_INSIDE_SCISSOR_Y_MIN, COMPUTE_T_SCISSOR_Y_MIN) +DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX) + +// Main clip function + static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int* vertexCounter) { sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; @@ -1457,6 +1587,13 @@ static inline bool sw_triangle_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_V CLIP_AGAINST_PLANE(sw_clip_z_pos); CLIP_AGAINST_PLANE(sw_clip_z_neg); + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + CLIP_AGAINST_PLANE(sw_clip_scissor_x_min); + CLIP_AGAINST_PLANE(sw_clip_scissor_x_max); + CLIP_AGAINST_PLANE(sw_clip_scissor_y_min); + CLIP_AGAINST_PLANE(sw_clip_scissor_y_max); + } + *vertexCounter = n; return n > 0; @@ -1846,72 +1983,7 @@ static inline void sw_triangle_render(const sw_vertex_t* v0, const sw_vertex_t* /* === Line Rendering Part === */ -static inline uint8_t sw_line_clip_encode_2d(const float screen[2], int xMin, int yMin, int xMax, int yMax) -{ - uint8_t code = SW_CLIP_INSIDE; - if (screen[0] < xMin) code |= SW_CLIP_LEFT; - if (screen[0] > xMax) code |= SW_CLIP_RIGHT; - if (screen[1] < yMin) code |= SW_CLIP_TOP; - if (screen[1] > yMax) code |= SW_CLIP_BOTTOM; - return code; -} - -// Cohen-Sutherland algorithm, faster but for 2D only -static inline bool sw_line_clip_2d(sw_vertex_t* v1, sw_vertex_t* v2) -{ - int xMin = RLSW.vpMin[0]; - int yMin = RLSW.vpMin[1]; - int xMax = RLSW.vpMax[0]; - int yMax = RLSW.vpMax[1]; - - bool accept = false; - uint8_t code0, code1; - float m = 0; - - if (v1->screen[0] != v2->screen[0]) { - m = (v2->screen[1] - v1->screen[1]) / (v2->screen[0] - v1->screen[0]); - } - - for (;;) { - code0 = sw_line_clip_encode_2d(v1->screen, xMin, yMin, xMax, yMax); - code1 = sw_line_clip_encode_2d(v2->screen, xMin, yMin, xMax, yMax); - - // Accepted if both endpoints lie within rectangle - if ((code0 | code1) == 0) { - accept = true; - break; - } - - // Rejected if both endpoints are outside rectangle, in same region - if (code0 & code1) break; - - if (code0 == SW_CLIP_INSIDE) { - uint8_t ctmp = code0; code0 = code1; code1 = ctmp; - sw_vertex_t vtmp = *v1; *v1 = *v2; *v2 = vtmp; - } - - if (code0 & SW_CLIP_LEFT) { - v1->screen[1] += (RLSW.vpMin[0] - v1->screen[0])*m; - v1->screen[0] = (float)RLSW.vpMin[0]; - } - else if (code0 & SW_CLIP_RIGHT) { - v1->screen[1] += (RLSW.vpMax[0] - v1->screen[0])*m; - v1->screen[0] = (float)RLSW.vpMax[0]; - } - else if (code0 & SW_CLIP_BOTTOM) { - if (m) v1->screen[0] += (RLSW.vpMin[1] - v1->screen[1]) / m; - v1->screen[1] = (float)RLSW.vpMin[1]; - } - else if (code0 & SW_CLIP_TOP) { - if (m) v1->screen[0] += (RLSW.vpMax[1] - v1->screen[1]) / m; - v1->screen[1] = (float)RLSW.vpMax[1]; - } - } - - return accept; -} - -static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) +static inline bool sw_line_clip_coord(float q, float p, float* t0, float* t1) { if (fabsf(p) < SW_CLIP_EPSILON) { // Check if the line is entirely outside the window @@ -1922,46 +1994,47 @@ static inline bool sw_line_clip_coord_3d(float q, float p, float* t1, float* t2) const float r = q / p; if (p < 0) { - if (r > *t2) return 0; - if (r > *t1) *t1 = r; + if (r > *t1) return 0; + if (r > *t0) *t0 = r; } else { - if (r < *t1) return 0; - if (r < *t2) *t2 = r; + if (r < *t0) return 0; + if (r < *t1) *t1 = r; } return 1; } -// Liang-Barsky algorithm variant, more robust but slightly slower -static inline bool sw_line_clip_3d(sw_vertex_t* v1, sw_vertex_t* v2) +static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) { - // TODO: Lerp all vertices here, not just homogeneous coordinates + // Uses Liang-Barsky algorithm - float t1 = 0, t2 = 1; + float t0 = 0; + float t1 = 1; - float delta[4]; + float dH[4], dC[4]; for (int i = 0; i < 4; i++) { - delta[i] = v2->homogeneous[i] - v1->homogeneous[i]; + dH[i] = v1->homogeneous[i] - v0->homogeneous[i]; + dC[i] = v1->color[i] - v0->color[i]; } - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[0], -delta[3] + delta[0], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[0], -delta[3] - delta[0], &t1, &t2)) return false; - - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[1], -delta[3] + delta[1], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[1], -delta[3] - delta[1], &t1, &t2)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[1], -dH[3] - dH[1], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] - v1->homogeneous[2], -delta[3] + delta[2], &t1, &t2)) return false; - if (!sw_line_clip_coord_3d(v1->homogeneous[3] + v1->homogeneous[2], -delta[3] - delta[2], &t1, &t2)) return false; - - if (t2 < 1) { + if (t1 < 1) { for (int i = 0; i < 4; i++) { - v2->homogeneous[i] = v1->homogeneous[i] + t2 * delta[i]; + v1->homogeneous[i] = v0->homogeneous[i] + t1 * dH[i]; + v1->color[i] = v0->color[i] + t1 * dC[i]; } } - if (t1 > 0) { + if (t0 > 0) { for (int i = 0; i < 4; i++) { - v1->homogeneous[i] = v1->homogeneous[i] + t1 * delta[i]; + v0->homogeneous[i] = v0->homogeneous[i] + t0 * dH[i]; + v0->color[i] = v0->color[i] + t0 * dC[i]; } } @@ -1973,29 +2046,22 @@ static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) sw_vec4_transform(v0->homogeneous, v0->position, RLSW.matMVP); sw_vec4_transform(v1->homogeneous, v1->position, RLSW.matMVP); - if (v0->homogeneous[3] == 1.0f && v1->homogeneous[3] == 1.0f) { - sw_project_ndc_to_screen(v0->screen, v0->homogeneous); - sw_project_ndc_to_screen(v1->screen, v1->homogeneous); - if (!sw_line_clip_2d(v0, v1)) { - return false; - } + if (!sw_line_clip(v0, v1)) { + return false; } - else { - if (!sw_line_clip_3d(v0, v1)) { - return false; - } - // Convert XYZ coordinates to NDC - v0->homogeneous[3] = 1.0f / v0->homogeneous[3]; - v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; - for (int i = 0; i < 3; i++) { - v0->homogeneous[i] *= v0->homogeneous[3]; - v1->homogeneous[i] *= v1->homogeneous[3]; - } - // Convert NDC coordinates to screen space - sw_project_ndc_to_screen(v0->screen, v0->homogeneous); - sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + + // Convert homogeneous coordinates to NDC + v0->homogeneous[3] = 1.0f / v0->homogeneous[3]; + v1->homogeneous[3] = 1.0f / v1->homogeneous[3]; + for (int i = 0; i < 3; i++) { + v0->homogeneous[i] *= v0->homogeneous[3]; + v1->homogeneous[i] *= v1->homogeneous[3]; } + // Convert NDC coordinates to screen space + sw_project_ndc_to_screen(v0->screen, v0->homogeneous); + sw_project_ndc_to_screen(v1->screen, v1->homogeneous); + return true; } @@ -2578,6 +2644,7 @@ bool swInit(int w, int h) } swViewport(0, 0, w, h); + swScissor(0, 0, w, h); RLSW.loadedTextures = SW_MALLOC(SW_MAX_TEXTURES); if (RLSW.loadedTextures == NULL) { swClose(); return false; } @@ -2679,6 +2746,9 @@ bool swResizeFramebuffer(int w, int h) void swEnable(SWstate state) { switch (state) { + case SW_SCISSOR_TEST: + RLSW.stateFlags |= SW_STATE_SCISSOR_TEST; + break; case SW_TEXTURE_2D: RLSW.stateFlags |= SW_STATE_TEXTURE_2D; break; @@ -2700,6 +2770,9 @@ void swEnable(SWstate state) void swDisable(SWstate state) { switch (state) { + case SW_SCISSOR_TEST: + RLSW.stateFlags &= ~SW_STATE_SCISSOR_TEST; + break; case SW_TEXTURE_2D: RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; break; @@ -2744,6 +2817,52 @@ void swViewport(int x, int y, int width, int height) RLSW.vpMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; } +void swScissor(int x, int y, int width, int height) +{ + if (x < 0) x = 0; + if (y < 0) y = 0; + + if (width < 0) width = 0; + if (height < 0) height = 0; + + if (x >= RLSW.framebuffer.width) { + x = RLSW.framebuffer.width - 1; + } + if (y >= RLSW.framebuffer.height) { + y = RLSW.framebuffer.height - 1; + } + + if (width >= RLSW.framebuffer.width) { + width = RLSW.framebuffer.width - 1; + } + if (height >= RLSW.framebuffer.height) { + height = RLSW.framebuffer.height - 1; + } + + RLSW.scPos[0] = x; + RLSW.scPos[1] = y; + + RLSW.scDim[0] = width - 1; + RLSW.scDim[1] = height - 1; + + RLSW.scMin[0] = (x < 0) ? 0 : x; + RLSW.scMin[1] = (y < 0) ? 0 : y; + + int fbW = RLSW.framebuffer.width - 1; + int fbH = RLSW.framebuffer.height - 1; + + int vpMaxX = x + width; + int vpMaxY = y + height; + + RLSW.scMax[0] = (vpMaxX < fbW) ? vpMaxX : fbW; + RLSW.scMax[1] = (vpMaxY < fbH) ? vpMaxY : fbH; + + RLSW.scHMin[0] = (2.0f * (float)RLSW.scMin[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; + RLSW.scHMax[0] = (2.0f * (float)RLSW.scMax[0] / (float)(RLSW.vpDim[0] + 1)) - 1.0f; + RLSW.scHMax[1] = 1.0f - (2.0f * (float)RLSW.scMin[1] / (float)(RLSW.vpDim[1] + 1)); + RLSW.scHMin[1] = 1.0f - (2.0f * (float)RLSW.scMax[1] / (float)(RLSW.vpDim[1] + 1)); +} + void swClearColor(float r, float g, float b, float a) { RLSW.clearColor[0] = r; From 0fb0c5cd380bd55b0a2826f1403308f40463c0d4 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 12:44:32 +0100 Subject: [PATCH 26/43] review scissor clear --- src/external/rlsw.h | 48 ++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 5f683a3b9..c06c7d2d8 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -81,7 +81,7 @@ /* === OpenGL Definitions === */ -#define GL_SCISSOR_TEST 0x0C11 +#define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 @@ -482,8 +482,8 @@ typedef struct { int scDim[2]; // Represents the dimensions of the scissor rect (minus one) int scMin[2]; // Represents the minimum renderable point of the scissor rect (top-left) int scMax[2]; // Represents the maximum renderable point of the scissor rect (bottom-right) - float scHMax[2]; - float scHMin[2]; + float scHMin[2]; // Represents the minimum renderable point of the scissor rect in clip space + float scHMax[2]; // Represents the maximum renderable point of the scissor rect in clip space struct { float* positions; @@ -760,10 +760,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = ((uint8_t)(color[2] * UINT8_MAX) >> 6) & 0x03; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = (r << 5) | (g << 2) | b; } } @@ -778,10 +776,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = (uint8_t)(color[2] * 31.0f + 0.5f) & 0x1F; uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = (r << 11) | (g << 5) | b; } } @@ -796,10 +792,8 @@ static inline void sw_framebuffer_fill_color(void* ptr, int size, float color[4] uint8_t b = (uint8_t)(color[2] * 255); uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = (y * RLSW.framebuffer.width + x) * 3; p[offset + 0] = r; p[offset + 1] = g; @@ -822,10 +816,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint8_t v = value * UINT8_MAX; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = v; } } @@ -839,10 +831,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint16_t v = value * UINT16_MAX; uint16_t* p = (uint16_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { p[y * RLSW.framebuffer.width + x] = v; } } @@ -856,10 +846,8 @@ static inline void sw_framebuffer_fill_depth(void* ptr, int size, float value) uint32_t v = value * UINT32_MAX; uint8_t* p = (uint8_t*)ptr; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = y * RLSW.framebuffer.width + x; p[3 * offset + 0] = (v >> 16) & 0xFF; p[3 * offset + 1] = (v >> 8) & 0xFF; @@ -908,10 +896,8 @@ static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, #endif if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { - int w = RLSW.scPos[0] + RLSW.scDim[0]; - int h = RLSW.scPos[1] + RLSW.scDim[1]; - for (int y = RLSW.scPos[1]; y < h; y++) { - for (int x = RLSW.scPos[0]; x < w; x++) { + for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { + for (int x = RLSW.scMin[0]; x <= RLSW.scMax[0]; x++) { int offset = y * RLSW.framebuffer.width + x; # if (SW_COLOR_BUFFER_BITS == 8) cptr[offset] = (r << 5) | (g << 2) | b; From 3441cf632746bbf80050c981a5e4e44e0126c46d Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 12:47:31 +0100 Subject: [PATCH 27/43] review `swScissor` --- src/external/rlsw.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c06c7d2d8..04e7d6709 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -601,6 +601,13 @@ static inline float sw_saturate(float x) // return (x < 0.0f) ? 0.0f : ((x > 1.0f) ? 1.0f : x); } +static inline int sw_clampi(int v, int min, int max) +{ + if (v < min) return min; + if (v > max) return max; + return v; +} + static inline float sw_lerp(float a, float b, float t) { return a + t * (b - a); @@ -2805,25 +2812,10 @@ void swViewport(int x, int y, int width, int height) void swScissor(int x, int y, int width, int height) { - if (x < 0) x = 0; - if (y < 0) y = 0; - - if (width < 0) width = 0; - if (height < 0) height = 0; - - if (x >= RLSW.framebuffer.width) { - x = RLSW.framebuffer.width - 1; - } - if (y >= RLSW.framebuffer.height) { - y = RLSW.framebuffer.height - 1; - } - - if (width >= RLSW.framebuffer.width) { - width = RLSW.framebuffer.width - 1; - } - if (height >= RLSW.framebuffer.height) { - height = RLSW.framebuffer.height - 1; - } + sw_clampi(x, 0, RLSW.framebuffer.width); + sw_clampi(y, 0, RLSW.framebuffer.height); + sw_clampi(width, 0, RLSW.framebuffer.width); + sw_clampi(width, 0, RLSW.framebuffer.height); RLSW.scPos[0] = x; RLSW.scPos[1] = y; From f0b02b8cad6348066e38dca4c527a185567f84ba Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:18:41 +0100 Subject: [PATCH 28/43] impl line scissor clipping --- src/external/rlsw.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 04e7d6709..d80bbb58d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -1999,17 +1999,15 @@ static inline bool sw_line_clip_coord(float q, float p, float* t0, float* t1) static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) { - // Uses Liang-Barsky algorithm - - float t0 = 0; - float t1 = 1; - + float t0 = 0.0f, t1 = 1.0f; float dH[4], dC[4]; + for (int i = 0; i < 4; i++) { dH[i] = v1->homogeneous[i] - v0->homogeneous[i]; dC[i] = v1->color[i] - v0->color[i]; } + // Clipping Liang-Barsky if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false; @@ -2017,17 +2015,25 @@ static inline bool sw_line_clip(sw_vertex_t* v0, sw_vertex_t* v1) if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false; - if (t1 < 1) { + // Clipping Scissor + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + if (!sw_line_clip_coord(v0->homogeneous[0] - RLSW.scHMin[0] * v0->homogeneous[3], RLSW.scHMin[0] * dH[3] - dH[0], &t0, &t1)) return false; + if (!sw_line_clip_coord(RLSW.scHMax[0] * v0->homogeneous[3] - v0->homogeneous[0], dH[0] - RLSW.scHMax[0] * dH[3], &t0, &t1)) return false; + if (!sw_line_clip_coord(v0->homogeneous[1] - RLSW.scHMin[1] * v0->homogeneous[3], RLSW.scHMin[1] * dH[3] - dH[1], &t0, &t1)) return false; + if (!sw_line_clip_coord(RLSW.scHMax[1] * v0->homogeneous[3] - v0->homogeneous[1], dH[1] - RLSW.scHMax[1] * dH[3], &t0, &t1)) return false; + } + + // Interpolation of new coordinates + if (t1 < 1.0f) { for (int i = 0; i < 4; i++) { v1->homogeneous[i] = v0->homogeneous[i] + t1 * dH[i]; v1->color[i] = v0->color[i] + t1 * dC[i]; } } - - if (t0 > 0) { + if (t0 > 0.0f) { for (int i = 0; i < 4; i++) { - v0->homogeneous[i] = v0->homogeneous[i] + t0 * dH[i]; - v0->color[i] = v0->color[i] + t0 * dC[i]; + v0->homogeneous[i] += t0 * dH[i]; + v0->color[i] += t0 * dC[i]; } } From 633e34e4575a4bbb8002f73190cebdddced608ff Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:19:55 +0100 Subject: [PATCH 29/43] round screen coordinate (line rasterization) --- src/external/rlsw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d80bbb58d..29de03ccb 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2067,10 +2067,10 @@ static inline bool sw_line_project_and_clip(sw_vertex_t* v0, sw_vertex_t* v1) #define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1) \ { \ - int x1 = (int)v0->screen[0]; \ - int y1 = (int)v0->screen[1]; \ - int x2 = (int)v1->screen[0]; \ - int y2 = (int)v1->screen[1]; \ + int x1 = (int)(v0->screen[0] + 0.5f); \ + int y1 = (int)(v0->screen[1] + 0.5f); \ + int x2 = (int)(v1->screen[0] + 0.5f); \ + int y2 = (int)(v1->screen[1] + 0.5f); \ \ float z1 = v0->homogeneous[2]; \ float z2 = v1->homogeneous[2]; \ From 2f9eb5b7276a34fb53f5ee2fb7080bd902f11c85 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:57:12 +0100 Subject: [PATCH 30/43] impl point scissor clipping --- src/external/rlsw.h | 80 +++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 29de03ccb..fd6a675a4 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -2315,23 +2315,37 @@ static inline bool sw_point_project_and_clip(sw_vertex_t* v) sw_project_ndc_to_screen(v->screen, v->homogeneous); - return v->screen[0] - RLSW.pointRadius >= RLSW.vpMin[0] - && v->screen[1] - RLSW.pointRadius >= RLSW.vpMin[1] - && v->screen[0] + RLSW.pointRadius <= RLSW.vpMax[0] - && v->screen[1] + RLSW.pointRadius <= RLSW.vpMax[1]; + const int *min, *max; + + if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { + min = RLSW.scMin; + max = RLSW.scMax; + } else { + min = RLSW.vpMin; + max = RLSW.vpMax; + } + + bool insideX = v->screen[0] - RLSW.pointRadius < max[0] + && v->screen[0] + RLSW.pointRadius > min[0]; + + bool insideY = v->screen[1] - RLSW.pointRadius < max[1] + && v->screen[1] + RLSW.pointRadius > min[1]; + + return insideX && insideY; } #define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ static inline void FUNC_NAME(int x, int y, float z, float color[4]) \ { \ - if (CHECK_BOUNDS) \ + if (CHECK_BOUNDS == 1) \ { \ - if (x < 0 || x >= RLSW.framebuffer.width) { \ - return; \ - } \ - if (y < 0 || y >= RLSW.framebuffer.height) { \ - return; \ - } \ + if (x < RLSW.vpMin[0] || x > RLSW.vpMax[0]) return; \ + if (y < RLSW.vpMin[1] || y > RLSW.vpMax[1]) return; \ + } \ + else if (CHECK_BOUNDS == SW_SCISSOR_TEST) \ + { \ + if (x < RLSW.scMin[0] || x > RLSW.scMax[0]) return; \ + if (y < RLSW.scMin[1] || y > RLSW.scMax[1]) return; \ } \ \ int offset = y * RLSW.framebuffer.width + x; \ @@ -2408,11 +2422,21 @@ DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1) DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1) +DEFINE_POINT_RASTER(sw_point_raster_CHECK_SCISSOR, 0, 0, SW_SCISSOR_TEST) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK_SCISSOR, 1, 0, SW_SCISSOR_TEST) +DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK_SCISSOR, 0, 1, SW_SCISSOR_TEST) +DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR, 1, 1, SW_SCISSOR_TEST) + DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_SCISSOR, sw_point_raster_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_SCISSOR, sw_point_raster_DEPTH_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND_SCISSOR, sw_point_raster_BLEND_CHECK_SCISSOR) +DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND_SCISSOR, sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR) + static inline void sw_point_render(sw_vertex_t* v) { if (!sw_point_project_and_clip(v)) { @@ -2420,17 +2444,33 @@ static inline void sw_point_render(sw_vertex_t* v) } if (RLSW.pointRadius >= 1.0f) { - if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { - sw_point_thick_raster_DEPTH_BLEND(v); - } - else if (SW_STATE_CHECK(SW_STATE_BLEND)) { - sw_point_thick_raster_BLEND(v); - } - else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { - sw_point_thick_raster_DEPTH(v); + if (SW_STATE_CHECK(SW_STATE_SCISSOR_TEST)) { + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND_SCISSOR(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND_SCISSOR(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_thick_raster_DEPTH_SCISSOR(v); + } + else { + sw_point_thick_raster_SCISSOR(v); + } } else { - sw_point_thick_raster(v); + if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) { + sw_point_thick_raster_DEPTH_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_BLEND)) { + sw_point_thick_raster_BLEND(v); + } + else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) { + sw_point_thick_raster_DEPTH(v); + } + else { + sw_point_thick_raster(v); + } } } else { From 26dfe69e5f6f229de04e222cb9331ddef32d0849 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 15:57:55 +0100 Subject: [PATCH 31/43] remove unused defs --- src/external/rlsw.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index fd6a675a4..d3e927a5e 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -388,12 +388,6 @@ void swBindTexture(uint32_t id); #define SW_STATE_CULL_FACE (1 << 3) #define SW_STATE_BLEND (1 << 4) -#define SW_CLIP_INSIDE (0x00) // 0000 -#define SW_CLIP_LEFT (0x01) // 0001 -#define SW_CLIP_RIGHT (0x02) // 0010 -#define SW_CLIP_BOTTOM (0x04) // 0100 -#define SW_CLIP_TOP (0x08) // 1000 - /* === Internal Structs === */ typedef enum { From 2009490151580d1a77d6ae409e5ba80e05f57563 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 17:16:21 +0100 Subject: [PATCH 32/43] add getter functions --- src/external/rlsw.h | 314 ++++++++++++++++++++++++++++++++------------ 1 file changed, 228 insertions(+), 86 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index d3e927a5e..60dd29e6b 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -81,92 +81,125 @@ /* === OpenGL Definitions === */ -#define GL_SCISSOR_TEST 0x0C11 -#define GL_TEXTURE_2D 0x0DE1 -#define GL_DEPTH_TEST 0x0B71 -#define GL_CULL_FACE 0x0B44 -#define GL_BLEND 0x0BE2 - -#define GL_COLOR_BUFFER_BIT 0x00004000 -#define GL_DEPTH_BUFFER_BIT 0x00000100 - -#define GL_MODELVIEW 0x1700 -#define GL_PROJECTION 0x1701 -#define GL_TEXTURE 0x1702 - -#define GL_VERTEX_ARRAY 0x8074 -#define GL_NORMAL_ARRAY 0x8075 -#define GL_COLOR_ARRAY 0x8076 -//#define GL_INDEX_ARRAY 0x8077 -#define GL_TEXTURE_COORD_ARRAY 0x8078 - -#define GL_POINTS 0x0000 -#define GL_LINES 0x0001 -//#define GL_LINE_LOOP 0x0002 -//#define GL_LINE_STRIP 0x0003 -#define GL_TRIANGLES 0x0004 -//#define GL_TRIANGLE_STRIP 0x0005 -//#define GL_TRIANGLE_FAN 0x0006 -#define GL_QUADS 0x0007 -//#define GL_QUAD_STRIP 0x0008 -//#define GL_POLYGON 0x0009 - -#define GL_POINT 0x1B00 -#define GL_LINE 0x1B01 -#define GL_FILL 0x1B02 - -//#define GL_CW 0x0900 -//#define GL_CCW 0x0901 - -#define GL_FRONT 0x0404 -#define GL_BACK 0x0405 - -#define GL_ZERO 0 -#define GL_ONE 1 -#define GL_SRC_COLOR 0x0300 -#define GL_ONE_MINUS_SRC_COLOR 0x0301 -#define GL_SRC_ALPHA 0x0302 -#define GL_ONE_MINUS_SRC_ALPHA 0x0303 -#define GL_DST_ALPHA 0x0304 -#define GL_ONE_MINUS_DST_ALPHA 0x0305 -#define GL_DST_COLOR 0x0306 -#define GL_ONE_MINUS_DST_COLOR 0x0307 -#define GL_SRC_ALPHA_SATURATE 0x0308 - -#define GL_NEAREST 0x2600 -#define GL_LINEAR 0x2601 - -#define GL_REPEAT 0x2901 -#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) -#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) - -#define GL_TEXTURE_MAG_FILTER 0x2800 -#define GL_TEXTURE_MIN_FILTER 0x2801 - -#define GL_TEXTURE_WRAP_S 0x2802 -#define GL_TEXTURE_WRAP_T 0x2803 - -#define GL_NO_ERROR 0 -#define GL_INVALID_ENUM 0x0500 -#define GL_INVALID_VALUE 0x0501 -#define GL_INVALID_OPERATION 0x0502 -#define GL_STACK_OVERFLOW 0x0503 -#define GL_STACK_UNDERFLOW 0x0504 -#define GL_OUT_OF_MEMORY 0x0505 - -#define GL_ALPHA 0x1906 -#define GL_LUMINANCE 0x1909 -#define GL_LUMINANCE_ALPHA 0x190A -#define GL_RGB 0x1907 -#define GL_RGBA 0x1908 - -#define GL_BYTE 0x1400 -#define GL_UNSIGNED_BYTE 0x1401 -#define GL_SHORT 0x1402 -#define GL_UNSIGNED_SHORT 0x1403 -#define GL_INT 0x1404 -#define GL_UNSIGNED_INT 0x1405 -#define GL_FLOAT 0x1406 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_DEPTH_TEST 0x0B71 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 + +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +//#define GL_ATTRIB_STACK_DEPTH 0x0BB0 +//#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +//#define GL_COLOR_WRITEMASK 0x0C23 +//#define GL_CURRENT_INDEX 0x0B01 +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_NORMAL 0x0B02 +//#define GL_CURRENT_RASTER_COLOR 0x0B04 +//#define GL_CURRENT_RASTER_DISTANCE 0x0B09 +//#define GL_CURRENT_RASTER_INDEX 0x0B05 +//#define GL_CURRENT_RASTER_POSITION 0x0B07 +//#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 +//#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +//#define GL_INDEX_CLEAR_VALUE 0x0C20 +//#define GL_INDEX_MODE 0x0C30 +//#define GL_INDEX_WRITEMASK 0x0C21 +#define GL_MODELVIEW_MATRIX 0x0BA6 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +//#define GL_NAME_STACK_DEPTH 0x0D70 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +//#define GL_RENDER_MODE 0x0C40 +//#define GL_RGBA_MODE 0x0C31 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_VIEWPORT 0x0BA2 + +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_DEPTH_BUFFER_BIT 0x00000100 + +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 + +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +//#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 + +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +//#define GL_LINE_LOOP 0x0002 +//#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +//#define GL_TRIANGLE_STRIP 0x0005 +//#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +//#define GL_QUAD_STRIP 0x0008 +//#define GL_POLYGON 0x0009 + +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 + +//#define GL_CW 0x0900 +//#define GL_CCW 0x0901 + +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 + +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 + +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F //< (OpenGL 1.2) +#define GL_MIRRORED_REPEAT 0x8370 //< (OpenGL 2.0) + +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 + +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + +#define GL_ALPHA 0x1906 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 + +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 /* === Not Implemented === */ @@ -185,6 +218,24 @@ typedef enum { SW_BLEND = GL_BLEND } SWstate; +typedef enum { + SW_VENDOR = GL_VENDOR, + SW_RENDERER = GL_RENDERER, + SW_VERSION = GL_VERSION, + SW_EXTENSIONS = GL_EXTENSIONS, + SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE, + SW_CURRENT_COLOR = GL_CURRENT_COLOR, + SW_CURRENT_NORMAL = GL_CURRENT_NORMAL, + SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS, + SW_MODELVIEW_MATRIX = GL_MODELVIEW_MATRIX, + SW_MODELVIEW_STACK_DEPTH = GL_MODELVIEW_STACK_DEPTH, + SW_PROJECTION_MATRIX = GL_PROJECTION_MATRIX, + SW_PROJECTION_STACK_DEPTH = GL_PROJECTION_STACK_DEPTH, + SW_TEXTURE_MATRIX = GL_TEXTURE_MATRIX, + SW_TEXTURE_STACK_DEPTH = GL_TEXTURE_STACK_DEPTH, + SW_VIEWPORT = GL_VIEWPORT +} SWget; + typedef enum { SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT, SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT @@ -291,6 +342,9 @@ bool swResizeFramebuffer(int w, int h); void swEnable(SWstate state); void swDisable(SWstate state); +void swGetFloatv(SWget name, float* v); +const char* glGetString(SWget name); + void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, int width, int height); @@ -2824,6 +2878,94 @@ void swDisable(SWstate state) } } +void swGetIntegerv(SWget name, int* v) +{ + switch (name) { + case SW_MODELVIEW_STACK_DEPTH: + *v = SW_MODELVIEW_STACK_DEPTH; + break; + case SW_PROJECTION_STACK_DEPTH: + *v = SW_PROJECTION_STACK_DEPTH; + break; + case SW_TEXTURE_STACK_DEPTH: + *v = SW_TEXTURE_STACK_DEPTH; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +void swGetFloatv(SWget name, float* v) +{ + switch (name) { + case SW_COLOR_CLEAR_VALUE: + v[0] = RLSW.clearColor[0]; + v[1] = RLSW.clearColor[1]; + v[2] = RLSW.clearColor[2]; + v[3] = RLSW.clearColor[3]; + break; + case SW_CURRENT_COLOR: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[1]; + v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[2]; + v[3] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[3]; + break; + case SW_CURRENT_NORMAL: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[1]; + v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].normal[2]; + break; + case SW_CURRENT_TEXTURE_COORDS: + v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[0]; + v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[1]; + break; + case SW_MODELVIEW_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackModelview[RLSW.stackModelviewCounter - 1][i]; + } + break; + case SW_PROJECTION_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackProjection[RLSW.stackProjectionCounter - 1][i]; + } + break; + case SW_TEXTURE_MATRIX: + for (int i = 0; i < 16; i++) { + v[i] = RLSW.stackTexture[RLSW.stackTextureCounter - 1][i]; + } + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } +} + +const char* glGetString(SWget name) +{ + const char* result = NULL; + + switch (name) { + case SW_VENDOR: + result = "RLSW Header"; + break; + case SW_RENDERER: + result = "RLSW Software Renderer"; + break; + case SW_VERSION: + result = "RLSW 1.0"; + break; + case SW_EXTENSIONS: + result = "None"; + break; + default: + RLSW.errCode = SW_INVALID_ENUM; + break; + } + + return result; +} + void swViewport(int x, int y, int width, int height) { if (x <= -width || y <= -height) { From 618f283cb2fb5273c1ea8c9df345b342f5278b0c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 17:59:58 +0100 Subject: [PATCH 33/43] gl binding --- src/external/rlsw.h | 186 ++++++++++++++++---------------------------- 1 file changed, 68 insertions(+), 118 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 60dd29e6b..38f00a0e7 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -202,10 +202,73 @@ #define GL_FLOAT 0x1406 +/* === OpenGL Binding === */ + +#define glEnable(state) swEnable(state) +#define glDisable(state) swDisable(state) +#define glGetFloatv(pname, params) swGetFloatv(pname, params) +#define glGetString(pname) swGetString(pname) +#define glViewport(x, y, w, h) swViewport(x, y, w, h) +#define glScissor(x, y, w, h) swScissor(x, y, w, h) +#define glClearColor(r, g, b, a) swClearColor(r, g, b, a) +#define glClear(bitmask) swClear(bitmask) +#define glBlendFunc(sfactor, dfactor) swBlendFunc(sfactor, dfactor) +#define glPolygonMode(mode) swPolygonMode(mode) +#define glCullFace(face) swCullFace(face) +#define glPointSize(size) swPointSize(size) +#define glLineWidth(width) swLineWidth(width) +#define glMatrixMode(mode) swMatrixMode(mode) +#define glPushMatrix() swPushMatrix() +#define glPopMatrix() swPopMatrix() +#define glLoadIdentity() swLoadIdentity() +#define glTranslatef(x, y, z) swTranslatef(x, y, z) +#define glRotatef(a, x, y, z) swRotatef(a, x, y, z) +#define glScalef(x, y, z) swScalef(x, y, z) +#define glMultMatrixf(v) swMultMatrixf(v) +#define glFrusutm(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) +#define glOrtho(l, r, b, t, n, f) swOrtho(l, r, b, t, n, f) +#define glBegin(mode) swBegin(mode) +#define glEnd() swEnd() +#define glVertex2i(x, y) swVertex2i(x, y) +#define glVertex2f(x, y) swVertex2f(x, y) +#define glVertex2fv(v) swVertex2fv(v) +#define glVertex3i(x, y, z) swVertex3i(x, y, z) +#define glVertex3f(x, y, z) swVertex3f(x, y, z) +#define glvertex3fv(v) swVertex3fv(v) +#define glVertex4i(x, y, z, w) swVertex4i(x, y, z, w) +#define glVertex4f(x, y, z, w) swVertex4f(x, y, z, w) +#define glVertex4fv(v) swVertex4fv(v) +#define glColor3ub(r, g, b) swColor3ub(r, g, b) +#define glColor3ubv(v) swColor3ubv(v) +#define glColor3f(r, g, b) swColor3f(r, g, b) +#define glColor3fv(v) swColor3fv(v) +#define glColor4ub(r, g, b, a) swColor4ub(r, g, b, a) +#define glColor4ubv(v) swColor4ubv(v) +#define glColor4f(r, g, b, a) swColor4f(r, g, b, a) +#define glColor4fv(v) swColor4fv(v) +#define glTexCoord2f(u, v) swTexCoord2f(u, v) +#define glTexCoord2fv(v) swTexCoord2fv(v) +#define glNormal3f(x, y, z) swNormal3f(x, y, z) +#define glNormal3fv(v) swNormal3fv(v) +#define glEnableClientState(t) ((void)(t)) +#define glDisableClientState(t) swBindArray(t, 0) +#define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) +#define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) +#define glNormalPointer(sz, t, s, p) swBindArray(SW_NORMAL_ARRAY, p) +#define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) +#define glDrawArrays(m, o, c) swDrawArrays(m, o, c) +#define glGenTextures(c, v) swGenTextures(c, v) +#define glDeleteTextures(c, v) swDeleteTextures(c, v) +#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) +#define glTexParameteri(pname, param) swTexParameteri(param, value) +#define glBindTexture(tr, id) swBindTexture(id) + + /* === Not Implemented === */ -#define glDepthMask(x) ((void)(x)) -#define glColorMask(x) ((void)(x)) +#define glDepthMask(X) ((void)(X)) +#define glColorMask(X) ((void)(X)) +#define glPixelStorei(X, Y) ((void)(X);(void)(Y)) /* === RLSW Enums === */ @@ -382,26 +445,17 @@ void swVertex4i(int x, int y, int z, int w); void swVertex4f(float x, float y, float z, float w); void swVertex4fv(const float* v); -void swColor1ui(uint32_t color); void swColor3ub(uint8_t r, uint8_t g, uint8_t b); void swColor3ubv(const uint8_t* v); -void swColor3us(uint16_t r, uint16_t g, uint16_t b); -void swColor3usv(const uint16_t* v); -void swColor3ui(uint32_t r, uint32_t g, uint32_t b); -void swColor3uiv(const uint32_t* v); void swColor3f(float r, float g, float b); void swColor3fv(const float* v); void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); void swColor4ubv(const uint8_t* v); -void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a); -void swColor4usv(const uint16_t* v); -void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a); -void swColor4uiv(const uint32_t* v); void swColor4f(float r, float g, float b, float a); void swColor4fv(const float* v); void swTexCoord2f(float u, float v); -void swTexCoordfv(const float* v); +void swTexCoord2fv(const float* v); void swNormal3f(float x, float y, float z); void swNormal3fv(const float* v); @@ -3466,22 +3520,6 @@ void swVertex4fv(const float* v) } } -void swColor1ui(uint32_t color) -{ - union { - uint32_t v; - uint8_t a[4]; - } c = { .v = color }; - - float cv[4]; - cv[0] = (float)c.a[0] / 255; - cv[1] = (float)c.a[1] / 255; - cv[2] = (float)c.a[2] / 255; - cv[3] = (float)c.a[3] / 255; - - swColor4fv(cv); -} - void swColor3ub(uint8_t r, uint8_t g, uint8_t b) { float cv[4]; @@ -3504,50 +3542,6 @@ void swColor3ubv(const uint8_t* v) swColor4fv(cv); } -void swColor3us(uint16_t r, uint16_t g, uint16_t b) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void swColor3usv(const uint16_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void swColor3ui(uint32_t r, uint32_t g, uint32_t b) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - -void swColor3uiv(const uint32_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = 1.0f; - - swColor4fv(cv); -} - void swColor3f(float r, float g, float b) { float cv[4]; @@ -3592,50 +3586,6 @@ void swColor4ubv(const uint8_t* v) swColor4fv(cv); } -void swColor4us(uint16_t r, uint16_t g, uint16_t b, uint16_t a) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 8)) / 255; - cv[1] = (float)((uint8_t)(g >> 8)) / 255; - cv[2] = (float)((uint8_t)(b >> 8)) / 255; - cv[3] = (float)((uint8_t)(a >> 8)) / 255; - - swColor4fv(cv); -} - -void swColor4usv(const uint16_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 8)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 8)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 8)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 8)) / 255; - - swColor4fv(cv); -} - -void swColor4ui(uint32_t r, uint32_t g, uint32_t b, uint32_t a) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(r >> 24)) / 255; - cv[1] = (float)((uint8_t)(g >> 24)) / 255; - cv[2] = (float)((uint8_t)(b >> 24)) / 255; - cv[3] = (float)((uint8_t)(a >> 24)) / 255; - - swColor4fv(cv); -} - -void swColor4uiv(const uint32_t* v) -{ - float cv[4]; - cv[0] = (float)((uint8_t)(v[0] >> 24)) / 255; - cv[1] = (float)((uint8_t)(v[1] >> 24)) / 255; - cv[2] = (float)((uint8_t)(v[2] >> 24)) / 255; - cv[3] = (float)((uint8_t)(v[3] >> 24)) / 255; - - swColor4fv(cv); -} - void swColor4f(float r, float g, float b, float a) { float cv[4]; @@ -3663,7 +3613,7 @@ void swTexCoord2f(float u, float v) RLSW.vertexBuffer[RLSW.vertexCounter].texcoord[1] = t; } -void swTexCoordfv(const float* v) +void swTexCoord2fv(const float* v) { float s = RLSW.matTexture[0]*v[0] + RLSW.matTexture[4]*v[1] + RLSW.matTexture[12]; float t = RLSW.matTexture[1]*v[0] + RLSW.matTexture[5]*v[1] + RLSW.matTexture[13]; @@ -3717,7 +3667,7 @@ void swDrawArrays(SWdraw mode, int offset, int count) for (int i = offset; i < count; i++) { if (RLSW.array.texcoords) { - swTexCoordfv(RLSW.array.texcoords + 2 * i); + swTexCoord2fv(RLSW.array.texcoords + 2 * i); } if (RLSW.array.normals) { swNormal3fv(RLSW.array.normals + 3 * i); From 13585d8684a2d2b133a61b837a1effe9bb907f04 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 18:30:27 +0100 Subject: [PATCH 34/43] add `glHint` and `glShadeModel` macros (not implmented) --- src/external/rlsw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 38f00a0e7..c2dc4ee10 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -269,7 +269,8 @@ #define glDepthMask(X) ((void)(X)) #define glColorMask(X) ((void)(X)) #define glPixelStorei(X, Y) ((void)(X);(void)(Y)) - +#define glHint(X, Y) ((void)(X);(void)(Y)) +#define glShadeModel(X) ((void)(X)) /* === RLSW Enums === */ From 64c39e9d536d1df292b1c37500e2716ec160a309 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 19:19:15 +0100 Subject: [PATCH 35/43] binding tweaks --- src/external/rlsw.h | 70 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index c2dc4ee10..0636d08f2 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -79,8 +79,30 @@ #endif +/* === OpenGL Compatibility Types === */ + +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef signed char GLbyte; +typedef short GLshort; +typedef int GLint; +typedef unsigned char GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef int GLsizei; +typedef float GLfloat; +typedef float GLclampf; +typedef double GLdouble; +typedef double GLclampd; + + /* === OpenGL Definitions === */ +#define GL_FALSE 0 +#define GL_TRUE 1 + #define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 @@ -148,9 +170,6 @@ #define GL_LINE 0x1B01 #define GL_FILL 0x1B02 -//#define GL_CW 0x0900 -//#define GL_CCW 0x0901 - #define GL_FRONT 0x0404 #define GL_BACK 0x0405 @@ -202,6 +221,26 @@ #define GL_FLOAT 0x1406 +/* === Not Used Definitions === */ + +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_SMOOTH 0x1D01 +#define GL_NICEST 0x1102 +#define GL_CCW 0x0901 +#define GL_CW 0x0900 +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + + /* === OpenGL Binding === */ #define glEnable(state) swEnable(state) @@ -213,7 +252,7 @@ #define glClearColor(r, g, b, a) swClearColor(r, g, b, a) #define glClear(bitmask) swClear(bitmask) #define glBlendFunc(sfactor, dfactor) swBlendFunc(sfactor, dfactor) -#define glPolygonMode(mode) swPolygonMode(mode) +#define glPolygonMode(face, mode) swPolygonMode(mode) #define glCullFace(face) swCullFace(face) #define glPointSize(size) swPointSize(size) #define glLineWidth(width) swLineWidth(width) @@ -225,7 +264,7 @@ #define glRotatef(a, x, y, z) swRotatef(a, x, y, z) #define glScalef(x, y, z) swScalef(x, y, z) #define glMultMatrixf(v) swMultMatrixf(v) -#define glFrusutm(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) +#define glFrustum(l, r, b, t, n, f) swFrustum(l, r, b, t, n, f) #define glOrtho(l, r, b, t, n, f) swOrtho(l, r, b, t, n, f) #define glBegin(mode) swBegin(mode) #define glEnd() swEnd() @@ -254,23 +293,30 @@ #define glDisableClientState(t) swBindArray(t, 0) #define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, p) #define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, p) -#define glNormalPointer(sz, t, s, p) swBindArray(SW_NORMAL_ARRAY, p) +#define glNormalPointer(t, s, p) swBindArray(SW_NORMAL_ARRAY, p) #define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, p) #define glDrawArrays(m, o, c) swDrawArrays(m, o, c) #define glGenTextures(c, v) swGenTextures(c, v) #define glDeleteTextures(c, v) swDeleteTextures(c, v) #define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) -#define glTexParameteri(pname, param) swTexParameteri(param, value) +#define glTexParameteri(tr, pname, param) swTexParameteri(pname, param) #define glBindTexture(tr, id) swBindTexture(id) /* === Not Implemented === */ -#define glDepthMask(X) ((void)(X)) -#define glColorMask(X) ((void)(X)) -#define glPixelStorei(X, Y) ((void)(X);(void)(Y)) -#define glHint(X, Y) ((void)(X);(void)(Y)) -#define glShadeModel(X) ((void)(X)) +#define glClearDepth(X) ((void)(X)) +#define glDepthMask(X) ((void)(X)) +#define glColorMask(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) +#define glPixelStorei(X,Y) ((void)(X),(void)(Y)) +#define glHint(X,Y) ((void)(X),(void)(Y)) +#define glShadeModel(X) ((void)(X)) +#define glFrontFace(X) ((void)(X)) +#define glDepthFunc(X) ((void)(X)) +#define glTexSubImage2D(X,Y,Z,W,A,B,C,D,E) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A),(void)(B),(void)(C),(void)(D),(void)(E)) +#define glGetTexImage(X,Y,Z,W,A) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A)) +#define glDrawElements(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) + /* === RLSW Enums === */ From 02f7d3146b0cfa87d6d089cf8b5d9179e6316b11 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 19:50:27 +0100 Subject: [PATCH 36/43] impl copy framebuffer function + glReadPixels --- src/external/rlsw.h | 292 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 258 insertions(+), 34 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 0636d08f2..70762152d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -243,6 +243,7 @@ typedef double GLclampd; /* === OpenGL Binding === */ +#define glReadPixels(x, y, w, h, f, t, p) swCopyFramebuffer(x, y, w, h, f, t, p) #define glEnable(state) swEnable(state) #define glDisable(state) swDisable(state) #define glGetFloatv(pname, params) swGetFloatv(pname, params) @@ -446,8 +447,8 @@ typedef enum { bool swInit(int w, int h); void swClose(void); -void* swGetColorBuffer(int* w, int* h); bool swResizeFramebuffer(int w, int h); +void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels); void swEnable(SWstate state); void swDisable(SWstate state); @@ -1101,6 +1102,62 @@ static inline void sw_framebuffer_fill(void* colorPtr, void* depthPtr, int size, } +/* === Half Floating Point === */ + +static inline uint32_t sw_cvt_hf_ui(uint16_t h) +{ + uint32_t s = (uint32_t)(h & 0x8000) << 16; + int32_t em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int32_t r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? (112 << 23) : 0; + + return s | r; +} + +static inline float sw_cvt_hf(sw_half_t y) +{ + union { float f; uint32_t i; } v = { + .i = sw_cvt_hf_ui(y) + }; + return v.f; +} + +static inline uint16_t sw_cvt_fh_ui(uint32_t ui) +{ + int32_t s = (ui >> 16) & 0x8000; + int32_t em = ui & 0x7fffffff; + + // bias exponent and round to nearest; 112 is relative exponent bias (127-15) + int32_t h = (em - (112 << 23) + (1 << 12)) >> 13; + + // underflow: flush to zero; 113 encodes exponent -14 + h = (em < (113 << 23)) ? 0 : h; + + // overflow: infinity; 143 encodes exponent 16 + h = (em >= (143 << 23)) ? 0x7c00 : h; + + // NaN; note that we convert all types of NaN to qNaN + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (uint16_t)(s | h); +} + +static inline sw_half_t sw_cvt_fh(float i) +{ + union { float f; uint32_t i; } v; + v.f = i; + return sw_cvt_fh_ui(v.i); +} + + /* === Pixel Format Part === */ static inline int sw_get_pixel_format(SWformat format, SWtype type) @@ -1150,32 +1207,6 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) return -1; // Unsupported format } -static inline uint32_t sw_cvt_hf_ui(uint16_t h) -{ - uint32_t s = (uint32_t)(h & 0x8000) << 16; - int32_t em = h & 0x7fff; - - // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) - int32_t r = (em + (112 << 10)) << 13; - - // denormal: flush to zero - r = (em < (1 << 10)) ? 0 : r; - - // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases - // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 - r += (em >= (31 << 10)) ? (112 << 23) : 0; - - return s | r; -} - -static inline float sw_cvt_hf(sw_half_t y) -{ - union { float f; uint32_t i; } v = { - .i = sw_cvt_hf_ui(y) - }; - return v.f; -} - static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); @@ -1376,6 +1407,193 @@ static inline void sw_get_pixel(float* color, const void* pixels, uint32_t offse } } +static inline void sw_set_pixel_grayscale(void* pixels, uint32_t offset, const float* color) +{ + ((uint8_t*)pixels)[offset] = (uint8_t)(color[0] * 255.0f); +} + +static inline void sw_set_pixel_red_16(void* pixels, uint32_t offset, const float* color) +{ + ((sw_half_t*)pixels)[offset] = sw_cvt_fh(color[0]); +} + +static inline void sw_set_pixel_red_32(void* pixels, uint32_t offset, const float* color) +{ + ((float*)pixels)[offset] = color[0]; +} + +static inline void sw_set_pixel_grayscale_alpha(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixelData = (uint8_t*)pixels + 2 * offset; + + pixelData[0] = (uint8_t)(color[0] * 255.0f); // Valeur de gris + pixelData[1] = (uint8_t)(color[3] * 255.0f); // Alpha +} + +static inline void sw_set_pixel_rgb_565(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 31) & 0x1F; + uint16_t g = (uint16_t)(color[1] * 63) & 0x3F; + uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; + + *pixel = (r << 11) | (g << 5) | b; +} + +static inline void sw_set_pixel_rgb_888(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixel = (uint8_t*)pixels + 3 * offset; + + pixel[0] = (uint8_t)(color[0] * 255.0f); + pixel[1] = (uint8_t)(color[1] * 255.0f); + pixel[2] = (uint8_t)(color[2] * 255.0f); +} + +static inline void sw_set_pixel_rgb_161616(void* pixels, uint32_t offset, const float* color) +{ + sw_half_t* pixel = (sw_half_t*)pixels + 3 * offset; + + pixel[0] = sw_cvt_fh(color[0]); + pixel[1] = sw_cvt_fh(color[1]); + pixel[2] = sw_cvt_fh(color[2]); +} + +static inline void sw_set_pixel_rgb_323232(void* pixels, uint32_t offset, const float* color) +{ + float* pixel = (float*)pixels + 3 * offset; + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; +} + +static inline void sw_set_pixel_rgba_5551(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 31) & 0x1F; + uint16_t g = (uint16_t)(color[1] * 31) & 0x1F; + uint16_t b = (uint16_t)(color[2] * 31) & 0x1F; + uint16_t a = (color[3] > 0.5f) ? 1 : 0; // Alpha 1 bit + + *pixel = (r << 11) | (g << 6) | (b << 1) | a; +} + +static inline void sw_set_pixel_rgba_4444(void* pixels, uint32_t offset, const float* color) +{ + uint16_t* pixel = (uint16_t*)pixels + offset; + + uint16_t r = (uint16_t)(color[0] * 15) & 0x0F; + uint16_t g = (uint16_t)(color[1] * 15) & 0x0F; + uint16_t b = (uint16_t)(color[2] * 15) & 0x0F; + uint16_t a = (uint16_t)(color[3] * 15) & 0x0F; + + *pixel = (r << 12) | (g << 8) | (b << 4) | a; +} + +static inline void sw_set_pixel_rgba_8888(void* pixels, uint32_t offset, const float* color) +{ + uint8_t* pixel = (uint8_t*)pixels + 4 * offset; + + pixel[0] = (uint8_t)(color[0] * 255.0f); + pixel[1] = (uint8_t)(color[1] * 255.0f); + pixel[2] = (uint8_t)(color[2] * 255.0f); + pixel[3] = (uint8_t)(color[3] * 255.0f); +} + +static inline void sw_set_pixel_rgba_16161616(void* pixels, uint32_t offset, const float* color) +{ + sw_half_t* pixel = (sw_half_t*)pixels + 4 * offset; + + pixel[0] = sw_cvt_fh(color[0]); + pixel[1] = sw_cvt_fh(color[1]); + pixel[2] = sw_cvt_fh(color[2]); + pixel[3] = sw_cvt_fh(color[3]); +} + +static inline void sw_set_pixel_rgba_32323232(void* pixels, uint32_t offset, const float* color) +{ + float* pixel = (float*)pixels + 4 * offset; + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; + pixel[3] = color[3]; +} + +static inline void sw_set_pixel(void* pixels, uint32_t offset, sw_pixelformat_e format, const float* color) +{ + switch (format) { + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: + sw_set_pixel_grayscale(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + sw_set_pixel_grayscale_alpha(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + sw_set_pixel_rgb_565(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: + sw_set_pixel_rgb_888(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + sw_set_pixel_rgba_5551(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: + sw_set_pixel_rgba_4444(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: + sw_set_pixel_rgba_8888(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32: + sw_set_pixel_red_32(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: + sw_set_pixel_rgb_323232(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: + sw_set_pixel_rgba_32323232(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16: + sw_set_pixel_red_16(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: + sw_set_pixel_rgb_161616(pixels, offset, color); + break; + + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: + sw_set_pixel_rgba_16161616(pixels, offset, color); + break; + + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: + break; + + } +} + /* === Texture Sampling Part === */ @@ -2918,17 +3136,23 @@ void swClose(void) RLSW = (sw_context_t) { 0 }; } -void* swGetColorBuffer(int* w, int* h) +bool swResizeFramebuffer(int w, int h) { - if (w) *w = RLSW.framebuffer.width; - if (h) *h = RLSW.framebuffer.height; - - return RLSW.framebuffer.color; + return sw_framebuffer_resize(w, h); } -bool swResizeFramebuffer(int w, int h) +void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void* pixels) { - return sw_framebuffer_resize(w, h); + sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + float color[4]; + int offset = y * RLSW.framebuffer.width + x; + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(RLSW.framebuffer.color, offset)); + sw_set_pixel(pixels, offset, pFormat, color); + } + } } void swEnable(SWstate state) From c7fa9a0fa2a4920d30464f23f1fc604ff38426e7 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 20:00:32 +0100 Subject: [PATCH 37/43] review `swCopyFramebuffer` --- src/external/rlsw.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 70762152d..4d77bdc68 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -3145,12 +3145,16 @@ void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, { sw_pixelformat_e pFormat = sw_get_pixel_format(format, type); + void* src = RLSW.framebuffer.color; + + int srcW = RLSW.framebuffer.width; + int srcHm1 = RLSW.framebuffer.height - 1; + for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float color[4]; - int offset = y * RLSW.framebuffer.width + x; - sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(RLSW.framebuffer.color, offset)); - sw_set_pixel(pixels, offset, pFormat, color); + sw_framebuffer_read_color(color, sw_framebuffer_get_color_addr(src, (srcHm1 - y) * srcW + x)); + sw_set_pixel(pixels, y * srcW + x, pFormat, color); } } } From c9338481a489d54c9e602a9801bf36e99b317618 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 22:44:59 +0100 Subject: [PATCH 38/43] update rlgl.h --- src/rlgl.h | 68 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/src/rlgl.h b/src/rlgl.h index d55ae82f2..a035481cc 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -159,7 +159,7 @@ #endif // Security check in case multiple GRAPHICS_API_OPENGL_* defined -#if defined(GRAPHICS_API_OPENGL_11) +#if defined(GRAPHICS_API_OPENGL_11) || defined(GRAPHICS_API_OPENGL_11_SOFTWARE) #if defined(GRAPHICS_API_OPENGL_21) #undef GRAPHICS_API_OPENGL_21 #endif @@ -174,6 +174,11 @@ #endif #endif +// Software implementation uses OpenGL 1.1 functionality +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + #define GRAPHICS_API_OPENGL_11 +#endif + // OpenGL 2.1 uses most of OpenGL 3.3 Core functionality // WARNING: Specific parts are checked with #if defines #if defined(GRAPHICS_API_OPENGL_21) @@ -767,6 +772,10 @@ RLAPI unsigned int rlLoadFramebuffer(void); // Loa RLAPI void rlFramebufferAttach(unsigned int fboId, unsigned int texId, int attachType, int texType, int mipLevel); // Attach texture/renderbuffer to a framebuffer RLAPI bool rlFramebufferComplete(unsigned int id); // Verify framebuffer is complete RLAPI void rlUnloadFramebuffer(unsigned int id); // Delete framebuffer from GPU +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) +RLAPI void* rlGetFramebuffer(int* width, int* height); +RLAPI void rlResizeFramebuffer(int width, int height); +#endif // Shaders management RLAPI unsigned int rlLoadShaderCode(const char *vsCode, const char *fsCode); // Load shader from code strings @@ -833,24 +842,29 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #endif #if defined(GRAPHICS_API_OPENGL_11) - #if defined(__APPLE__) - #include // OpenGL 1.1 library for OSX - #include // OpenGL extensions library + #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + #define RLSW_IMPL + #include // OpenGL 1.1 software implementation #else - // APIENTRY for OpenGL function pointer declarations is required - #if !defined(APIENTRY) - #if defined(_WIN32) - #define APIENTRY __stdcall - #else - #define APIENTRY + #if defined(__APPLE__) + #include // OpenGL 1.1 library for OSX + #include // OpenGL extensions library + #else + // APIENTRY for OpenGL function pointer declarations is required + #if !defined(APIENTRY) + #if defined(_WIN32) + #define APIENTRY __stdcall + #else + #define APIENTRY + #endif + #endif + // WINGDIAPI definition. Some Windows OpenGL headers need it + #if !defined(WINGDIAPI) && defined(_WIN32) + #define WINGDIAPI __declspec(dllimport) #endif - #endif - // WINGDIAPI definition. Some Windows OpenGL headers need it - #if !defined(WINGDIAPI) && defined(_WIN32) - #define WINGDIAPI __declspec(dllimport) - #endif - #include // OpenGL 1.1 library + #include // OpenGL 1.1 library + #endif #endif #endif @@ -2310,6 +2324,10 @@ void rlglInit(int width, int height) glShadeModel(GL_SMOOTH); // Smooth shading between vertex (vertex colors interpolation) #endif +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + swInit(width, height); +#endif + #if defined(GRAPHICS_API_OPENGL_33) || defined(GRAPHICS_API_OPENGL_ES2) // Store screen size into global variables RLGL.State.framebufferWidth = width; @@ -2336,6 +2354,10 @@ void rlglClose(void) glDeleteTextures(1, &RLGL.State.defaultTextureId); // Unload default texture TRACELOG(RL_LOG_INFO, "TEXTURE: [ID %i] Default texture unloaded successfully", RLGL.State.defaultTextureId); #endif + +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + swClose(); +#endif } // Load OpenGL extensions @@ -3673,6 +3695,20 @@ void *rlReadTexturePixels(unsigned int id, int width, int height, int format) return pixels; } +#if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) + +void* rlGetFramebuffer(int* width, int* height) +{ + return swGetColorBuffer(&width, &height); +} + +void rlResizeFramebuffer(int width, int height) +{ + swResizeFramebuffer(width, height); +} + +#endif + // Read screen pixel data (color buffer) unsigned char *rlReadScreenPixels(int width, int height) { From 89d40510c6b130dcab2ce1dc926515f5fe0b6259 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Sat, 22 Mar 2025 22:49:24 +0100 Subject: [PATCH 39/43] update rlgl.h --- src/rlgl.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/rlgl.h b/src/rlgl.h index a035481cc..3a7acec5e 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -773,7 +773,7 @@ RLAPI void rlFramebufferAttach(unsigned int fboId, unsigned int texId, int attac RLAPI bool rlFramebufferComplete(unsigned int id); // Verify framebuffer is complete RLAPI void rlUnloadFramebuffer(unsigned int id); // Delete framebuffer from GPU #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) -RLAPI void* rlGetFramebuffer(int* width, int* height); +RLAPI void rlCopyFramebuffer(int x, int y, int w, int h, int format, void* pixels); RLAPI void rlResizeFramebuffer(int width, int height); #endif @@ -3696,17 +3696,16 @@ void *rlReadTexturePixels(unsigned int id, int width, int height, int format) } #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) - -void* rlGetFramebuffer(int* width, int* height) +void rlCopyFramebuffer(int x, int y, int w, int h, int format, void* pixels) { - return swGetColorBuffer(&width, &height); + unsigned int glInternalFormat, glFormat, glType; + rlGetGlTextureFormats(format, &glInternalFormat, &glFormat, &glType); + swCopyFramebuffer(x, y, w, h, glFormat, glType, pixels); } - void rlResizeFramebuffer(int width, int height) { swResizeFramebuffer(width, height); } - #endif // Read screen pixel data (color buffer) @@ -5306,4 +5305,4 @@ static Matrix rlMatrixInvert(Matrix mat) return result; } -#endif // RLGL_IMPLEMENTATION +#endif // RLGL_IMPLEMENTATION \ No newline at end of file From be24b0635684a6536799ab0b3bd9e4917ce0af3c Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:33:12 +0100 Subject: [PATCH 40/43] texture copy support --- src/external/rlsw.h | 97 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 14 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 4d77bdc68..dca834349 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -46,6 +46,10 @@ # define SW_FREE(ptr) free(ptr) #endif +#ifndef SW_GL_BINDING_COPY_TEXTURE +# define SW_GL_BINDING_COPY_TEXTURE true +#endif + #ifndef SW_COLOR_BUFFER_BITS # define SW_COLOR_BUFFER_BITS 24 #endif @@ -299,7 +303,7 @@ typedef double GLclampd; #define glDrawArrays(m, o, c) swDrawArrays(m, o, c) #define glGenTextures(c, v) swGenTextures(c, v) #define glDeleteTextures(c, v) swDeleteTextures(c, v) -#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, p) +#define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D(w, h, f, t, SW_GL_BINDING_COPY_TEXTURE, p) #define glTexParameteri(tr, pname, param) swTexParameteri(pname, param) #define glBindTexture(tr, id) swBindTexture(id) @@ -514,7 +518,7 @@ void swDrawArrays(SWdraw mode, int offset, int count); void swGenTextures(int count, uint32_t* textures); void swDeleteTextures(int count, uint32_t* textures); -void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data); +void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy, const void* data); void swTexParameteri(int param, int value); void swBindTexture(uint32_t id); @@ -590,7 +594,14 @@ typedef struct { typedef struct { - const void* pixels; + // Dirty hack for copied data + // TODO: Rework copied image handling + + union { + const void* cptr; //< NOTE: Is used for all data reads + void* ptr; //< WARNING: Should only be used to allocate and free data + } pixels; + int width; int height; int format; @@ -604,6 +615,8 @@ typedef struct { float tx; float ty; + bool copy; + } sw_texture_t; typedef struct { @@ -1207,6 +1220,43 @@ static inline int sw_get_pixel_format(SWformat format, SWtype type) return -1; // Unsupported format } +int sw_get_pixel_bpp(sw_pixelformat_e format) +{ + int bpp = 0; + + switch (format) + { + case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: bpp = 8; break; + case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: + case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: + case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: + case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: bpp = 16; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: bpp = 32; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: bpp = 24; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32: bpp = 32; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: bpp = 32*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: bpp = 32*4; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16: bpp = 16; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: bpp = 16*3; break; + case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: bpp = 16*4; break; + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGB: + case SW_PIXELFORMAT_COMPRESSED_DXT1_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC1_RGB: + case SW_PIXELFORMAT_COMPRESSED_ETC2_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGB: + case SW_PIXELFORMAT_COMPRESSED_PVRT_RGBA: bpp = 4; break; + case SW_PIXELFORMAT_COMPRESSED_DXT3_RGBA: + case SW_PIXELFORMAT_COMPRESSED_DXT5_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA: + case SW_PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA: bpp = 8; break; + case SW_PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA: bpp = 2; break; + default: break; + } + + return bpp; +} + + static inline void sw_get_pixel_grayscale(float* color, const void* pixels, uint32_t offset) { float gray = (float)((uint8_t*)pixels)[offset] * (1.0f / 255); @@ -1640,7 +1690,7 @@ static inline void sw_texture_sample_nearest(float* color, const sw_texture_t* t int x, y; sw_texture_map(&x, u, tex->width, tex->sWrap); sw_texture_map(&y, v, tex->height, tex->tWrap); - sw_get_pixel(color, tex->pixels, y * tex->width + x, tex->format); + sw_get_pixel(color, tex->pixels.cptr, y * tex->width + x, tex->format); } static inline void sw_texture_sample_linear(float* color, const sw_texture_t* tex, float u, float v) @@ -1655,10 +1705,10 @@ static inline void sw_texture_sample_linear(float* color, const sw_texture_t* te float fy = v * (tex->height - 1) - y0; float c00[4], c10[4], c01[4], c11[4]; - sw_get_pixel(c00, tex->pixels, y0 * tex->width + x0, tex->format); - sw_get_pixel(c10, tex->pixels, y0 * tex->width + x1, tex->format); - sw_get_pixel(c01, tex->pixels, y1 * tex->width + x0, tex->format); - sw_get_pixel(c11, tex->pixels, y1 * tex->width + x1, tex->format); + sw_get_pixel(c00, tex->pixels.cptr, y0 * tex->width + x0, tex->format); + sw_get_pixel(c10, tex->pixels.cptr, y0 * tex->width + x1, tex->format); + sw_get_pixel(c01, tex->pixels.cptr, y1 * tex->width + x0, tex->format); + sw_get_pixel(c11, tex->pixels.cptr, y1 * tex->width + x1, tex->format); float c0[4], c1[4]; for (int i = 0; i < 4; i++) { @@ -2937,7 +2987,7 @@ static inline bool sw_is_texture_valid(uint32_t id) if (id == 0) valid = false; else if (id >= SW_MAX_TEXTURES) valid = false; - else if (RLSW.loadedTextures[id].pixels == 0) valid = false; + else if (RLSW.loadedTextures[id].pixels.cptr == 0) valid = false; return true; } @@ -3099,7 +3149,7 @@ bool swInit(int w, int h) 1.0f, 1.0f, 1.0f, }; - RLSW.loadedTextures[0].pixels = defTex; + RLSW.loadedTextures[0].pixels.cptr = defTex; RLSW.loadedTextures[0].width = 2; RLSW.loadedTextures[0].height = 2; RLSW.loadedTextures[0].format = SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; @@ -3990,12 +4040,15 @@ void swDeleteTextures(int count, uint32_t* textures) RLSW.errCode = SW_INVALID_VALUE; continue; } - RLSW.loadedTextures[textures[i]].pixels = 0; + if (RLSW.loadedTextures[textures[i]].copy) { + SW_FREE(RLSW.loadedTextures[textures[i]].pixels.ptr); + } + RLSW.loadedTextures[textures[i]].pixels.cptr = NULL; RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i]; } } -void swTexImage2D(int width, int height, SWformat format, SWtype type, const void* data) +void swTexImage2D(int width, int height, SWformat format, SWtype type, bool copy, const void* data) { uint32_t id = RLSW.currentTexture; @@ -4013,12 +4066,28 @@ void swTexImage2D(int width, int height, SWformat format, SWtype type, const voi sw_texture_t* texture = &RLSW.loadedTextures[id]; - texture->pixels = data; + if (copy) { + int bpp = sw_get_pixel_bpp(pixelFormat); + int size = bpp * width * height; + texture->pixels.ptr = SW_MALLOC(size); + if (texture->pixels.ptr == NULL) { + RLSW.errCode = SW_STACK_OVERFLOW; //< Out of memory... + return; + } + for (int i = 0; i < size; i++) { + ((uint8_t*)texture->pixels.ptr)[i] = ((uint8_t*)data)[i]; + } + } + else { + texture->pixels.cptr = data; + } + texture->width = width; texture->height = height; texture->format = pixelFormat; texture->tx = 1.0f / width; texture->ty = 1.0f / height; + texture->copy = copy; } void swTexParameteri(int param, int value) @@ -4080,7 +4149,7 @@ void swBindTexture(uint32_t id) return; } - if (id > 0 && RLSW.loadedTextures[id].pixels == 0) { + if (id > 0 && RLSW.loadedTextures[id].pixels.cptr == NULL) { RLSW.errCode = SW_INVALID_OPERATION; return; } From f902cc3f29c2db45b79c9f6f808967e18828e3d0 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:34:21 +0100 Subject: [PATCH 41/43] fix typo.. --- src/external/rlsw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index dca834349..2627ddc3d 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -458,7 +458,7 @@ void swEnable(SWstate state); void swDisable(SWstate state); void swGetFloatv(SWget name, float* v); -const char* glGetString(SWget name); +const char* swGetString(SWget name); void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, int width, int height); @@ -3320,7 +3320,7 @@ void swGetFloatv(SWget name, float* v) } } -const char* glGetString(SWget name) +const char* swGetString(SWget name) { const char* result = NULL; From f3de37eff3c5086192a332edce4091038ec06f06 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:35:51 +0100 Subject: [PATCH 42/43] add get error function --- src/external/rlsw.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/external/rlsw.h b/src/external/rlsw.h index 2627ddc3d..ed2eed4c9 100644 --- a/src/external/rlsw.h +++ b/src/external/rlsw.h @@ -252,6 +252,7 @@ typedef double GLclampd; #define glDisable(state) swDisable(state) #define glGetFloatv(pname, params) swGetFloatv(pname, params) #define glGetString(pname) swGetString(pname) +#define glGetError() swGetError() #define glViewport(x, y, w, h) swViewport(x, y, w, h) #define glScissor(x, y, w, h) swScissor(x, y, w, h) #define glClearColor(r, g, b, a) swClearColor(r, g, b, a) @@ -459,6 +460,7 @@ void swDisable(SWstate state); void swGetFloatv(SWget name, float* v); const char* swGetString(SWget name); +SWerrcode swGetError(void); void swViewport(int x, int y, int width, int height); void swScissor(int x, int y, int width, int height); @@ -3345,6 +3347,13 @@ const char* swGetString(SWget name) return result; } +SWerrcode swGetError(void) +{ + SWerrcode ret = RLSW.errCode; + RLSW.errCode = SW_NO_ERROR; + return ret; +} + void swViewport(int x, int y, int width, int height) { if (x <= -width || y <= -height) { From a420e5ab822d0fa02d8fa20f97009220980869a5 Mon Sep 17 00:00:00 2001 From: Bigfoot71 Date: Mon, 24 Mar 2025 14:38:19 +0100 Subject: [PATCH 43/43] def sw alloc macros --- src/rlgl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rlgl.h b/src/rlgl.h index 3a7acec5e..3daff2348 100644 --- a/src/rlgl.h +++ b/src/rlgl.h @@ -844,6 +844,9 @@ RLAPI void rlLoadDrawQuad(void); // Load and draw a quad #if defined(GRAPHICS_API_OPENGL_11) #if defined(GRAPHICS_API_OPENGL_11_SOFTWARE) #define RLSW_IMPL + #define SW_MALLOC(sz) RL_MALLOC(sz) + #define SW_REALLOC(ptr, newSz) RL_REALLOC(ptr, newSz) + #define SW_FREE(ptr) RL_FREE(ptr) #include // OpenGL 1.1 software implementation #else #if defined(__APPLE__)