From 589d606000a61aaf9d7f14089a83a2bb30c3e714 Mon Sep 17 00:00:00 2001 From: raysan5 Date: Mon, 15 Jun 2020 12:06:41 +0200 Subject: [PATCH] REDESIGNED: ImageDraw(), optimized #1218 After multiple tests and lot of redesign, current implementation is the fastest one. It also considers several fast-paths for maximum speed! --- src/textures.c | 278 ++++++++++--------------------------------------- 1 file changed, 56 insertions(+), 222 deletions(-) diff --git a/src/textures.c b/src/textures.c index 033c95784..ea2f33f11 100644 --- a/src/textures.c +++ b/src/textures.c @@ -2524,146 +2524,26 @@ void ImageDraw(Image *dst, Image src, Rectangle srcRec, Rectangle dstRec, Color if (dst->format >= COMPRESSED_DXT1_RGB) TRACELOG(LOG_WARNING, "Image drawing not supported for compressed formats"); else { - // Despite all my efforts for optimization, original implementation is faster... - // I left here other implementations for future reference -#define IMAGEDRAW_METHOD02 -#if defined(IMAGEDRAW_METHOD01) - // Security checks to avoid size and rectangle issues (out of bounds) - // Check that srcRec is inside src image - if (srcRec.x < 0) srcRec.x = 0; - if (srcRec.y < 0) srcRec.y = 0; - - if ((srcRec.x + srcRec.width) > src.width)srcRec.width = src.width - srcRec.x; - if ((srcRec.y + srcRec.height) > src.height) srcRec.height = src.height - srcRec.y; - - Image srcMod = ImageCopy(src); // Make a copy of source image to work with it - - // Crop source image to desired source rectangle (if required) - if ((src.width != (int)srcRec.width) && (src.height != (int)srcRec.height)) - { - ImageCrop(&srcMod, srcRec); - } - - // Scale source image in case destination rec size is different than source rec size - if (((int)dstRec.width != (int)srcRec.width) || ((int)dstRec.height != (int)srcRec.height)) - { - ImageResize(&srcMod, (int)dstRec.width, (int)dstRec.height); - } - - // Check that dstRec is inside dst image - // Allow negative position within destination with cropping - if (dstRec.x < 0) - { - ImageCrop(&srcMod, (Rectangle) { -dstRec.x, 0, dstRec.width + dstRec.x, dstRec.height }); - dstRec.width = dstRec.width + dstRec.x; - dstRec.x = 0; - } - - if ((dstRec.x + dstRec.width) > dst->width) - { - ImageCrop(&srcMod, (Rectangle) { 0, 0, dst->width - dstRec.x, dstRec.height }); - dstRec.width = dst->width - dstRec.x; - } - - if (dstRec.y < 0) - { - ImageCrop(&srcMod, (Rectangle) { 0, -dstRec.y, dstRec.width, dstRec.height + dstRec.y }); - dstRec.height = dstRec.height + dstRec.y; - dstRec.y = 0; - } - - if ((dstRec.y + dstRec.height) > dst->height) - { - ImageCrop(&srcMod, (Rectangle) { 0, 0, dstRec.width, dst->height - dstRec.y }); - dstRec.height = dst->height - dstRec.y; - } - - // Get image data as Color pixels array to work with it - Color *dstPixels = GetImageData(*dst); - Color *srcPixels = GetImageData(srcMod); - - UnloadImage(srcMod); // Source copy not required any more - - Vector4 fsrc, fdst, fout; // Normalized pixel data (ready for operation) - Vector4 ftint = ColorNormalize(tint); // Normalized color tint - - // Blit pixels, copy source image into destination - for (int j = (int)dstRec.y; j < (int)(dstRec.y + dstRec.height); j++) - { - for (int i = (int)dstRec.x; i < (int)(dstRec.x + dstRec.width); i++) - { - // Alpha blending (https://en.wikipedia.org/wiki/Alpha_compositing) - - fdst = ColorNormalize(dstPixels[j*(int)dst->width + i]); - fsrc = ColorNormalize(srcPixels[(j - (int)dstRec.y)*(int)dstRec.width + (i - (int)dstRec.x)]); - - // Apply color tint to source image - fsrc.x *= ftint.x; fsrc.y *= ftint.y; fsrc.z *= ftint.z; fsrc.w *= ftint.w; - - fout.w = fsrc.w + fdst.w*(1.0f - fsrc.w); - - if (fout.w <= 0.0f) - { - fout.x = 0.0f; - fout.y = 0.0f; - fout.z = 0.0f; - } - else - { - fout.x = (fsrc.x*fsrc.w + fdst.x*fdst.w*(1 - fsrc.w))/fout.w; - fout.y = (fsrc.y*fsrc.w + fdst.y*fdst.w*(1 - fsrc.w))/fout.w; - fout.z = (fsrc.z*fsrc.w + fdst.z*fdst.w*(1 - fsrc.w))/fout.w; - } - - dstPixels[j*(int)dst->width + i] = (Color){ (unsigned char)(fout.x*255.0f), - (unsigned char)(fout.y*255.0f), - (unsigned char)(fout.z*255.0f), - (unsigned char)(fout.w*255.0f) }; - - // TODO: Support other blending options - } - } - - Image final = { - .data = dstPixels, - .width = dst->width, - .height = dst->height, - .format = UNCOMPRESSED_R8G8B8A8, - .mipmaps = 1 - }; - - // NOTE: dstPixels are free() inside ImageFormat() - ImageFormat(&final, dst->format); - - UnloadImage(*dst); - *dst = final; - - RL_FREE(srcPixels); -#endif -#if defined(IMAGEDRAW_METHOD02) - Image srcMod = ImageCopy(src); // Make a copy of source image to work with it - ImageFormat(&srcMod, UNCOMPRESSED_R8G8B8A8); // Convert to R8G8B8A8 to help on blending + Image srcMod = { 0 }; // Source copy (in case it was required) + Image *srcPtr = &src; // Pointer to source image + bool useSrcMod = false; // Track source copy required // Source rectangle out-of-bounds security checks if (srcRec.x < 0) { srcRec.width -= srcRec.x; srcRec.x = 0; } if (srcRec.y < 0) { srcRec.height -= srcRec.y; srcRec.y = 0; } if ((srcRec.x + srcRec.width) > src.width) srcRec.width = src.width - srcRec.x; if ((srcRec.y + srcRec.height) > src.height) srcRec.height = src.height - srcRec.y; - + // Check if source rectangle needs to be resized to destination rectangle // In that case, we make a copy of source and we apply all required transform - if ((srcRec.width != fabs(dstRec.width - dstRec.x)) || (srcRec.height != fabs(dstRec.height - dstRec.y))) + if (((int)srcRec.width != (int)dstRec.width) || ((int)srcRec.height != (int)dstRec.height)) { - ImageCrop(&srcMod, srcRec); // Crop to source rectangle + srcMod = ImageFromImage(src, srcRec); // Create image from another image ImageResize(&srcMod, (int)dstRec.width, (int)dstRec.height); // Resize to destination rectangle srcRec = (Rectangle){ 0, 0, srcMod.width, srcMod.height }; - } - - // Check if destination format is different than source format and no source copy created yet - if (dst->format != src.format) - { - ImageCrop(&srcMod, srcRec); // Crop to source rectangle - srcRec = (Rectangle){ 0, 0, srcMod.width, srcMod.height }; + + srcPtr = &srcMod; + useSrcMod = true; } // Destination rectangle out-of-bounds security checks @@ -2673,7 +2553,7 @@ void ImageDraw(Image *dst, Image src, Rectangle srcRec, Rectangle dstRec, Color srcRec.width += dstRec.x; dstRec.x = 0; } - else if ((dstRec.x + srcMod.width) > dst->width) srcRec.width = dst->width - dstRec.x; + else if ((dstRec.x + srcPtr->width) > dst->width) srcRec.width = dst->width - dstRec.x; if (dstRec.y < 0) { @@ -2681,113 +2561,67 @@ void ImageDraw(Image *dst, Image src, Rectangle srcRec, Rectangle dstRec, Color srcRec.height += dstRec.y; dstRec.y = 0; } - else if ((dstRec.y + srcMod.height) > dst->height) srcRec.height = dst->height - dstRec.y; + else if ((dstRec.y + srcPtr->height) > dst->height) srcRec.height = dst->height - dstRec.y; if (dst->width < srcRec.width) srcRec.width = dst->width; if (dst->height < srcRec.height) srcRec.height = dst->height; + + // This blitting method is quite fast! The process followed is: + // for every pixel -> [get_src_format/get_dst_format -> blend -> format_to_dst] + // Some optimization ideas: + // [x] Avoid creating source copy if not required (no resize required) + // [x] Optimize ImageResize() for pixel format (alternative: ImageResizeNN()) + // [x] Optimize ColorAlphaBlend() to avoid processing (alpha = 0) and (alpha = 1) + // [x] Optimize ColorAlphaBlend() for faster operations (maybe avoiding divs?) + // [x] Consider fast path: no alpha blending required cases (src has no alpha) + // [x] Consider fast path: same src/dst format with no alpha -> direct line copy + // [-] GetPixelColor(): Return Vector4 instead of Color, easier for ColorAlphaBlend() + + Color colSrc, colDst, blend; + bool blendRequired = true; - #define IMAGEDRAW_NO_IMAGEFORMAT - #if defined(IMAGEDRAW_NO_BLENDING) - // This method is very fast but no pixels blending is considered - int dataSize = GetPixelDataSize(dst->width, dst->height, dst->format); - int bytesPerPixel = dataSize/(dst->width*dst->height); - - // Image blitting src -> destination, line by line - for (int y = 0; y < (int)srcRec.height; y++) - { - memcpy((unsigned char *)dst->data + ((int)dstRec.y*dst->width + (int)dstRec.x + y*dst->width)*bytesPerPixel, - (unsigned char *)srcMod.data + ((y + (int)srcRec.y)*srcMod.width + (int)srcRec.x)*bytesPerPixel, - (int)srcRec.width*bytesPerPixel); - } - #endif - #if defined(IMAGEDRAW_IMAGEFORMAT) - // This method is slower than expected, it seems ImageFormat() to RGBA and back to original format, - // combined with alpha blending makes it quite slow - - // Convert destination to R8G8B8A8 for blending calculation - int dstFormat = dst->format; - ImageFormat(dst, UNCOMPRESSED_R8G8B8A8); // Force 4 bytes per pixel with alpha for blending - - Vector4 fsrc, fdst, fout; // Normalized pixel data (ready for operation) - Vector4 ftint = ColorNormalize(tint); // Normalized color tint - unsigned char srcAlpha = 0; + // Fast path: Avoid blend if source has no alpha to blend + if ((tint.a == 255) && ((srcPtr->format == UNCOMPRESSED_GRAYSCALE) || (srcPtr->format == UNCOMPRESSED_R8G8B8) || (srcPtr->format == UNCOMPRESSED_R5G6B5))) blendRequired = false; - for (int y = 0; y < (int)srcRec.height; y++) - { - for (int x = 0; x < (int)srcRec.width; x++) - { - srcAlpha = ((Color *)srcMod.data)[((y + (int)srcRec.y)*srcMod.width + (int)srcRec.x) + x].a; - - if (srcAlpha == 255) - { - ((Color *)dst->data)[((int)dstRec.y*dst->width + (int)dstRec.x) + y*(dst->width) + x] = ((Color *)srcMod.data)[((y + (int)srcRec.y)*srcMod.width + (int)srcRec.x) + x]; - } - else if (srcAlpha > 0) - { - // Alpha blending (https://en.wikipedia.org/wiki/Alpha_compositing) + int strideDst = GetPixelDataSize(dst->width, 1, dst->format); + int bytesPerPixelDst = strideDst/(dst->width); - fdst = ColorNormalize(((Color *)dst->data)[((int)dstRec.y*dst->width + (int)dstRec.x) + y*(dst->width) + x]); - fsrc = ColorNormalize(((Color *)srcMod.data)[((y + (int)srcRec.y)*srcMod.width + (int)srcRec.x) + x]); + int strideSrc = GetPixelDataSize(srcPtr->width, 1, srcPtr->format); + int bytesPerPixelSrc = strideSrc/(srcPtr->width); - // Apply color tint to source image - fsrc.x *= ftint.x; fsrc.y *= ftint.y; fsrc.z *= ftint.z; fsrc.w *= ftint.w; + unsigned char *pSrcBase = (unsigned char *)srcPtr->data + ((int)srcRec.y*srcPtr->width + (int)srcRec.x)*bytesPerPixelSrc; + unsigned char *pDstBase = (unsigned char *)dst->data + ((int)dstRec.y*dst->width + (int)dstRec.x)*bytesPerPixelDst; - fout.w = fsrc.w + fdst.w*(1.0f - fsrc.w); + for (int y = 0; y < (int)srcRec.height; y++) + { + unsigned char *pSrc = pSrcBase; + unsigned char *pDst = pDstBase; - if (fout.w <= 0.0f) - { - fout.x = 0.0f; - fout.y = 0.0f; - fout.z = 0.0f; - } - else - { - fout.x = (fsrc.x*fsrc.w + fdst.x*fdst.w*(1 - fsrc.w))/fout.w; - fout.y = (fsrc.y*fsrc.w + fdst.y*fdst.w*(1 - fsrc.w))/fout.w; - fout.z = (fsrc.z*fsrc.w + fdst.z*fdst.w*(1 - fsrc.w))/fout.w; - } + // Fast path: Avoid moving pixel by pixel if no blend required and same format + if (!blendRequired && (srcPtr->format == dst->format)) memcpy(pDst, pSrc, srcRec.width*bytesPerPixelSrc); + else + { + for (int x = 0; x < (int)srcRec.width; x++) + { + colSrc = GetPixelColor(pSrc, srcPtr->format); + colDst = GetPixelColor(pDst, dst->format); + + // Fast path: Avoid blend if source has no alpha to blend + if (blendRequired) blend = ColorAlphaBlend(colDst, colSrc, tint); + else blend = colSrc; - ((Color *)dst->data)[((int)dstRec.y*dst->width + (int)dstRec.x) + y*(dst->width) + x] = (Color){ (unsigned char)(fout.x*255.0f), (unsigned char)(fout.y*255.0f), (unsigned char)(fout.z*255.0f), (unsigned char)(fout.w*255.0f) }; + SetPixelColor(pDst, blend, dst->format); - // TODO: Support other blending options + pDst += bytesPerPixelDst; + pSrc += bytesPerPixelSrc; } } - } - - ImageFormat(dst, dstFormat); // Restore original image format after drawing with blending - UnloadImage(srcMod); // Unload source modified image - #endif - #if defined(IMAGEDRAW_NO_IMAGEFORMAT) - // This new method is quite fast, it seems it gets the best results! - // It [formats_src -> blend -> format_dst] per pixel! and it can be further optimized! - // Some ideas: - // - Optimize ColorAlphaBlend() to avoid processing (alpha = 0) and (alpha = 1) - // - Optimize ColorAlphaBlend() for faster operations (maybe avoiding divs?) - // - GetPixelColor(): Return Vector4 instead of Color, easier for ColorAlphaBlend() - // - Consider special src/dst format cases when there is no alpha -> [formats_src -> format_dst] - Color psrc, pdst, blend; - - int dataSizeDst = GetPixelDataSize(dst->width, dst->height, dst->format); - int bytesPerPixelDst = dataSizeDst/(dst->width*dst->height); - - int dataSizeSrc = GetPixelDataSize(srcMod.width, srcMod.height, srcMod.format); - int bytesPerPixelSrc = dataSizeSrc/(srcMod.width*srcMod.height); - - for (int y = 0; y < (int)srcRec.height; y++) - { - for (int x = 0; x < (int)srcRec.width; x++) - { - psrc = GetPixelColor((unsigned char *)srcMod.data + (((y + (int)srcRec.y)*srcMod.width + (int)srcRec.x) + x)*bytesPerPixelSrc, srcMod.format); - pdst = GetPixelColor((unsigned char *)dst->data + (((int)dstRec.y*dst->width + (int)dstRec.x) + y*(dst->width) + x)*bytesPerPixelDst, dst->format); - blend = ColorAlphaBlend(pdst, psrc, tint); - - ColorWrite((unsigned char *)dst->data + (((int)dstRec.y*dst->width + (int)dstRec.x) + y*(dst->width) + x)*bytesPerPixelDst, blend, dst->format); - } + + pSrcBase += strideSrc; + pDstBase += strideDst; } - UnloadImage(srcMod); // Unload source modified image - #endif -#endif + if (useSrcMod) UnloadImage(srcMod); // Unload source modified image } }