Significant performance improvement of alpha_composite function.

This commit is contained in:
homm 2013-03-23 05:44:36 +04:00
parent 5aeb7b584b
commit c39a51bcf0

View File

@ -12,81 +12,74 @@
#include "Imaging.h"
/*
 * One RGBA pixel with a separate UINT8 field per channel.
 *
 * Rows of "RGBA" images are cast to arrays of this struct, so the fields
 * must stay in r, g, b, a order to line up with the channel bytes of each
 * pixel (assumes UINT8 is a single byte and the struct has no padding).
 */
typedef struct
{
    UINT8 r, g, b, a;
} rgba8;
Imaging
ImagingAlphaComposite(Imaging imDst, Imaging imSrc)
{
Imaging imOut;
int x, y;
float dstR, dstG, dstB, dstA;
float srcR, srcG, srcB, srcA;
float outR, outG, outB, outA;
/* Check arguments */
if (!imDst || !imSrc ||
strcmp(imDst->mode, "RGBA") ||
imDst->type != IMAGING_TYPE_UINT8 ||
imDst->bands != 4)
return ImagingError_ModeError();
strcmp(imDst->mode, "RGBA") ||
imDst->type != IMAGING_TYPE_UINT8 ||
imDst->bands != 4)
return ImagingError_ModeError();
if (strcmp(imDst->mode, imSrc->mode) ||
imDst->type != imSrc->type ||
imDst->bands != imSrc->bands ||
imDst->xsize != imSrc->xsize ||
imDst->ysize != imSrc->ysize)
return ImagingError_Mismatch();
imDst->type != imSrc->type ||
imDst->bands != imSrc->bands ||
imDst->xsize != imSrc->xsize ||
imDst->ysize != imSrc->ysize)
return ImagingError_Mismatch();
imOut = ImagingNew(imDst->mode, imDst->xsize, imDst->ysize);
if (!imOut)
return NULL;
return NULL;
ImagingCopyInfo(imOut, imDst);
for (y = 0; y < imDst->ysize; y++) {
UINT8* dst = (UINT8*) imDst->image[y];
UINT8* src = (UINT8*) imSrc->image[y];
UINT8* out = (UINT8*) imOut->image[y];
rgba8* pdst = (rgba8*) imDst->image[y];
rgba8* psrc = (rgba8*) imSrc->image[y];
rgba8* pout = (rgba8*) imOut->image[y];
for (x = 0; x < imDst->linesize; x += 4) {
for (x = 0; x < imDst->xsize; x ++) {
rgba8 src = psrc[x];
dstR = dst[x + 0] / 255.0;
dstG = dst[x + 1] / 255.0;
dstB = dst[x + 2] / 255.0;
dstA = dst[x + 3] / 255.0;
if (src.a == 0) {
// Copy 4 bytes at once.
pout[x] = pdst[x];
} else {
rgba8 dst = pdst[x];
rgba8* out = &pout[x];
srcR = src[x + 0] / 255.0;
srcG = src[x + 1] / 255.0;
srcB = src[x + 2] / 255.0;
srcA = src[x + 3] / 255.0;
// Integer implementation with increased precision.
// Each variable has extra meaningful bits.
// Divisions are rounded.
if (dstA == 1.0) {
outR = srcR * srcA + dstR * (1.0 - srcA);
outG = srcG * srcA + dstG * (1.0 - srcA);
outB = srcB * srcA + dstB * (1.0 - srcA);
outA = 1.0;
} else if (srcA == 0.0) {
outR = dstR;
outG = dstG;
outB = dstB;
outA = dstA;
} else {
outA = srcA + dstA * (1.0 - srcA);
if (outA == 0.0) {
outR = 0.0;
outG = 0.0;
outB = 0.0;
} else {
outR = (srcR * srcA + dstR * dstA * (1.0 - srcA)) / outA;
outG = (srcG * srcA + dstG * dstA * (1.0 - srcA)) / outA;
outB = (srcB * srcA + dstB * dstA * (1.0 - srcA)) / outA;
}
}
UINT16 blend = dst.a * (255 - src.a); // 16 bit max
UINT16 outa = (src.a << 4) + ((blend + 0x8) >> 4); // 12
UINT16 coef1 = (src.a << 16) / outa; // 12
UINT16 coef2 = (blend << 8) / outa; // 12
out[x + 0] = (UINT8) (255.0 * outR + 0.5);
out[x + 1] = (UINT8) (255.0 * outG + 0.5);
out[x + 2] = (UINT8) (255.0 * outB + 0.5);
out[x + 3] = (UINT8) (255.0 * outA + 0.5);
out->r = (src.r * coef1 + dst.r * coef2 + 0x800) >> 12;
out->g = (src.g * coef1 + dst.g * coef2 + 0x800) >> 12;
out->b = (src.b * coef1 + dst.b * coef2 + 0x800) >> 12;
out->a = (outa + 0x8) >> 4;
}
}
}
}