mirror of
				https://github.com/python-pillow/Pillow.git
				synced 2025-10-25 05:01:26 +03:00 
			
		
		
		
	Merge pull request #156 from homm/master
Performance improvement of alpha_composite function
This commit is contained in:
		
						commit
						4554bcb4e4
					
				|  | @ -12,81 +12,82 @@ | |||
| #include "Imaging.h" | ||||
| 
 | ||||
| 
 | ||||
| typedef struct /* one RGBA pixel; the compositing loop reads and copies pixels through this type */ | ||||
| { | ||||
|     UINT8 r; /* red channel */ | ||||
|     UINT8 g; /* green channel */ | ||||
|     UINT8 b; /* blue channel */ | ||||
|     UINT8 a; /* alpha channel; a source alpha of 0 makes the loop copy the destination pixel unchanged */ | ||||
| } rgba8; | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| Imaging | ||||
| ImagingAlphaComposite(Imaging imDst, Imaging imSrc) | ||||
| { | ||||
|     Imaging imOut; | ||||
|     int x, y; | ||||
|     float dstR, dstG, dstB, dstA; | ||||
|     float srcR, srcG, srcB, srcA; | ||||
|     float outR, outG, outB, outA; | ||||
| 
 | ||||
|     /* Check arguments */ | ||||
|     if (!imDst || !imSrc || | ||||
| 	strcmp(imDst->mode, "RGBA") || | ||||
| 	imDst->type != IMAGING_TYPE_UINT8 || | ||||
| 	imDst->bands != 4) | ||||
| 	return ImagingError_ModeError(); | ||||
|         strcmp(imDst->mode, "RGBA") || | ||||
|         imDst->type != IMAGING_TYPE_UINT8 || | ||||
|         imDst->bands != 4) | ||||
|         return ImagingError_ModeError(); | ||||
| 
 | ||||
|     if (strcmp(imDst->mode, imSrc->mode) || | ||||
| 	imDst->type  != imSrc->type  || | ||||
| 	imDst->bands != imSrc->bands || | ||||
| 	imDst->xsize != imSrc->xsize || | ||||
| 	imDst->ysize != imSrc->ysize) | ||||
| 	return ImagingError_Mismatch(); | ||||
|         imDst->type  != imSrc->type  || | ||||
|         imDst->bands != imSrc->bands || | ||||
|         imDst->xsize != imSrc->xsize || | ||||
|         imDst->ysize != imSrc->ysize) | ||||
|         return ImagingError_Mismatch(); | ||||
| 
 | ||||
|     imOut = ImagingNew(imDst->mode, imDst->xsize, imDst->ysize); | ||||
|     if (!imOut) | ||||
| 	return NULL; | ||||
|         return NULL; | ||||
| 
 | ||||
|     ImagingCopyInfo(imOut, imDst); | ||||
| 
 | ||||
|     for (y = 0; y < imDst->ysize; y++) { | ||||
| 
 | ||||
| 	UINT8* dst = (UINT8*) imDst->image[y]; | ||||
| 	UINT8* src = (UINT8*) imSrc->image[y]; | ||||
| 	UINT8* out = (UINT8*) imOut->image[y]; | ||||
|         rgba8* dst = (rgba8*) imDst->image[y]; | ||||
|         rgba8* src = (rgba8*) imSrc->image[y]; | ||||
|         rgba8* out = (rgba8*) imOut->image[y]; | ||||
| 
 | ||||
| 	for (x = 0; x < imDst->linesize; x += 4) { | ||||
|         for (x = 0; x < imDst->xsize; x ++) { | ||||
| 
 | ||||
| 	    dstR = dst[x + 0] / 255.0; | ||||
| 	    dstG = dst[x + 1] / 255.0; | ||||
| 	    dstB = dst[x + 2] / 255.0; | ||||
| 	    dstA = dst[x + 3] / 255.0; | ||||
|             if (src->a == 0) { | ||||
|                 // Copy 4 bytes at once.
 | ||||
|                 *out = *dst; | ||||
|             } else { | ||||
|                 // Integer implementation with increased precision.
 | ||||
|                 // Each variable has extra meaningful bits.
 | ||||
|                 // Divisions are rounded.
 | ||||
| 
 | ||||
| 	    srcR = src[x + 0] / 255.0; | ||||
| 	    srcG = src[x + 1] / 255.0; | ||||
| 	    srcB = src[x + 2] / 255.0; | ||||
| 	    srcA = src[x + 3] / 255.0; | ||||
|                 // This code uses the trick from Paste.c:
 | ||||
|                 // (a + (2 << (n-1)) - 1) / ((2 << n)-1)
 | ||||
|                 // almost equivalent to:
 | ||||
|                 // tmp = a + (2 << (n-2)), ((tmp >> n) + tmp) >> n
 | ||||
| 
 | ||||
| 	    if (dstA == 1.0) { | ||||
| 		outR = srcR * srcA + dstR * (1.0 - srcA); | ||||
| 		outG = srcG * srcA + dstG * (1.0 - srcA); | ||||
| 		outB = srcB * srcA + dstB * (1.0 - srcA); | ||||
| 		outA = 1.0; | ||||
| 	    } else if (srcA == 0.0) { | ||||
| 		outR = dstR; | ||||
| 		outG = dstG; | ||||
| 		outB = dstB; | ||||
| 		outA = dstA; | ||||
| 	    } else { | ||||
| 		outA = srcA + dstA * (1.0 - srcA); | ||||
| 		if (outA == 0.0) { | ||||
| 		    outR = 0.0; | ||||
| 		    outG = 0.0; | ||||
| 		    outB = 0.0; | ||||
| 		} else { | ||||
| 		    outR = (srcR * srcA + dstR * dstA * (1.0 - srcA)) / outA; | ||||
| 		    outG = (srcG * srcA + dstG * dstA * (1.0 - srcA)) / outA; | ||||
| 		    outB = (srcB * srcA + dstB * dstA * (1.0 - srcA)) / outA; | ||||
| 		} | ||||
| 	    } | ||||
|                 // 0xff * 0xff = 16 meaningful bits.
 | ||||
|                 UINT16 blend = dst->a * (255 - src->a); | ||||
|                 // Shift 4 bits up to avoid losing blend precision
 | ||||
|                 // on very transparent pixels.
 | ||||
|                 UINT16 outa = (src->a << 4) + (((blend << 4) + (blend >> 4) + 0x80) >> 8); | ||||
|                 UINT16 coef1 = (((src->a << 8) - src->a) << 8) / outa;  // 12
 | ||||
|                 UINT16 coef2 = (blend << 8) / outa;  // 12
 | ||||
| 
 | ||||
| 	    out[x + 0] = (UINT8) (255.0 * outR + 0.5); | ||||
| 	    out[x + 1] = (UINT8) (255.0 * outG + 0.5); | ||||
| 	    out[x + 2] = (UINT8) (255.0 * outB + 0.5); | ||||
| 	    out[x + 3] = (UINT8) (255.0 * outA + 0.5); | ||||
|                 UINT32 tmpr = src->r * coef1 + dst->r * coef2 + 0x800; | ||||
|                 out->r = ((tmpr >> 8) + tmpr) >> 12; | ||||
|                 UINT32 tmpg = src->g * coef1 + dst->g * coef2 + 0x800; | ||||
|                 out->g = ((tmpg >> 8) + tmpg) >> 12; | ||||
|                 UINT32 tmpb = src->b * coef1 + dst->b * coef2 + 0x800; | ||||
|                 out->b = ((tmpb >> 8) + tmpb) >> 12; | ||||
|                 out->a = (outa + 0x7) >> 4; | ||||
|             } | ||||
| 
 | ||||
| 	} | ||||
|             dst++; src++; out++; | ||||
|         } | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user