src/video/SDL_blit_A.c
changeset 7677 871d43c6968a
parent 7641 0cd36d20df2b
child 7790 8136ce6b3950
equal deleted inserted replaced
7676:013483acb374 7677:871d43c6968a
   353 		if (alpha == 0) {
   353 		if (alpha == 0) {
   354 			/* do nothing */
   354 			/* do nothing */
   355 		} else if (alpha == amask) {
   355 		} else if (alpha == amask) {
   356 			*dstp = *srcp;
   356 			*dstp = *srcp;
   357 		} else {
   357 		} else {
   358 			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
   358 			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
   359 			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
   359 			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
   360 
   360 
   361 			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
   361 			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
   362 			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
   362 			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
   363 
   363 
   364 			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
   364 			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
   365 			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
   365 			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
   366 			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
   366 			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
   367 			mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
   367 			mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
   368 			mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);	/* 0F0A0A0A -> mm_alpha*/
   368 			mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);	/* 0F0A0A0A -> mm_alpha */
   369 			mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);	/* 255 - mm_alpha -> mm_alpha*/
   369 			mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);	/* 255 - mm_alpha -> mm_alpha */
   370 
   370 
   371 			/* blend */		    
   371 			/* blend */		    
   372 			src1 = _mm_mullo_pi16(src1, mm_alpha);
   372 			src1 = _mm_mullo_pi16(src1, mm_alpha);
   373 			src1 = _mm_srli_pi16(src1, 8);
   373 			src1 = _mm_srli_pi16(src1, 8);
   374 			dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
   374 			dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
   546 		if (alpha == 0) {
   546 		if (alpha == 0) {
   547 			/* do nothing */
   547 			/* do nothing */
   548 		} else if (alpha == amask) {
   548 		} else if (alpha == amask) {
   549 			*dstp = *srcp;
   549 			*dstp = *srcp;
   550 		} else {
   550 		} else {
   551 			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
   551 			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
   552 			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
   552 			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
   553 
   553 
   554 			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
   554 			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
   555 			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
   555 			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
   556 
   556 
   557 			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
   557 			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
   558 			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
   558 			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
   559 			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
   559 			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
   560 			mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
   560 			mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
   561 			mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);	/* 0F0A0A0A -> mm_alpha*/
   561 			mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);	/* 0F0A0A0A -> mm_alpha */
   562 			mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);	/* 255 - mm_alpha -> mm_alpha*/
   562 			mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);	/* 255 - mm_alpha -> mm_alpha */
   563 
   563 
   564 
   564 
   565 			/* blend */		    
   565 			/* blend */		    
   566 			src1 = _mm_mullo_pi16(src1, mm_alpha);
   566 			src1 = _mm_mullo_pi16(src1, mm_alpha);
   567 			src1 = _mm_srli_pi16(src1, 8);
   567 			src1 = _mm_srli_pi16(src1, 8);