I hope I didn't messed it up that much.
Prototype, no for cycle yet.
Code: Select all
uint16_t cAlphaBlendA8RGB16(clr_a8rgb16_t fg, uint16_t bg)
{
/* RGB color mixing of "layer" with background data, using layer alpha channel
*
*/
uint8_t alpha = fg.A;
uint32_t fg = fg.rgb16;
uint16_t bg;
uint16_t result = 0;
//The equation below is made from the original color mixing equation, to avoid using devision and fraction
//This equation assumes that background is always not transparent.
//Accuracy of this is very good. 90%of time the 8bits of the color components are the same.
//Difference can maybe occur in the last LSB bit, but that does nearly nothing noticeable.
//MY EXCEL sheet confirms all this.
//This function should make the following equation (with order of execution)
// ____III._______
// _______II.______
// ___I.___ __IV.__
// ((fg*fg.A)+(256-fg.A)*bg)/256
// The final devide by 256 is supplemented by a shifting to right by 8.
//
//Where fg is the layers color component (R, G or B), bg is the background color
//Alpha is the alpha component of layer, bg has no alpha (non transparent)
//result
//%0: result (output)
//%1: fg (input)
//%2: bg (input)
//%3: alpha (input)
__asm__ __volatile__ (
"MOVI %%a0, $0"
"wsr %%a0, %%ACCLO" //sets MAC16 ACC to 0
"isync"
"MOVI %%a1, $256" //puts 256 into reg A3
"MOVI %%a2, %3" //loads a
"SUB %%a1, %%a1, %%a2" //256-fg.A - I.
"MOVI %%a3, %1" //loads fgcolor to A3
"MOVI %%a4, %2" //loads bgcolor to A4
"MOVI %%a5, 31" //Loads 0x11111b mask
//for B
"AND %%a6, %%a3, %a5" //ands mask and fg for Blue components of layer, stores it in A6
"AND %%a7, %%a4, %a5" //ands mask and bg for Blue components of background, stores it in A7
"MULA.AA.HH %%a2, %%a6" //1st multiplication - II.
"MULA.AA.HH %%a1, %%a7" //2nd multiplication, result in ACCLO - III.
"SRLI %%a8, %%ACCLO, 8" //store ACC result in A8 shifted right by 8 - IV.
//for G
"MOVI %%a0, $0"
"wsr %a0, %%ACCLO" //sets MAC16 ACC to 0
"isync"
"MOVI %%a5, $63"
"SLLI %%a5, %%a5, 5"
"AND %%a6, %%a3, %a5" //ands mask and fg for G components, stores it in A6
"AND %%a7, %%a4, %a5" //ands mask and fg for G components, stores it in A7
"MULA.AA.HH %%a2, %%a6" //1st multiplication
"MULA.AA.HH %%a1, %%a7" //2nd multiplication, result in ACCLO
"SRLI %%a9, %%ACCLO, 8" //store ACC result in A9 shifter right by 8
//for R
"MOVI %%a0, $0"
"wsr %a0, %%ACCLO" //sets MAC16 ACC to 0
"isync"
"MOVI %%a5, $31"
"SLLI %%a5, %%a5, 13"
"AND %%a6, %%a3, %a5" //ands mask and fg for G components, stores it in A6
"AND %%a7, %%a4, %a5" //ands mask and fg for G components, stores it in A7
"MULA.AA.HH %%a2, %%a6" //1st multiplication
"MULA.AA.HH %%a1, %%a7" //2nd multiplication, result in ACCLO
"SRLI %%a10, %%ACCLO, 8" //store ACC result in A10 shifter right by 8
//creating the resulted R5G6B5 format
//this section can be different to produce different end formats
"SRLI %%a8, %%a8, 3"
"SRLI %%a9, %%a9, 2"
"SLLI %%a9, %%a9, 5"
"SRLI %%a10, %%a10, 3"
"SLLI %%a10, %%a10, 13"
"ADD %%a8, %%a8, %%a9"
"ADD %3, %%a8, %%a10"
: "=r" (result)
: "r" (alpha), "r" (fg), "r" (bg)
: "%ACCLO", "%a0", "%a1", "%a2", "%a3", "%a4", "%a5", "%a6", "%a7"
);
}