diff options
Diffstat (limited to 'lib/lib8tion/scale8.h')
| -rw-r--r-- | lib/lib8tion/scale8.h | 542 | 
1 files changed, 542 insertions, 0 deletions
diff --git a/lib/lib8tion/scale8.h b/lib/lib8tion/scale8.h new file mode 100644 index 000000000..9895fd4d7 --- /dev/null +++ b/lib/lib8tion/scale8.h @@ -0,0 +1,542 @@ +#ifndef __INC_LIB8TION_SCALE_H +#define __INC_LIB8TION_SCALE_H + +///@ingroup lib8tion + +///@defgroup Scaling Scaling functions +/// Fast, efficient 8-bit scaling functions specifically +/// designed for high-performance LED programming. +/// +/// Because of the AVR(Arduino) and ARM assembly language +/// implementations provided, using these functions often +/// results in smaller and faster code than the equivalent +/// program using plain "C" arithmetic and logic. +///@{ + +///  scale one byte by a second one, which is treated as +///  the numerator of a fraction whose denominator is 256 +///  In other words, it computes i * (scale / 256) +///  4 clocks AVR with MUL, 2 clocks ARM +LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 +#if (FASTLED_SCALE8_FIXED == 1) +    return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8; +#else +    return ((uint16_t)i * (uint16_t)(scale) ) >> 8; +#endif +#elif SCALE8_AVRASM == 1 +#if defined(LIB8_ATTINY) +#if (FASTLED_SCALE8_FIXED == 1) +    uint8_t work=i; +#else +    uint8_t work=0; +#endif +    uint8_t cnt=0x80; +    asm volatile( +#if (FASTLED_SCALE8_FIXED == 1) +        "  inc %[scale]                 \n\t" +        "  breq DONE_%=                 \n\t" +        "  clr %[work]                  \n\t" +#endif +        "LOOP_%=:                       \n\t" +        /*"  sbrc %[scale], 0             \n\t" +        "  add %[work], %[i]            \n\t" +        "  ror %[work]                  \n\t" +        "  lsr %[scale]                 \n\t" +        "  clc                          \n\t"*/ +        "  sbrc %[scale], 0             \n\t" +        "  add %[work], %[i]            \n\t" +        "  ror %[work]                  \n\t" +        "  lsr %[scale]                 \n\t" +        "  lsr %[cnt]                   \n\t" +        "brcc LOOP_%=                   \n\t" +        "DONE_%=:                       \n\t" +        : [work] "+r" (work), [cnt] "+r" (cnt) +        : [scale] "r" (scale), [i] "r" (i) +        : +      ); +    return work; +#else +    asm volatile( +#if (FASTLED_SCALE8_FIXED==1) +        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 +        "mul %0, %1          \n\t" +        // Add i to r0, possibly setting the carry flag +        "add r0, %0         \n\t" +        // load the immediate 0 into i (note, this does _not_ touch any flags) +        "ldi %0, 0x00       \n\t" +        // walk and chew gum at the same time +        "adc %0, r1          \n\t" +#else +         /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ +         "mul %0, %1          \n\t" +         /* Move the high 8-bits of the product (r1) back to i */ +         "mov %0, r1          \n\t" +         /* Restore r1 to "0"; it's expected to always be that */ +#endif +         "clr __zero_reg__    \n\t" + +         : "+a" (i)      /* writes to i */ +         : "a"  (scale)  /* uses scale */ +         : "r0", "r1"    /* clobbers r0, r1 */ ); + +    /* Return the result */ +    return i; +#endif +#else +#error "No implementation for scale8 available." +#endif +} + + +///  The "video" version of scale8 guarantees that the output will +///  be only be zero if one or both of the inputs are zero.  If both +///  inputs are non-zero, the output is guaranteed to be non-zero. +///  This makes for better 'video'/LED dimming, at the cost of +///  several additional cycles. +LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 || defined(LIB8_ATTINY) +    uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); +    // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; +    // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; +    return j; +#elif SCALE8_AVRASM == 1 +    uint8_t j=0; +    asm volatile( +        "  tst %[i]\n\t" +        "  breq L_%=\n\t" +        "  mul %[i], %[scale]\n\t" +        "  mov %[j], r1\n\t" +        "  clr __zero_reg__\n\t" +        "  cpse %[scale], r1\n\t" +        "  subi %[j], 0xFF\n\t" +        "L_%=: \n\t" +        : [j] "+a" (j) +        : [i] "a" (i), [scale] "a" (scale) +        : "r0", "r1"); + +    return j; +    // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; +    // asm volatile( +    //      "      tst %0           \n" +    //      "      breq L_%=        \n" +    //      "      mul %0, %1       \n" +    //      "      mov %0, r1       \n" +    //      "      add %0, %2       \n" +    //      "      clr __zero_reg__ \n" +    //      "L_%=:                  \n" + +    //      : "+a" (i) +    //      : "a" (scale), "a" (nonzeroscale) +    //      : "r0", "r1"); + +    // // Return the result +    // return i; +#else +#error "No implementation for scale8_video available." +#endif +} + + +/// This version of scale8 does not clean up the R1 register on AVR +/// If you are doing several 'scale8's in a row, use this, and +/// then explicitly call cleanup_R1. +LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 +#if (FASTLED_SCALE8_FIXED == 1) +    return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8; +#else +    return ((int)i * (int)(scale) ) >> 8; +#endif +#elif SCALE8_AVRASM == 1 +    asm volatile( +      #if (FASTLED_SCALE8_FIXED==1) +              // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 +              "mul %0, %1          \n\t" +              // Add i to r0, possibly setting the carry flag +              "add r0, %0         \n\t" +              // load the immediate 0 into i (note, this does _not_ touch any flags) +              "ldi %0, 0x00       \n\t" +              // walk and chew gum at the same time +              "adc %0, r1          \n\t" +      #else +         /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */ +         "mul %0, %1    \n\t" +         /* Move the high 8-bits of the product (r1) back to i */ +         "mov %0, r1    \n\t" +      #endif +         /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF  */ +         /* "clr __zero_reg__    \n\t" */ + +         : "+a" (i)      /* writes to i */ +         : "a"  (scale)  /* uses scale */ +         : "r0", "r1"    /* clobbers r0, r1 */ ); + +    // Return the result +    return i; +#else +#error "No implementation for scale8_LEAVING_R1_DIRTY available." +#endif +} + + +/// This version of scale8_video does not clean up the R1 register on AVR +/// If you are doing several 'scale8_video's in a row, use this, and +/// then explicitly call cleanup_R1. +LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale) +{ +#if SCALE8_C == 1 || defined(LIB8_ATTINY) +    uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0); +    // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; +    // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale; +    return j; +#elif SCALE8_AVRASM == 1 +    uint8_t j=0; +    asm volatile( +        "  tst %[i]\n\t" +        "  breq L_%=\n\t" +        "  mul %[i], %[scale]\n\t" +        "  mov %[j], r1\n\t" +        "  breq L_%=\n\t" +        "  subi %[j], 0xFF\n\t" +        "L_%=: \n\t" +        : [j] "+a" (j) +        : [i] "a" (i), [scale] "a" (scale) +        : "r0", "r1"); + +    return j; +    // uint8_t nonzeroscale = (scale != 0) ? 1 : 0; +    // asm volatile( +    //      "      tst %0           \n" +    //      "      breq L_%=        \n" +    //      "      mul %0, %1       \n" +    //      "      mov %0, r1       \n" +    //      "      add %0, %2       \n" +    //      "      clr __zero_reg__ \n" +    //      "L_%=:                  \n" + +    //      : "+a" (i) +    //      : "a" (scale), "a" (nonzeroscale) +    //      : "r0", "r1"); + +    // // Return the result +    // return i; +#else +#error "No implementation for scale8_video_LEAVING_R1_DIRTY available." +#endif +} + +/// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls +LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void) +{ +#if CLEANUP_R1_AVRASM == 1 +    // Restore r1 to "0"; it's expected to always be that +    asm volatile( "clr __zero_reg__  \n\t" : : : "r1" ); +#endif +} + + +/// scale a 16-bit unsigned value by an 8-bit value, +///         considered as numerator of a fraction whose denominator +///         is 256. In other words, it computes i * (scale / 256) + +LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale ) +{ +#if SCALE16BY8_C == 1 +    uint16_t result; +#if FASTLED_SCALE8_FIXED == 1 +    result = (i * (1+((uint16_t)scale))) >> 8; +#else +    result = (i * scale) / 256; +#endif +    return result; +#elif SCALE16BY8_AVRASM == 1 +#if FASTLED_SCALE8_FIXED == 1 +    uint16_t result = 0; +    asm volatile( +                 // result.A = HighByte( (i.A x scale) + i.A ) +                 "  mul %A[i], %[scale]                 \n\t" +                 "  add r0, %A[i]                       \n\t" +            //   "  adc r1, [zero]                      \n\t" +            //   "  mov %A[result], r1                  \n\t" +                 "  adc %A[result], r1                  \n\t" + +                 // result.A-B += i.B x scale +                 "  mul %B[i], %[scale]                 \n\t" +                 "  add %A[result], r0                  \n\t" +                 "  adc %B[result], r1                  \n\t" + +                 // cleanup r1 +                 "  clr __zero_reg__                    \n\t" + +                 // result.A-B += i.B +                 "  add %A[result], %B[i]               \n\t" +                 "  adc %B[result], __zero_reg__        \n\t" + +                 : [result] "+r" (result) +                 : [i] "r" (i), [scale] "r" (scale) +                 : "r0", "r1" +                 ); +    return result; +#else +    uint16_t result = 0; +    asm volatile( +         // result.A = HighByte(i.A x j ) +         "  mul %A[i], %[scale]                 \n\t" +         "  mov %A[result], r1                  \n\t" +         //"  clr %B[result]                      \n\t" + +         // result.A-B += i.B x j +         "  mul %B[i], %[scale]                 \n\t" +         "  add %A[result], r0                  \n\t" +         "  adc %B[result], r1                  \n\t" + +         // cleanup r1 +         "  clr __zero_reg__                    \n\t" + +         : [result] "+r" (result) +         : [i] "r" (i), [scale] "r" (scale) +         : "r0", "r1" +         ); +    return result; +#endif +#else +    #error "No implementation for scale16by8 available." +#endif +} + +/// scale a 16-bit unsigned value by a 16-bit value, +///         considered as numerator of a fraction whose denominator +///         is 65536. In other words, it computes i * (scale / 65536) + +LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale ) +{ +  #if SCALE16_C == 1 +    uint16_t result; +#if FASTLED_SCALE8_FIXED == 1 +    result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536; +#else +    result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536; +#endif +    return result; +#elif SCALE16_AVRASM == 1 +#if FASTLED_SCALE8_FIXED == 1 +    // implemented sort of like +    //   result = ((i * scale) + i ) / 65536 +    // +    // why not like this, you may ask? +    //   result = (i * (scale+1)) / 65536 +    // the answer is that if scale is 65535, then scale+1 +    // will be zero, which is not what we want. +    uint32_t result; +    asm volatile( +                 // result.A-B  = i.A x scale.A +                 "  mul %A[i], %A[scale]                 \n\t" +                 //  save results... +                 // basic idea: +                 //"  mov %A[result], r0                 \n\t" +                 //"  mov %B[result], r1                 \n\t" +                 // which can be written as... +                 "  movw %A[result], r0                   \n\t" +                 // Because we're going to add i.A-B to +                 // result.A-D, we DO need to keep both +                 // the r0 and r1 portions of the product +                 // UNlike in the 'unfixed scale8' version. +                 // So the movw here is needed. +                 : [result] "=r" (result) +                 : [i] "r" (i), +                 [scale] "r" (scale) +                 : "r0", "r1" +                 ); + +    asm volatile( +                 // result.C-D  = i.B x scale.B +                 "  mul %B[i], %B[scale]                 \n\t" +                 //"  mov %C[result], r0                 \n\t" +                 //"  mov %D[result], r1                 \n\t" +                 "  movw %C[result], r0                   \n\t" +                 : [result] "+r" (result) +                 : [i] "r" (i), +                 [scale] "r" (scale) +                 : "r0", "r1" +                 ); + +    const uint8_t  zero = 0; +    asm volatile( +                 // result.B-D += i.B x scale.A +                 "  mul %B[i], %A[scale]                 \n\t" + +                 "  add %B[result], r0                   \n\t" +                 "  adc %C[result], r1                   \n\t" +                 "  adc %D[result], %[zero]              \n\t" + +                 // result.B-D += i.A x scale.B +                 "  mul %A[i], %B[scale]                 \n\t" + +                 "  add %B[result], r0                   \n\t" +                 "  adc %C[result], r1                   \n\t" +                 "  adc %D[result], %[zero]              \n\t" + +                 // cleanup r1 +                 "  clr r1                               \n\t" + +                 : [result] "+r" (result) +                 : [i] "r" (i), +                 [scale] "r" (scale), +                 [zero] "r" (zero) +                 : "r0", "r1" +                 ); + +    asm volatile( +                 // result.A-D += i.A-B +                 "  add %A[result], %A[i]                \n\t" +                 "  adc %B[result], %B[i]                \n\t" +                 "  adc %C[result], %[zero]              \n\t" +                 "  adc %D[result], %[zero]              \n\t" +                 : [result] "+r" (result) +                 : [i] "r" (i), +                 [zero] "r" (zero) +                 ); + +    result = result >> 16; +    return result; +#else +    uint32_t result; +    asm volatile( +                 // result.A-B  = i.A x scale.A +                 "  mul %A[i], %A[scale]                 \n\t" +                 //  save results... +                 // basic idea: +                 //"  mov %A[result], r0                 \n\t" +                 //"  mov %B[result], r1                 \n\t" +                 // which can be written as... +                 "  movw %A[result], r0                   \n\t" +                 // We actually don't need to do anything with r0, +                 // as result.A is never used again here, so we +                 // could just move the high byte, but movw is +                 // one clock cycle, just like mov, so might as +                 // well, in case we want to use this code for +                 // a generic 16x16 multiply somewhere. + +                 : [result] "=r" (result) +                 : [i] "r" (i), +                   [scale] "r" (scale) +                 : "r0", "r1" +                 ); + +    asm volatile( +                 // result.C-D  = i.B x scale.B +                 "  mul %B[i], %B[scale]                 \n\t" +                 //"  mov %C[result], r0                 \n\t" +                 //"  mov %D[result], r1                 \n\t" +                 "  movw %C[result], r0                   \n\t" +                 : [result] "+r" (result) +                 : [i] "r" (i), +                   [scale] "r" (scale) +                 : "r0", "r1" +                 ); + +    const uint8_t  zero = 0; +    asm volatile( +                 // result.B-D += i.B x scale.A +                 "  mul %B[i], %A[scale]                 \n\t" + +                 "  add %B[result], r0                   \n\t" +                 "  adc %C[result], r1                   \n\t" +                 "  adc %D[result], %[zero]              \n\t" + +                 // result.B-D += i.A x scale.B +                 "  mul %A[i], %B[scale]                 \n\t" + +                 "  add %B[result], r0                   \n\t" +                 "  adc %C[result], r1                   \n\t" +                 "  adc %D[result], %[zero]              \n\t" + +                 // cleanup r1 +                 "  clr r1                               \n\t" + +                 : [result] "+r" (result) +                 : [i] "r" (i), +                   [scale] "r" (scale), +                   [zero] "r" (zero) +                 : "r0", "r1" +                 ); + +    result = result >> 16; +    return result; +#endif +#else +    #error "No implementation for scale16 available." +#endif +} +///@} + +///@defgroup Dimming Dimming and brightening functions +/// +/// Dimming and brightening functions +/// +/// The eye does not respond in a linear way to light. +/// High speed PWM'd LEDs at 50% duty cycle appear far +/// brighter then the 'half as bright' you might expect. +/// +/// If you want your midpoint brightness leve (128) to +/// appear half as bright as 'full' brightness (255), you +/// have to apply a 'dimming function'. +///@{ + +/// Adjust a scaling value for dimming +LIB8STATIC uint8_t dim8_raw( uint8_t x) +{ +    return scale8( x, x); +} + +/// Adjust a scaling value for dimming for video (value will never go below 1) +LIB8STATIC uint8_t dim8_video( uint8_t x) +{ +    return scale8_video( x, x); +} + +/// Linear version of the dimming function that halves for values < 128 +LIB8STATIC uint8_t dim8_lin( uint8_t x ) +{ +    if( x & 0x80 ) { +        x = scale8( x, x); +    } else { +        x += 1; +        x /= 2; +    } +    return x; +} + +/// inverse of the dimming function, brighten a value +LIB8STATIC uint8_t brighten8_raw( uint8_t x) +{ +    uint8_t ix = 255 - x; +    return 255 - scale8( ix, ix); +} + +/// inverse of the dimming function, brighten a value +LIB8STATIC uint8_t brighten8_video( uint8_t x) +{ +    uint8_t ix = 255 - x; +    return 255 - scale8_video( ix, ix); +} + +/// inverse of the dimming function, brighten a value +LIB8STATIC uint8_t brighten8_lin( uint8_t x ) +{ +    uint8_t ix = 255 - x; +    if( ix & 0x80 ) { +        ix = scale8( ix, ix); +    } else { +        ix += 1; +        ix /= 2; +    } +    return 255 - ix; +} + +///@} +#endif  | 
