/**
 * @file gifdec_mve.h
 * Helium (Arm MVE) accelerated fill and frame-render helpers for the GIF decoder.
 */

#ifndef GIFDEC_MVE_H
#define GIFDEC_MVE_H

#ifdef __cplusplus
extern "C" {
#endif

/*********************
 *      INCLUDES
 *********************/
#include <stdint.h>
#include "../../misc/lv_color.h"

/*********************
 *      DEFINES
 *********************/

#define GIFDEC_FILL_BG(dst, w, h, stride, color, opa) \
    _gifdec_fill_bg_mve(dst, w, h, stride, color, opa)

#define GIFDEC_RENDER_FRAME(dst, w, h, stride, frame, pattern, tindex) \
    _gifdec_render_frame_mve(dst, w, h, stride, frame, pattern, tindex)
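
/*
 * Illustrative call sites (a sketch only, not part of this header): `dst` is an
 * ARGB8888 canvas, `w`/`h`/`stride` are in pixels, `pattern` is a 3-byte-per-entry
 * RGB palette and `tindex` is the transparent palette index. The variable names
 * below are assumptions for the example:
 *
 *     GIFDEC_FILL_BG(canvas, w, h, stride, bg_rgb, 0xFF);
 *     GIFDEC_RENDER_FRAME(canvas, w, h, stride, frame_indices, palette, transparent_idx);
 */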

/**********************
 *      MACROS
 **********************/

/**********************
 *      TYPEDEFS
 **********************/

/**********************
 *  GLOBAL PROTOTYPES
 **********************/

static inline void _gifdec_fill_bg_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride, uint8_t * color,
                                       uint8_t opa)
{
    if(w == 0 || h == 0) {  /* h == 0 would otherwise wrap the subs/bne row counter below */
        return;
    }

    /* Pack the RGB background color and the opacity into one ARGB8888 word */
    lv_color32_t c = lv_color32_make(*(color + 0), *(color + 1), *(color + 2), opa);
    uint32_t color_32 = *(uint32_t *)&c;

    __asm volatile(
        ".p2align 2                         \n"
        "vdup.32 q0, %[src]                 \n" /* broadcast the color to all 4 lanes */
        "3:                                 \n" /* per-row loop */
        "mov r0, %[dst]                     \n"

        "wlstp.32 lr, %[w], 1f              \n" /* tail-predicated loop over w 32-bit pixels */
        "2:                                 \n"

        "vstrw.32 q0, [r0], #16             \n" /* store 4 pixels, advance 16 bytes */
        "letp lr, 2b                        \n"
        "1:                                 \n"
        "add %[dst], %[iTargetStride]       \n" /* next row: stride is given in pixels */
        "subs %[h], #1                      \n"
        "bne 3b                             \n"
        : [dst] "+r"(dst),
          [h] "+r"(h)
        : [src] "r"(color_32),
          [w] "r"(w),
          [iTargetStride] "r"(stride * sizeof(uint32_t))
        : "r0", "q0", "memory", "r14", "cc");
}
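
/*
 * Reference scalar equivalent of _gifdec_fill_bg_mve, kept here only to
 * document what the Helium loop above computes. A sketch: the name is
 * illustrative and nothing in this header calls it.
 */
static inline void _gifdec_fill_bg_scalar_ref(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride,
                                              uint8_t * color, uint8_t opa)
{
    if(w == 0 || h == 0) return;

    lv_color32_t c = lv_color32_make(*(color + 0), *(color + 1), *(color + 2), opa);
    uint32_t color_32 = *(uint32_t *)&c;

    for(uint16_t y = 0; y < h; y++) {
        uint32_t * row = (uint32_t *)(dst + (uint32_t)y * stride * sizeof(uint32_t));
        for(uint16_t x = 0; x < w; x++) row[x] = color_32; /* same ARGB8888 word for every pixel */
    }
}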

static inline void _gifdec_render_frame_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride, uint8_t * frame,
                                            uint8_t * pattern, uint16_t tindex)
{
    if(w == 0 || h == 0) {
        return;
    }

    __asm volatile(
        "vmov.u16 q3, #255                  \n"
        "vshl.u16 q3, q3, #8                \n" /* left shift 8 for a */

        "mov r0, #2                         \n"
        "vidup.u16 q6, r0, #4               \n" /* [2, 6, 10, 14, 18, 22, 26, 30] */
        "mov r0, #0                         \n"
        "vidup.u16 q7, r0, #4               \n" /* [0, 4, 8, 12, 16, 20, 24, 28] */

        "3:                                 \n" /* per-row loop */
        "mov r1, %[dst]                     \n"
        "mov r2, %[frame]                   \n"

        "wlstp.16 lr, %[w], 1f              \n" /* tail-predicated loop over w pixels */
        "2:                                 \n"

        "mov r0, #3                         \n"
        "vldrb.u16 q4, [r2], #8             \n" /* load 8 palette indices */
        "vmul.u16 q5, q4, r0                \n" /* offsets: 3 bytes per palette entry */

        "mov r0, #1                         \n"
        "vldrb.u16 q2, [%[pattern], q5]     \n" /* load 8 pixel r */

        "vadd.u16 q5, q5, r0                \n"
        "vldrb.u16 q1, [%[pattern], q5]     \n" /* load 8 pixel g */

        "vadd.u16 q5, q5, r0                \n"
        "vldrb.u16 q0, [%[pattern], q5]     \n" /* load 8 pixel b */

        "vshl.u16 q1, q1, #8                \n" /* left shift 8 for g */

        "vorr.u16 q0, q0, q1                \n" /* make 8 pixel gb */
        "vorr.u16 q1, q2, q3                \n" /* make 8 pixel ar */

        "vcmp.i16 ne, q4, %[tindex]         \n" /* predicate out transparent pixels */
        "vpstt                              \n"
        "vstrht.16 q0, [r1, q7]             \n" /* scatter gb halfwords to byte offsets 0, 4, ... */
        "vstrht.16 q1, [r1, q6]             \n" /* scatter ar halfwords to byte offsets 2, 6, ... */
        "add r1, r1, #32                    \n" /* advance dst by 8 ARGB8888 pixels */

        "letp lr, 2b                        \n"

        "1:                                 \n"
        "mov r0, %[stride], LSL #2          \n" /* dst stride in bytes */
        "add %[dst], r0                     \n"
        "add %[frame], %[stride]            \n" /* frame holds 1 index byte per pixel */
        "subs %[h], #1                      \n"
        "bne 3b                             \n"

        : [dst] "+r"(dst),
          [frame] "+r"(frame),
          [h] "+r"(h)
        : [pattern] "r"(pattern),
          [w] "r"(w),
          [stride] "r"(stride),
          [tindex] "r"(tindex)
        : "r0", "r1", "r2", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "memory", "r14", "cc");
}
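
/*
 * Reference scalar equivalent of _gifdec_render_frame_mve, kept here only to
 * document what the Helium loop above computes. A sketch: the name is
 * illustrative and nothing in this header calls it.
 */
static inline void _gifdec_render_frame_scalar_ref(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride,
                                                   uint8_t * frame, uint8_t * pattern, uint16_t tindex)
{
    if(w == 0 || h == 0) return;

    for(uint16_t y = 0; y < h; y++) {
        uint8_t * row = dst + (uint32_t)y * stride * sizeof(uint32_t);
        for(uint16_t x = 0; x < w; x++) {
            uint8_t idx = frame[(uint32_t)y * stride + x];
            if(idx == tindex) continue;  /* transparent pixel: leave dst untouched */
            const uint8_t * rgb = &pattern[idx * 3];
            uint8_t * px = row + x * sizeof(uint32_t);
            px[0] = rgb[2];  /* b */
            px[1] = rgb[1];  /* g */
            px[2] = rgb[0];  /* r */
            px[3] = 0xFF;    /* a: fully opaque, as in the MVE path */
        }
    }
}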

#ifdef __cplusplus
} /*extern "C"*/
#endif

#endif /*GIFDEC_MVE_H*/