Files
xiaozhi-esp32/main/boards/common/esp32_camera.cc

982 lines
38 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <unistd.h>
#include "esp_imgfx_color_convert.h"
#include "esp_video_device.h"
#include "esp_video_init.h"
#include "linux/videodev2.h"
#include "esp32_camera.h"
#include "board.h"
#include "display.h"
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "jpg/image_to_jpeg.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL
#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG)
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
#ifdef CONFIG_IDF_TARGET_ESP32P4
#include "driver/ppa.h"
#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90)
#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_270)
#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270)
#define IMAGE_ROTATION_ANGLE (PPA_SRM_ROTATION_ANGLE_90)
#else
#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set"
#endif // angle
#else // target
#include "esp_imgfx_rotate.h"
#if defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_90)
#define IMAGE_ROTATION_ANGLE (90)
#elif defined(CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE_270)
#define IMAGE_ROTATION_ANGLE (270)
#else
#error "CONFIG_XIAOZHI_CAMERA_IMAGE_ROTATION_ANGLE is not set"
#endif // angle
#endif // target
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
#include <errno.h>
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <cstdio>
#include <cstring>
#define TAG "Esp32Camera"
#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
#warning \
"CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER or CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP is enabled, which may cause image corruption in YUV422 format!"
#endif
#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#define CAM_PRINT_FOURCC(pixelformat) \
char fourcc[5]; \
fourcc[0] = pixelformat & 0xFF; \
fourcc[1] = (pixelformat >> 8) & 0xFF; \
fourcc[2] = (pixelformat >> 16) & 0xFF; \
fourcc[3] = (pixelformat >> 24) & 0xFF; \
fourcc[4] = '\0'; \
ESP_LOGD(TAG, "FOURCC: '%c%c%c%c'", fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
// for compatibility with old esp_video version
#ifndef MAP_FAILED
#define MAP_FAILED nullptr
#endif
__attribute__((weak)) esp_err_t esp_video_deinit(void) {
return ESP_ERR_NOT_SUPPORTED;
}
// end of for compatibility with old esp_video version
static void log_available_video_devices() {
for (int i = 0; i < 50; i++) {
char path[16];
snprintf(path, sizeof(path), "/dev/video%d", i);
int fd = open(path, O_RDONLY);
if (fd >= 0) {
ESP_LOGD(TAG, "found video device: %s", path);
close(fd);
}
}
}
#else
#define CAM_PRINT_FOURCC(pixelformat) (void)0;
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) {
if (esp_video_init(&config) != ESP_OK) {
ESP_LOGE(TAG, "esp_video_init failed");
return;
}
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
esp_log_level_set(TAG, ESP_LOG_DEBUG);
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
const char* video_device_name = nullptr;
if (false) { /* 用于构建 else if */
}
#if CONFIG_ESP_VIDEO_ENABLE_MIPI_CSI_VIDEO_DEVICE
else if (config.csi != nullptr) {
video_device_name = ESP_VIDEO_MIPI_CSI_DEVICE_NAME;
}
#endif
#if CONFIG_ESP_VIDEO_ENABLE_DVP_VIDEO_DEVICE
else if (config.dvp != nullptr) {
video_device_name = ESP_VIDEO_DVP_DEVICE_NAME;
}
#endif
#if CONFIG_ESP_VIDEO_ENABLE_HW_JPEG_VIDEO_DEVICE
else if (config.jpeg != nullptr) {
video_device_name = ESP_VIDEO_JPEG_DEVICE_NAME;
}
#endif
#if CONFIG_ESP_VIDEO_ENABLE_SPI_VIDEO_DEVICE
else if (config.spi != nullptr) {
video_device_name = ESP_VIDEO_SPI_DEVICE_NAME;
}
#endif
#if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE
else if (config.usb_uvc != nullptr) {
video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(config.usb_uvc->uvc.uvc_dev_num);
}
#endif
if (video_device_name == nullptr) {
ESP_LOGE(TAG, "no video device is enabled");
return;
}
video_fd_ = open(video_device_name, O_RDWR);
if (video_fd_ < 0) {
ESP_LOGE(TAG, "open %s failed, errno=%d(%s)", video_device_name, errno, strerror(errno));
#if CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
log_available_video_devices();
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
return;
}
struct v4l2_capability cap = {};
if (ioctl(video_fd_, VIDIOC_QUERYCAP, &cap) != 0) {
ESP_LOGE(TAG, "VIDIOC_QUERYCAP failed, errno=%d(%s)", errno, strerror(errno));
close(video_fd_);
video_fd_ = -1;
return;
}
ESP_LOGD(
TAG,
"VIDIOC_QUERYCAP: driver=%s, card=%s, bus_info=%s, version=0x%08lx, capabilities=0x%08lx, device_caps=0x%08lx",
cap.driver, cap.card, cap.bus_info, cap.version, cap.capabilities, cap.device_caps);
struct v4l2_format format = {};
format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (ioctl(video_fd_, VIDIOC_G_FMT, &format) != 0) {
ESP_LOGE(TAG, "VIDIOC_G_FMT failed, errno=%d(%s)", errno, strerror(errno));
close(video_fd_);
video_fd_ = -1;
return;
}
ESP_LOGD(TAG, "VIDIOC_G_FMT: pixelformat=0x%08lx, width=%ld, height=%ld", format.fmt.pix.pixelformat,
format.fmt.pix.width, format.fmt.pix.height);
CAM_PRINT_FOURCC(format.fmt.pix.pixelformat);
struct v4l2_format setformat = {};
setformat.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
sensor_width_ = format.fmt.pix.width;
sensor_height_ = format.fmt.pix.height;
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
setformat.fmt.pix.width = format.fmt.pix.width;
setformat.fmt.pix.height = format.fmt.pix.height;
struct v4l2_fmtdesc fmtdesc = {};
fmtdesc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
fmtdesc.index = 0;
uint32_t best_fmt = 0;
int best_rank = 1 << 30; // large number
// 注: 当前版本 esp_video 中 YUV422P 实际输出为 YUYV。
#if defined(CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE) && defined(CONFIG_SOC_PPA_SUPPORTED)
auto get_rank = [](uint32_t fmt) -> int {
switch (fmt) {
case V4L2_PIX_FMT_RGB24:
return 0;
case V4L2_PIX_FMT_RGB565:
return 1;
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式
return 2;
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_GREY:
case V4L2_PIX_FMT_YUV422P:
default:
return 1 << 29; // unsupported
}
};
#else
auto get_rank = [](uint32_t fmt) -> int {
switch (fmt) {
case V4L2_PIX_FMT_YUV422P:
return 0;
case V4L2_PIX_FMT_RGB565:
return 1;
case V4L2_PIX_FMT_RGB24:
return 2;
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_YUV420:
return 3;
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_GREY:
return 4;
default:
return 1 << 29; // unsupported
}
};
#endif
while (ioctl(video_fd_, VIDIOC_ENUM_FMT, &fmtdesc) == 0) {
ESP_LOGD(TAG, "VIDIOC_ENUM_FMT: pixelformat=0x%08lx, description=%s", fmtdesc.pixelformat, fmtdesc.description);
CAM_PRINT_FOURCC(fmtdesc.pixelformat);
int rank = get_rank(fmtdesc.pixelformat);
if (rank < best_rank) {
best_rank = rank;
best_fmt = fmtdesc.pixelformat;
}
fmtdesc.index++;
}
if (best_rank < (1 << 29)) {
setformat.fmt.pix.pixelformat = best_fmt;
sensor_format_ = best_fmt;
}
if (!setformat.fmt.pix.pixelformat) {
ESP_LOGE(TAG, "no supported pixel format found");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
ESP_LOGD(TAG, "selected pixel format: 0x%08lx", setformat.fmt.pix.pixelformat);
if (ioctl(video_fd_, VIDIOC_S_FMT, &setformat) != 0) {
ESP_LOGE(TAG, "VIDIOC_S_FMT failed, errno=%d(%s)", errno, strerror(errno));
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
frame_.width = setformat.fmt.pix.height;
frame_.height = setformat.fmt.pix.width;
#else
frame_.width = setformat.fmt.pix.width;
frame_.height = setformat.fmt.pix.height;
#endif
// 申请缓冲并mmap
struct v4l2_requestbuffers req = {};
req.count = strcmp(video_device_name, ESP_VIDEO_MIPI_CSI_DEVICE_NAME) == 0 ? 2 : 1;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (ioctl(video_fd_, VIDIOC_REQBUFS, &req) != 0) {
ESP_LOGE(TAG, "VIDIOC_REQBUFS failed");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
mmap_buffers_.resize(req.count);
for (uint32_t i = 0; i < req.count; i++) {
struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (ioctl(video_fd_, VIDIOC_QUERYBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_QUERYBUF failed");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
void* start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, video_fd_, buf.m.offset);
if (start == MAP_FAILED) {
ESP_LOGE(TAG, "mmap failed");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
mmap_buffers_[i].start = start;
mmap_buffers_[i].length = buf.length;
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_QBUF failed");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
}
int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (ioctl(video_fd_, VIDIOC_STREAMON, &type) != 0) {
ESP_LOGE(TAG, "VIDIOC_STREAMON failed");
close(video_fd_);
video_fd_ = -1;
sensor_format_ = 0;
return;
}
#ifdef CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE
// 当启用 ISP 时ISP 需要一些照片来初始化参数因此开启后后台拍摄5s照片并丢弃
xTaskCreate(
[](void* arg) {
Esp32Camera* self = static_cast<Esp32Camera*>(arg);
uint16_t capture_count = 0;
TickType_t start = xTaskGetTickCount();
TickType_t duration = 5000 / portTICK_PERIOD_MS; // 5s
while ((xTaskGetTickCount() - start) < duration) {
struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
if (ioctl(self->video_fd_, VIDIOC_DQBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_DQBUF failed during init");
vTaskDelay(10 / portTICK_PERIOD_MS);
continue;
}
if (ioctl(self->video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_QBUF failed during init");
}
capture_count++;
}
ESP_LOGI(TAG, "Camera init success, captured %d frames in %dms", capture_count,
(xTaskGetTickCount() - start) * portTICK_PERIOD_MS);
self->streaming_on_ = true;
vTaskDelete(NULL);
},
"CameraInitTask", 4096, this, 5, nullptr);
#else
ESP_LOGI(TAG, "Camera init success");
streaming_on_ = true;
#endif // CONFIG_ESP_VIDEO_ENABLE_ISP_VIDEO_DEVICE
}
Esp32Camera::~Esp32Camera() {
if (streaming_on_ && video_fd_ >= 0) {
int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
ioctl(video_fd_, VIDIOC_STREAMOFF, &type);
}
for (auto& b : mmap_buffers_) {
if (b.start && b.length) {
munmap(b.start, b.length);
}
}
if (video_fd_ >= 0) {
close(video_fd_);
video_fd_ = -1;
}
sensor_format_ = 0;
esp_video_deinit();
}
void Esp32Camera::SetExplainUrl(const std::string& url, const std::string& token) {
explain_url_ = url;
explain_token_ = token;
}
bool Esp32Camera::Capture() {
if (encoder_thread_.joinable()) {
encoder_thread_.join();
}
if (!streaming_on_ || video_fd_ < 0) {
return false;
}
for (int i = 0; i < 3; i++) {
struct v4l2_buffer buf = {};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
if (ioctl(video_fd_, VIDIOC_DQBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_DQBUF failed");
return false;
}
if (i == 2) {
// 保存帧副本到PSRAM
if (frame_.data) {
heap_caps_free(frame_.data);
frame_.data = nullptr;
frame_.format = 0;
}
frame_.len = buf.bytesused;
frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (!frame_.data) {
ESP_LOGE(TAG, "alloc frame copy failed");
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, sensor_width = %d, sensor_height = %d",
mmap_buffers_[buf.index].length, sensor_width_, sensor_height_);
#else
ESP_LOGW(TAG, "mmap_buffers_[buf.index].length = %d, frame.width = %d, frame.height = %d",
mmap_buffers_[buf.index].length, frame_.width, frame_.height);
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
ESP_LOG_BUFFER_HEXDUMP(TAG, mmap_buffers_[buf.index].start, MIN(mmap_buffers_[buf.index].length, 256),
ESP_LOG_DEBUG);
switch (sensor_format_) {
case V4L2_PIX_FMT_RGB565:
case V4L2_PIX_FMT_RGB24:
case V4L2_PIX_FMT_YUYV:
case V4L2_PIX_FMT_YUV420:
case V4L2_PIX_FMT_GREY:
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
{
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
auto dst16 = (uint16_t*)frame_.data;
size_t count = (size_t)mmap_buffers_[buf.index].length / 2;
for (size_t i = 0; i < count; i++) {
dst16[i] = __builtin_bswap16(src16[i]);
}
}
#else
memcpy(frame_.data, mmap_buffers_[buf.index].start,
MIN(mmap_buffers_[buf.index].length, frame_.len));
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
frame_.format = sensor_format_;
break;
case V4L2_PIX_FMT_YUV422P: {
// 这个格式是 422 YUYV不是 planer
frame_.format = V4L2_PIX_FMT_YUYV;
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
{
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
auto dst16 = (uint16_t*)frame_.data;
size_t count = (size_t)mmap_buffers_[buf.index].length / 2;
for (size_t i = 0; i < count; i++) {
dst16[i] = __builtin_bswap16(src16[i]);
}
}
#else
memcpy(frame_.data, mmap_buffers_[buf.index].start,
MIN(mmap_buffers_[buf.index].length, frame_.len));
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
break;
}
case V4L2_PIX_FMT_RGB565X: {
// 大端序的 RGB565 需要转换为小端序
// 目前 esp_video 的大小端都会返回格式为 RGB565不会返回格式为 RGB565X此 case 用于未来版本兼容
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
auto dst16 = (uint16_t*)frame_.data;
size_t pixel_count = (size_t)frame_.width * (size_t)frame_.height;
for (size_t i = 0; i < pixel_count; i++) {
dst16[i] = __builtin_bswap16(src16[i]);
}
frame_.format = V4L2_PIX_FMT_RGB565;
break;
}
default:
ESP_LOGE(TAG, "unsupported sensor format: 0x%08x", sensor_format_);
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
#ifndef CONFIG_SOC_PPA_SUPPORTED
uint8_t* rotate_dst =
(uint8_t*)heap_caps_aligned_alloc(64, frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (rotate_dst == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for rotate image");
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
uint8_t* rotate_src = (uint8_t*)frame_.data;
esp_imgfx_rotate_cfg_t rotate_cfg = {
.in_res =
{
.width = static_cast<int16_t>(sensor_width_),
.height = static_cast<int16_t>(sensor_height_),
},
.degree = IMAGE_ROTATION_ANGLE,
};
switch (frame_.format) {
case V4L2_PIX_FMT_RGB565:
rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE;
break;
case V4L2_PIX_FMT_YUYV:
rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE;
break;
case V4L2_PIX_FMT_GREY:
rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_Y;
break;
case V4L2_PIX_FMT_RGB24:
rotate_cfg.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888;
break;
default:
ESP_LOGE(TAG, "unsupported sensor format: 0x%08x", sensor_format_);
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
esp_imgfx_rotate_handle_t rotate_handle = nullptr;
esp_imgfx_err_t imgfx_err = esp_imgfx_rotate_open(&rotate_cfg, &rotate_handle);
if (imgfx_err != ESP_IMGFX_ERR_OK || rotate_handle == nullptr) {
ESP_LOGE(TAG, "esp_imgfx_rotate_create failed");
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
esp_imgfx_data_t rotate_input_data = {
.data = rotate_src,
.data_len = frame_.len,
};
esp_imgfx_data_t rotate_output_data = {
.data = rotate_dst,
.data_len = frame_.len,
};
imgfx_err = esp_imgfx_rotate_process(rotate_handle, &rotate_input_data, &rotate_output_data);
if (imgfx_err != ESP_IMGFX_ERR_OK) {
ESP_LOGE(TAG, "esp_imgfx_rotate_process failed");
heap_caps_free(rotate_dst);
rotate_dst = nullptr;
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
esp_imgfx_rotate_close(rotate_handle);
rotate_handle = nullptr;
return false;
}
frame_.data = rotate_dst;
heap_caps_free(rotate_src);
rotate_src = nullptr;
esp_imgfx_rotate_close(rotate_handle);
rotate_handle = nullptr;
#else // CONFIG_SOC_PPA_SUPPORTED
uint8_t* rotate_src = nullptr;
ppa_srm_color_mode_t ppa_color_mode;
switch (frame_.format) {
case V4L2_PIX_FMT_RGB565:
rotate_src = (uint8_t*)frame_.data;
ppa_color_mode = PPA_SRM_COLOR_MODE_RGB565;
break;
case V4L2_PIX_FMT_RGB24:
rotate_src = (uint8_t*)frame_.data;
ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888;
break;
case V4L2_PIX_FMT_YUYV: {
ESP_LOGW(TAG, "YUYV format is not supported for PPA rotation, using software conversion to RGB888");
rotate_src = (uint8_t*)heap_caps_malloc(frame_.width * frame_.height * 3,
MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (rotate_src == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for rotate image");
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
esp_imgfx_color_convert_cfg_t convert_cfg = {
.in_res = {.width = static_cast<int16_t>(frame_.width),
.height = static_cast<int16_t>(frame_.height)},
.in_pixel_fmt = ESP_IMGFX_PIXEL_FMT_YUYV,
.out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB888,
};
esp_imgfx_color_convert_handle_t convert_handle = nullptr;
esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle);
if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) {
ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed");
heap_caps_free(rotate_src);
rotate_src = nullptr;
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
esp_imgfx_data_t convert_input_data = {
.data = frame_.data,
.data_len = frame_.len,
};
esp_imgfx_data_t convert_output_data = {
.data = rotate_src,
.data_len = static_cast<uint32_t>(frame_.width * frame_.height * 3),
};
err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data);
if (err != ESP_IMGFX_ERR_OK) {
ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed");
heap_caps_free(rotate_src);
rotate_src = nullptr;
esp_imgfx_color_convert_close(convert_handle);
convert_handle = nullptr;
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
esp_imgfx_color_convert_close(convert_handle);
convert_handle = nullptr;
ppa_color_mode = PPA_SRM_COLOR_MODE_RGB888;
heap_caps_free(frame_.data);
frame_.data = rotate_src;
frame_.len = frame_.width * frame_.height * 3;
break;
}
default:
ESP_LOGE(TAG, "unsupported sensor format for PPA rotation: 0x%08x", sensor_format_);
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
uint8_t* rotate_dst = (uint8_t*)heap_caps_malloc(
frame_.width * frame_.height * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT | MALLOC_CAP_CACHE_ALIGNED);
if (rotate_dst == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for rotate image");
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
ppa_client_handle_t ppa_client = nullptr;
ppa_client_config_t client_cfg = {
.oper_type = PPA_OPERATION_SRM,
.max_pending_trans_num = 1,
};
esp_err_t err = ppa_register_client(&client_cfg, &ppa_client);
if (err != ESP_OK || ppa_client == nullptr) {
ESP_LOGE(TAG, "ppa_register_client failed: %d", (int)err);
heap_caps_free(rotate_dst);
rotate_dst = nullptr;
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
ppa_srm_rotation_angle_t ppa_angle = IMAGE_ROTATION_ANGLE;
ppa_srm_oper_config_t srm_cfg = {};
srm_cfg.in.buffer = (void*)rotate_src;
srm_cfg.in.pic_w = sensor_width_;
srm_cfg.in.pic_h = sensor_height_;
srm_cfg.in.block_w = sensor_width_;
srm_cfg.in.block_h = sensor_height_;
srm_cfg.in.block_offset_x = 0;
srm_cfg.in.block_offset_y = 0;
srm_cfg.in.srm_cm = ppa_color_mode;
srm_cfg.out.buffer = (void*)rotate_dst;
srm_cfg.out.buffer_size = frame_.len;
srm_cfg.out.pic_w = frame_.width;
srm_cfg.out.pic_h = frame_.height;
srm_cfg.out.block_offset_x = 0;
srm_cfg.out.block_offset_y = 0;
srm_cfg.out.srm_cm = PPA_SRM_COLOR_MODE_RGB565;
// 等比例缩放 1.0
srm_cfg.scale_x = 1.0f;
srm_cfg.scale_y = 1.0f;
srm_cfg.rotation_angle = ppa_angle;
srm_cfg.mode = PPA_TRANS_MODE_BLOCKING;
srm_cfg.user_data = nullptr;
err = ppa_do_scale_rotate_mirror(ppa_client, &srm_cfg);
if (err != ESP_OK) {
ESP_LOGE(TAG, "ppa_do_scale_rotate_mirror failed: %d", (int)err);
heap_caps_free(rotate_dst);
rotate_dst = nullptr;
(void)ppa_unregister_client(ppa_client);
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
return false;
}
(void)ppa_unregister_client(ppa_client);
frame_.data = rotate_dst;
frame_.len = frame_.width * frame_.height * 2;
frame_.format = V4L2_PIX_FMT_RGB565;
heap_caps_free(rotate_src);
rotate_src = nullptr;
#endif // CONFIG_SOC_PPA_SUPPORTED
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
}
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "VIDIOC_QBUF failed");
}
}
// 显示预览图片
auto display = dynamic_cast<LvglDisplay*>(Board::GetInstance().GetDisplay());
if (display != nullptr) {
if (!frame_.data) {
ESP_LOGE(TAG, "frame.data is null");
return false;
}
uint16_t w = frame_.width;
uint16_t h = frame_.height;
size_t lvgl_image_size = frame_.len;
size_t stride = ((w * 2) + 3) & ~3; // 4字节对齐
lv_color_format_t color_format = LV_COLOR_FORMAT_RGB565;
uint8_t* data = nullptr;
switch (frame_.format) {
// LVGL 显示 YUV 系的图像似乎都有问题,暂时转换为 RGB565 显示
case V4L2_PIX_FMT_YUYV:
case V4L2_PIX_FMT_YUV420:
case V4L2_PIX_FMT_RGB24: {
color_format = LV_COLOR_FORMAT_RGB565;
data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
return false;
}
esp_imgfx_color_convert_cfg_t convert_cfg = {
.in_res = {.width = static_cast<int16_t>(frame_.width),
.height = static_cast<int16_t>(frame_.height)},
.in_pixel_fmt = static_cast<esp_imgfx_pixel_fmt_t>(frame_.format),
.out_pixel_fmt = ESP_IMGFX_PIXEL_FMT_RGB565_LE,
.color_space_std = ESP_IMGFX_COLOR_SPACE_STD_BT601,
};
esp_imgfx_color_convert_handle_t convert_handle = nullptr;
esp_imgfx_err_t err = esp_imgfx_color_convert_open(&convert_cfg, &convert_handle);
if (err != ESP_IMGFX_ERR_OK || convert_handle == nullptr) {
ESP_LOGE(TAG, "esp_imgfx_color_convert_open failed");
heap_caps_free(data);
data = nullptr;
return false;
}
esp_imgfx_data_t convert_input_data = {
.data = frame_.data,
.data_len = frame_.len,
};
esp_imgfx_data_t convert_output_data = {
.data = data,
.data_len = static_cast<uint32_t>(w * h * 2),
};
err = esp_imgfx_color_convert_process(convert_handle, &convert_input_data, &convert_output_data);
if (err != ESP_IMGFX_ERR_OK) {
ESP_LOGE(TAG, "esp_imgfx_color_convert_process failed");
heap_caps_free(data);
data = nullptr;
esp_imgfx_color_convert_close(convert_handle);
convert_handle = nullptr;
return false;
}
esp_imgfx_color_convert_close(convert_handle);
convert_handle = nullptr;
lvgl_image_size = w * h * 2;
break;
}
case V4L2_PIX_FMT_RGB565:
data = (uint8_t*)heap_caps_malloc(w * h * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
return false;
}
memcpy(data, frame_.data, frame_.len);
lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565
break;
default:
ESP_LOGE(TAG, "unsupported frame format: 0x%08lx", frame_.format);
return false;
}
auto image = std::make_unique<LvglAllocatedImage>(data, lvgl_image_size, w, h, stride, color_format);
display->SetPreviewImage(std::move(image));
}
return true;
}
bool Esp32Camera::SetHMirror(bool enabled) {
if (video_fd_ < 0)
return false;
struct v4l2_ext_controls ctrls = {};
struct v4l2_ext_control ctrl = {};
ctrl.id = V4L2_CID_HFLIP;
ctrl.value = enabled ? 1 : 0;
ctrls.ctrl_class = V4L2_CTRL_CLASS_USER;
ctrls.count = 1;
ctrls.controls = &ctrl;
if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) {
ESP_LOGE(TAG, "set HFLIP failed");
return false;
}
return true;
}
bool Esp32Camera::SetVFlip(bool enabled) {
if (video_fd_ < 0)
return false;
struct v4l2_ext_controls ctrls = {};
struct v4l2_ext_control ctrl = {};
ctrl.id = V4L2_CID_VFLIP;
ctrl.value = enabled ? 1 : 0;
ctrls.ctrl_class = V4L2_CTRL_CLASS_USER;
ctrls.count = 1;
ctrls.controls = &ctrl;
if (ioctl(video_fd_, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) {
ESP_LOGE(TAG, "set VFLIP failed");
return false;
}
return true;
}
/**
* @brief 将摄像头捕获的图像发送到远程服务器进行AI分析和解释
*
* 该函数将当前摄像头缓冲区中的图像编码为JPEG格式并通过HTTP POST请求
* 以multipart/form-data的形式发送到指定的解释服务器。服务器将根据提供的
* 问题对图像进行AI分析并返回结果。
*
* 实现特点:
* - 使用独立线程编码JPEG与主线程分离
* - 采用分块传输编码(chunked transfer encoding)优化内存使用
* - 通过队列机制实现编码线程和发送线程的数据同步
* - 支持设备ID、客户端ID和认证令牌的HTTP头部配置
*
* @param question 要向AI提出的关于图像的问题将作为表单字段发送
* @return std::string 服务器返回的JSON格式响应字符串
* 成功时包含AI分析结果失败时包含错误信息
* 格式示例:{"success": true, "result": "分析结果"}
* {"success": false, "message": "错误信息"}
*
* @note 调用此函数前必须先调用SetExplainUrl()设置服务器URL
* @note 函数会等待之前的编码线程完成后再开始新的处理
* @warning 如果摄像头缓冲区为空或网络连接失败,将返回错误信息
*/
std::string Esp32Camera::Explain(const std::string& question) {
if (explain_url_.empty()) {
throw std::runtime_error("Image explain URL or token is not set");
}
// 创建局部的 JPEG 队列, 40 entries is about to store 512 * 40 = 20480 bytes of JPEG data
QueueHandle_t jpeg_queue = xQueueCreate(40, sizeof(JpegChunk));
if (jpeg_queue == nullptr) {
ESP_LOGE(TAG, "Failed to create JPEG queue");
throw std::runtime_error("Failed to create JPEG queue");
}
// We spawn a thread to encode the image to JPEG using optimized encoder (cost about 500ms and 8KB SRAM)
encoder_thread_ = std::thread([this, jpeg_queue]() {
uint16_t w = frame_.width ? frame_.width : 320;
uint16_t h = frame_.height ? frame_.height : 240;
v4l2_pix_fmt_t enc_fmt = frame_.format;
image_to_jpeg_cb(
frame_.data, frame_.len, w, h, enc_fmt, 80,
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
auto jpeg_queue = (QueueHandle_t)arg;
JpegChunk chunk = {.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM), .len = len};
memcpy(chunk.data, data, len);
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
return len;
},
jpeg_queue);
});
auto network = Board::GetInstance().GetNetwork();
auto http = network->CreateHttp(3);
// 构造multipart/form-data请求体
std::string boundary = "----ESP32_CAMERA_BOUNDARY";
// 配置HTTP客户端使用分块传输编码
http->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
http->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
if (!explain_token_.empty()) {
http->SetHeader("Authorization", "Bearer " + explain_token_);
}
http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
http->SetHeader("Transfer-Encoding", "chunked");
if (!http->Open("POST", explain_url_)) {
ESP_LOGE(TAG, "Failed to connect to explain URL");
// Clear the queue
encoder_thread_.join();
JpegChunk chunk;
while (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) == pdPASS) {
if (chunk.data != nullptr) {
heap_caps_free(chunk.data);
} else {
break;
}
}
vQueueDelete(jpeg_queue);
throw std::runtime_error("Failed to connect to explain URL");
}
{
// 第一块question字段
std::string question_field;
question_field += "--" + boundary + "\r\n";
question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
question_field += "\r\n";
question_field += question + "\r\n";
http->Write(question_field.c_str(), question_field.size());
}
{
// 第二块:文件字段头部
std::string file_header;
file_header += "--" + boundary + "\r\n";
file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
file_header += "Content-Type: image/jpeg\r\n";
file_header += "\r\n";
http->Write(file_header.c_str(), file_header.size());
}
// 第三块JPEG数据
size_t total_sent = 0;
while (true) {
JpegChunk chunk;
if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) {
ESP_LOGE(TAG, "Failed to receive JPEG chunk");
break;
}
if (chunk.data == nullptr) {
break; // The last chunk
}
http->Write((const char*)chunk.data, chunk.len);
total_sent += chunk.len;
heap_caps_free(chunk.data);
}
// Wait for the encoder thread to finish
encoder_thread_.join();
// 清理队列
vQueueDelete(jpeg_queue);
{
// 第四块multipart尾部
std::string multipart_footer;
multipart_footer += "\r\n--" + boundary + "--\r\n";
http->Write(multipart_footer.c_str(), multipart_footer.size());
}
// 结束块
http->Write("", 0);
if (http->GetStatusCode() != 200) {
ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
throw std::runtime_error("Failed to upload photo");
}
std::string result = http->ReadAll();
http->Close();
// Get remain task stack size
size_t remain_stack_size = uxTaskGetStackHighWaterMark(nullptr);
ESP_LOGI(TAG, "Explain image size=%d bytes, compressed size=%d, remain stack size=%d, question=%s\n%s",
(int)frame_.len, (int)total_sent, (int)remain_stack_size, question.c_str(), result.c_str());
return result;
}