feat: support JPEG input (#1455)

This commit is contained in:
laride
2025-11-18 20:34:22 +08:00
committed by GitHub
parent 511349a7bd
commit 860d12a12c
7 changed files with 449 additions and 21 deletions

View File

@@ -3,24 +3,31 @@
#include <sys/mman.h>
#include <sys/param.h>
#include <unistd.h>
#include <errno.h>
#include <esp_heap_caps.h>
#include <cstdio>
#include <cstring>
#include "esp_imgfx_color_convert.h"
#include "esp_video_device.h"
#include "esp_video_init.h"
#include "linux/videodev2.h"
#include "esp32_camera.h"
#include "board.h"
#include "display.h"
#include "esp32_camera.h"
#include "esp_jpeg_common.h"
#include "jpg/image_to_jpeg.h"
#include "jpg/jpeg_to_image.h"
#include "lvgl_display.h"
#include "mcp_server.h"
#include "system_info.h"
#include "jpg/image_to_jpeg.h"
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#undef LOG_LOCAL_LEVEL
#define LOG_LOCAL_LEVEL MAX(CONFIG_LOG_DEFAULT_LEVEL, ESP_LOG_DEBUG)
#endif // CONFIG_XIAOZHI_ENABLE_CAMERA_DEBUG_MODE
#include <esp_log.h> // should be after LOCAL_LOG_LEVEL definition
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
#ifdef CONFIG_IDF_TARGET_ESP32P4
@@ -44,11 +51,6 @@
#endif // target
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
#include <errno.h>
#include <esp_heap_caps.h>
#include <esp_log.h>
#include <cstdio>
#include <cstring>
#define TAG "Esp32Camera"
@@ -128,7 +130,7 @@ Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) {
#endif
#if CONFIG_ESP_VIDEO_ENABLE_USB_UVC_VIDEO_DEVICE
else if (config.usb_uvc != nullptr) {
video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(config.usb_uvc->uvc.uvc_dev_num);
video_device_name = ESP_VIDEO_USB_UVC_DEVICE_NAME(0);
}
#endif
@@ -196,7 +198,7 @@ Esp32Camera::Esp32Camera(const esp_video_init_config_t& config) {
case V4L2_PIX_FMT_RGB565:
return 1;
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式
case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式
return 2;
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_GREY:
@@ -209,17 +211,21 @@ case V4L2_PIX_FMT_YUV420: // 软件 JPEG 编码器不支持 YUV420 格式
auto get_rank = [](uint32_t fmt) -> int {
switch (fmt) {
case V4L2_PIX_FMT_YUV422P:
return 0;
return 10;
case V4L2_PIX_FMT_RGB565:
return 1;
return 11;
case V4L2_PIX_FMT_RGB24:
return 2;
return 12;
#ifdef CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
case V4L2_PIX_FMT_YUV420:
return 3;
return 13;
#endif // CONFIG_XIAOZHI_ENABLE_HARDWARE_JPEG_ENCODER
#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
case V4L2_PIX_FMT_JPEG:
return 5;
#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
case V4L2_PIX_FMT_GREY:
return 4;
return 20;
default:
return 1 << 29; // unsupported
}
@@ -404,7 +410,7 @@ bool Esp32Camera::Capture() {
frame_.len = buf.bytesused;
frame_.data = (uint8_t*)heap_caps_malloc(frame_.len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (!frame_.data) {
ESP_LOGE(TAG, "alloc frame copy failed");
ESP_LOGE(TAG, "alloc frame copy failed: need allocate %d bytes", buf.bytesused);
if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
ESP_LOGE(TAG, "Cleanup: VIDIOC_QBUF failed");
}
@@ -427,6 +433,9 @@ bool Esp32Camera::Capture() {
case V4L2_PIX_FMT_YUYV:
case V4L2_PIX_FMT_YUV420:
case V4L2_PIX_FMT_GREY:
#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
case V4L2_PIX_FMT_JPEG:
#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
#ifdef CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP
{
auto src16 = (uint16_t*)mmap_buffers_[buf.index].start;
@@ -791,6 +800,33 @@ bool Esp32Camera::Capture() {
lvgl_image_size = frame_.len; // fallthrough 时兼顾 YUYV 与 RGB565
break;
#ifdef CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
// JPEG input: the captured frame is decoded with jpeg_to_image() and the decoded
// buffer, dimensions, size and stride are used in place of the raw frame values.
case V4L2_PIX_FMT_JPEG: {
uint8_t* out_data = nullptr; // decoded output buffer; allocated by jpeg_to_image
size_t out_len = 0;          // size of the decoded buffer as reported by jpeg_to_image
size_t out_width = 0;        // decoded image width as reported by jpeg_to_image
size_t out_height = 0;       // decoded image height as reported by jpeg_to_image
size_t out_stride = 0;       // decoded row stride as reported by jpeg_to_image
esp_err_t ret =
jpeg_to_image(frame_.data, frame_.len, &out_data, &out_len, &out_width, &out_height, &out_stride);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Failed to decode JPEG image: %d (%s)", (int)ret, esp_err_to_name(ret));
// Release any buffer the decoder handed back before reporting the failure,
// so a partially completed decode does not leak.
if (out_data) {
heap_caps_free(out_data);
out_data = nullptr;
}
return false;
}
// Hand the decoded image to the variables shared with the other format cases.
// NOTE(review): out_data is not freed here on success; presumably ownership moves
// with `data` to the code after the switch - confirm against the rest of Capture().
data = out_data;
w = out_width;
h = out_height;
lvgl_image_size = out_len;
stride = out_stride;
break;
}
#endif // CONFIG_XIAOZHI_CAMERA_ALLOW_JPEG_INPUT
default:
ESP_LOGE(TAG, "unsupported frame format: 0x%08lx", frame_.format);
return false;
@@ -876,16 +912,31 @@ std::string Esp32Camera::Explain(const std::string& question) {
uint16_t w = frame_.width ? frame_.width : 320;
uint16_t h = frame_.height ? frame_.height : 240;
v4l2_pix_fmt_t enc_fmt = frame_.format;
image_to_jpeg_cb(
bool ok = image_to_jpeg_cb(
frame_.data, frame_.len, w, h, enc_fmt, 80,
[](void* arg, size_t index, const void* data, size_t len) -> size_t {
auto jpeg_queue = (QueueHandle_t)arg;
JpegChunk chunk = {.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM), .len = len};
memcpy(chunk.data, data, len);
auto jpeg_queue = static_cast<QueueHandle_t>(arg);
JpegChunk chunk = {.data = nullptr, .len = len};
if (index == 0 && data != nullptr && len > 0) {
chunk.data = (uint8_t*)heap_caps_aligned_alloc(16, len, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (chunk.data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate %zu bytes for JPEG chunk", len);
chunk.len = 0;
} else {
memcpy(chunk.data, data, len);
}
} else {
chunk.len = 0; // Sentinel or error
}
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
return len;
},
jpeg_queue);
if (!ok) {
JpegChunk chunk = {.data = nullptr, .len = 0};
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
}
});
auto network = Board::GetInstance().GetNetwork();
@@ -938,6 +989,7 @@ std::string Esp32Camera::Explain(const std::string& question) {
// Part 3: JPEG data
size_t total_sent = 0;
bool saw_terminator = false;
while (true) {
JpegChunk chunk;
if (xQueueReceive(jpeg_queue, &chunk, portMAX_DELAY) != pdPASS) {
@@ -945,6 +997,7 @@ std::string Esp32Camera::Explain(const std::string& question) {
break;
}
if (chunk.data == nullptr) {
saw_terminator = true;
break; // The last chunk
}
http->Write((const char*)chunk.data, chunk.len);
@@ -956,6 +1009,11 @@ std::string Esp32Camera::Explain(const std::string& question) {
// Clean up the queue
vQueueDelete(jpeg_queue);
if (!saw_terminator || total_sent == 0) {
ESP_LOGE(TAG, "JPEG encoder failed or produced empty output");
throw std::runtime_error("Failed to encode image to JPEG");
}
{
// Part 4: multipart footer
std::string multipart_footer;