Files
xiaozhi-esp32/main/boards/common/esp32_camera.cc
2025-05-25 17:07:07 +08:00

213 lines
6.9 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "esp32_camera.h"
#include "mcp_server.h"
#include "display.h"
#include "board.h"
#include <esp_log.h>
#include <esp_heap_caps.h>
#include <img_converters.h>
#include <cstring>
#define TAG "Esp32Camera"
Esp32Camera::Esp32Camera(const camera_config_t& config) {
jpeg_queue_ = xQueueCreate(10, sizeof(JpegChunk));
if (jpeg_queue_ == nullptr) {
ESP_LOGE(TAG, "Failed to create JPEG queue");
return;
}
// camera init
esp_err_t err = esp_camera_init(&config); // 配置上面定义的参数
if (err != ESP_OK) {
ESP_LOGE(TAG, "Camera init failed with error 0x%x", err);
return;
}
sensor_t *s = esp_camera_sensor_get(); // 获取摄像头型号
if (s->id.PID == GC0308_PID) {
s->set_hmirror(s, 0); // 这里控制摄像头镜像 写1镜像 写0不镜像
}
// 初始化预览图片的内存
memset(&preview_image_, 0, sizeof(preview_image_));
preview_image_.header.magic = LV_IMAGE_HEADER_MAGIC;
preview_image_.header.cf = LV_COLOR_FORMAT_RGB565;
preview_image_.header.flags = LV_IMAGE_FLAGS_ALLOCATED | LV_IMAGE_FLAGS_MODIFIABLE;
if (config.frame_size == FRAMESIZE_VGA) {
preview_image_.header.w = 640;
preview_image_.header.h = 480;
} else if (config.frame_size == FRAMESIZE_QVGA) {
preview_image_.header.w = 320;
preview_image_.header.h = 240;
}
preview_image_.header.stride = preview_image_.header.w * 2;
preview_image_.data_size = preview_image_.header.w * preview_image_.header.h * 2;
preview_image_.data = (uint8_t*)heap_caps_malloc(preview_image_.data_size, MALLOC_CAP_SPIRAM);
if (preview_image_.data == nullptr) {
ESP_LOGE(TAG, "Failed to allocate memory for preview image");
return;
}
}
Esp32Camera::~Esp32Camera() {
if (fb_) {
esp_camera_fb_return(fb_);
fb_ = nullptr;
}
if (preview_image_.data) {
heap_caps_free((void*)preview_image_.data);
preview_image_.data = nullptr;
}
esp_camera_deinit();
if (jpeg_queue_ != nullptr) {
vQueueDelete(jpeg_queue_);
jpeg_queue_ = nullptr;
}
}
void Esp32Camera::SetExplainUrl(const std::string& url, const std::string& token) {
explain_url_ = url;
explain_token_ = token;
}
bool Esp32Camera::Capture() {
if (preview_thread_.joinable()) {
preview_thread_.join();
}
if (encoder_thread_.joinable()) {
encoder_thread_.join();
}
int frames_to_get = 2;
// Try to get a stable frame
for (int i = 0; i < frames_to_get; i++) {
if (fb_ != nullptr) {
esp_camera_fb_return(fb_);
}
fb_ = esp_camera_fb_get();
if (fb_ == nullptr) {
ESP_LOGE(TAG, "Camera capture failed");
return false;
}
}
preview_thread_ = std::thread([this]() {
// 显示预览图片
auto display = Board::GetInstance().GetDisplay();
if (display != nullptr) {
auto src = (uint16_t*)fb_->buf;
auto dst = (uint16_t*)preview_image_.data;
size_t pixel_count = fb_->len / 2;
for (size_t i = 0; i < pixel_count; i++) {
// 交换每个16位字内的字节
dst[i] = __builtin_bswap16(src[i]);
}
display->SetPreviewImage(&preview_image_);
}
});
return true;
}
std::string Esp32Camera::Explain(const std::string& question) {
if (explain_url_.empty() || explain_token_.empty()) {
return "{\"success\": false, \"message\": \"Image explain URL or token is not set\"}";
}
// We spawn a thread to encode the image to JPEG
encoder_thread_ = std::thread([this]() {
frame2jpg_cb(fb_, 80, [](void* arg, size_t index, const void* data, size_t len) -> unsigned int {
auto jpeg_queue = (QueueHandle_t)arg;
JpegChunk chunk = {
.data = (uint8_t*)heap_caps_malloc(len, MALLOC_CAP_SPIRAM),
.len = len
};
memcpy(chunk.data, data, len);
xQueueSend(jpeg_queue, &chunk, portMAX_DELAY);
return len;
}, jpeg_queue_);
});
auto http = Board::GetInstance().CreateHttp();
// 构造multipart/form-data请求体
std::string boundary = "----ESP32_CAMERA_BOUNDARY";
// 构造question字段
std::string question_field;
question_field += "--" + boundary + "\r\n";
question_field += "Content-Disposition: form-data; name=\"question\"\r\n";
question_field += "\r\n";
question_field += question + "\r\n";
// 构造文件字段头部
std::string file_header;
file_header += "--" + boundary + "\r\n";
file_header += "Content-Disposition: form-data; name=\"file\"; filename=\"camera.jpg\"\r\n";
file_header += "Content-Type: image/jpeg\r\n";
file_header += "\r\n";
// 构造尾部
std::string multipart_footer;
multipart_footer += "\r\n--" + boundary + "--\r\n";
// 配置HTTP客户端使用分块传输编码
http->SetHeader("Authorization", "Bearer " + explain_token_);
http->SetHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
http->SetHeader("Transfer-Encoding", "chunked");
if (!http->Open("POST", explain_url_)) {
ESP_LOGE(TAG, "Failed to connect to explain URL");
// Clear the queue
encoder_thread_.join();
JpegChunk chunk;
while (xQueueReceive(jpeg_queue_, &chunk, portMAX_DELAY) == pdPASS) {
if (chunk.data != nullptr) {
heap_caps_free(chunk.data);
} else {
break;
}
}
return "{\"success\": false, \"message\": \"Failed to connect to explain URL\"}";
}
// 第一块question字段
http->Write(question_field.c_str(), question_field.size());
// 第二块:文件字段头部
http->Write(file_header.c_str(), file_header.size());
// 第三块JPEG数据
size_t total_sent = 0;
while (true) {
JpegChunk chunk;
if (xQueueReceive(jpeg_queue_, &chunk, portMAX_DELAY) != pdPASS) {
ESP_LOGE(TAG, "Failed to receive JPEG chunk");
break;
}
if (chunk.data == nullptr) {
break; // The last chunk
}
http->Write((const char*)chunk.data, chunk.len);
total_sent += chunk.len;
heap_caps_free(chunk.data);
}
// 第四块multipart尾部
http->Write(multipart_footer.c_str(), multipart_footer.size());
// 结束块
http->Write("", 0);
if (http->GetStatusCode() != 200) {
ESP_LOGE(TAG, "Failed to upload photo, status code: %d", http->GetStatusCode());
return "{\"success\": false, \"message\": \"Failed to upload photo\"}";
}
std::string result = http->ReadAll();
http->Close();
ESP_LOGI(TAG, "Explain image size=%dx%d, compressed size=%d, question=%s\n%s", fb_->width, fb_->height, total_sent, question.c_str(), result.c_str());
return result;
}