diff --git a/main/application.cc b/main/application.cc index 5a51c742..3cdfe974 100644 --- a/main/application.cc +++ b/main/application.cc @@ -788,13 +788,35 @@ bool Application::UpgradeFirmware(Ota& ota, const std::string& url) { } void Application::WakeWordInvoke(const std::string& wake_word) { + if (!protocol_) { + return; + } + if (device_state_ == kDeviceStateIdle) { - ToggleChatState(); - Schedule([this, wake_word]() { - if (protocol_) { - protocol_->SendWakeWordDetected(wake_word); + audio_service_.EncodeWakeWord(); + + if (!protocol_->IsAudioChannelOpened()) { + SetDeviceState(kDeviceStateConnecting); + if (!protocol_->OpenAudioChannel()) { + audio_service_.EnableWakeWordDetection(true); + return; } - }); + } + + ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str()); +#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD + // Encode and send the wake word data to the server + while (auto packet = audio_service_.PopWakeWordPacket()) { + protocol_->SendAudio(std::move(packet)); + } + // Set the chat state to wake word detected + protocol_->SendWakeWordDetected(wake_word); + SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime); +#else + SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime); + // Play the pop up sound to indicate the wake word is detected + audio_service_.PlaySound(Lang::Sounds::OGG_POPUP); +#endif } else if (device_state_ == kDeviceStateSpeaking) { Schedule([this]() { AbortSpeaking(kAbortReasonNone); diff --git a/main/boards/sensecap-watcher/sensecap_watcher.cc b/main/boards/sensecap-watcher/sensecap_watcher.cc index bdc73511..2b1c56d0 100644 --- a/main/boards/sensecap-watcher/sensecap_watcher.cc +++ b/main/boards/sensecap-watcher/sensecap_watcher.cc @@ -12,7 +12,7 @@ #include "lvgl_theme.h" #include -#include "esp_check.h" +#include #include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include #include "assets/lang_config.h" @@ -492,6 +493,47 @@ private: }; ESP_ERROR_CHECK(esp_console_cmd_register(&cmd5)); + const esp_console_cmd_t cmd6 = { + .command = "version", + .help = "Read version info", + .hint = NULL, + .func = NULL, + .argtable = NULL, + .func_w_context = [](void *context,int argc, char** argv) -> int { + auto self = static_cast(context); + auto app_desc = esp_app_get_description(); + const char* region = "UNKNOWN"; + #if defined(CONFIG_LANGUAGE_ZH_CN) + region = "CN"; + #elif defined(CONFIG_LANGUAGE_EN_US) + region = "US"; + #elif defined(CONFIG_LANGUAGE_JA_JP) + region = "JP"; + #elif defined(CONFIG_LANGUAGE_ES_ES) + region = "ES"; + #elif defined(CONFIG_LANGUAGE_DE_DE) + region = "DE"; + #elif defined(CONFIG_LANGUAGE_FR_FR) + region = "FR"; + #elif defined(CONFIG_LANGUAGE_IT_IT) + region = "IT"; + #elif defined(CONFIG_LANGUAGE_PT_PT) + region = "PT"; + #elif defined(CONFIG_LANGUAGE_RU_RU) + region = "RU"; + #elif defined(CONFIG_LANGUAGE_KO_KR) + region = "KR"; + #endif + printf("{\"type\":0,\"name\":\"VER?\",\"code\":0,\"data\":{\"software\":\"%s\",\"hardware\":\"watcher xiaozhi agent\",\"camera\":%d,\"region\":\"%s\"}}\n", + app_desc->version, + self->GetCamera() == nullptr ? 0 : 1, + region); + return 0; + }, + .context =this + }; + ESP_ERROR_CHECK(esp_console_cmd_register(&cmd6)); + esp_console_dev_uart_config_t hw_config = ESP_CONSOLE_DEV_UART_CONFIG_DEFAULT(); ESP_ERROR_CHECK(esp_console_new_repl_uart(&hw_config, &repl_config, &repl)); ESP_ERROR_CHECK(esp_console_start_repl(repl)); diff --git a/main/boards/sensecap-watcher/sscma_camera.cc b/main/boards/sensecap-watcher/sscma_camera.cc index 07998a29..2d875c5b 100644 --- a/main/boards/sensecap-watcher/sscma_camera.cc +++ b/main/boards/sensecap-watcher/sscma_camera.cc @@ -5,10 +5,12 @@ #include "board.h" #include "system_info.h" #include "config.h" +#include "settings.h" #include #include #include +#include "application.h" #define TAG "SscmaCamera" @@ -47,28 +49,190 @@ SscmaCamera::SscmaCamera(esp_io_expander_handle_t io_exp_handle) { sscma_client_callback_t callback = {0}; + detection_state = SscmaCamera::IDLE; + state_start_time = 0; + need_start_cooldown = false; callback.on_event = [](sscma_client_handle_t client, const sscma_client_reply_t *reply, void *user_ctx) { SscmaCamera* self = static_cast(user_ctx); if (!self) return; + char *img = NULL; int img_size = 0; - if (sscma_utils_fetch_image_from_reply(reply, &img, &img_size) == ESP_OK) - { - ESP_LOGI(TAG, "image_size: %d\n", img_size); - // 将数据通过队列发送出去 - SscmaData data; - data.img = (uint8_t*)img; - data.len = img_size; + int box_count = 0; + sscma_client_box_t *boxes = NULL; + int class_count = 0; + sscma_client_class_t *classes = NULL; + int point_count = 0; + sscma_client_point_t *points = NULL; + int model_type = 0; + int obj_cnt = 0; - // 清空队列,保证只保存最新的数据 - SscmaData dummy; - while (xQueueReceive(self->sscma_data_queue_, &dummy, 0) == pdPASS) { - if (dummy.img) { - heap_caps_free(dummy.img); + int width = 0, height = 0; + cJSON *data = cJSON_GetObjectItem(reply->payload, "data"); + if (data != NULL && cJSON_IsObject(data)) { + cJSON *resolution = cJSON_GetObjectItem(data, "resolution"); + if (data != NULL && cJSON_IsArray(resolution) && cJSON_GetArraySize(resolution) == 2) { + width = cJSON_GetArrayItem(resolution, 0)->valueint; + height = cJSON_GetArrayItem(resolution, 1)->valueint; + } + } + + switch ((width+height)) { + case (416+416): + { + bool is_object_detected = false; + bool is_need_wake = false; + + // 定期更新检测配置参数,避免频繁NVS访问 + int64_t cur_tm = esp_timer_get_time(); + + // 尝试获取检测框数据(目标检测模型) + if (sscma_utils_fetch_boxes_from_reply(reply, &boxes, &box_count) == ESP_OK && box_count > 0) { + for (int i = 0; i < box_count; i++) { + ESP_LOGI(TAG, "[box %d]: x=%d, y=%d, w=%d, h=%d, score=%d, target=%d", i, \ + boxes[i].x, boxes[i].y, boxes[i].w, boxes[i].h, boxes[i].score, boxes[i].target); + if (boxes[i].target == self->detect_target && boxes[i].score > self->detect_threshold) { + is_object_detected = true; + model_type = 0; + obj_cnt++; + break; + } + } + + } else if (sscma_utils_fetch_classes_from_reply(reply, &classes, &class_count) == ESP_OK && class_count > 0) { + // 尝试获取分类数据(分类模型) + for (int i = 0; i < class_count; i++) { + ESP_LOGI(TAG, "[class %d]: target=%d, score=%d", i, + classes[i].target, classes[i].score); + if (classes[i].target == self->detect_target && classes[i].score > self->detect_threshold) { + is_object_detected = true; + model_type = 1; + obj_cnt++; + } + } + } else if (sscma_utils_fetch_points_from_reply(reply, &points, &point_count) == ESP_OK && point_count > 0) { + // 尝试获取关键点数据(姿态估计模型) + for (int i = 0; i < point_count; i++) { + ESP_LOGI(TAG, "[point %d]: x=%d, y=%d, z=%d, score=%d, target=%d", i, + points[i].x, points[i].y, points[i].z, points[i].score, points[i].target); + if (points[i].target == self->detect_target && points[i].score > self->detect_threshold) { + is_object_detected = true; + model_type = 2; + obj_cnt++; + } + } + } + + // 如果需要开始冷却期,现在开始计时 + if (self->need_start_cooldown) { // 回调暂停,标志保持,等待回调恢复后开始计时 + self->state_start_time = cur_tm; + self->need_start_cooldown = false; + ESP_LOGI(TAG, "Starting cooldown timer"); + } + + // 状态机驱动的检测逻辑 - 只在人员出现时触发 + switch (self->detection_state) { + case SscmaCamera::IDLE: + if (is_object_detected) { + // 人员出现,开始验证(这是从无到有的转换) + self->detection_state = SscmaCamera::VALIDATING; + self->state_start_time = cur_tm; // 记录物体出现时间 + self->last_detected_time = cur_tm; // 初始化最后检测时间 + ESP_LOGI(TAG, "object appeared, starting validation"); + } + break; + + case SscmaCamera::VALIDATING: + if (is_object_detected) { + // 更新最后检测到的时间 + self->last_detected_time = cur_tm; + // 检查是否验证足够时间 + if ((cur_tm - self->state_start_time) >= (self->detect_duration_sec * 1000000)) { + is_need_wake = true; + } + } else { + // 验证期间人员离开,检查去抖动时间 + if (self->last_detected_time > 0 && + (cur_tm - self->last_detected_time) >= self->detect_debounce_sec * 1000000LL) { + // 去抖动时间已过,确认人员已离开,回到空闲 + self->detection_state = SscmaCamera::IDLE; + self->last_detected_time = 0; + ESP_LOGI(TAG, "object left during validation (debounced), back to idle"); + } + } + break; + + case SscmaCamera::COOLDOWN: + // 冷却期,需要满足两个条件:1)object离开 2)过了15秒 + if (!is_object_detected && + (cur_tm - self->state_start_time) >= (self->detect_invoke_interval_sec * 1000000LL)) { + // object离开且冷却时间到,回到空闲状态 + self->detection_state = SscmaCamera::IDLE; + ESP_LOGI(TAG, "Cooldown complete and object left, back to idle - ready for next appearance"); + } + // 其他情况继续保持冷却状态 + break; + } + + + if( is_need_wake ) { + ESP_LOGI(TAG, "Validation complete, triggering conversation (type=%d, res=%dx%d)", + self->detect_target, width, height); + + // 触发对话 + std::string wake_word; + if ( model_type == 0 ) { + std::string cached_target_name = "object"; + if( self->model != NULL && self->model->classes[self->detect_target] != NULL ) { + cached_target_name = self->model->classes[self->detect_target]; + } + wake_word = "" + std::to_string(obj_cnt) + " " + cached_target_name + " detected "; + } else if ( model_type == 1 ) { + std::string cached_target_name = "object"; + if( self->model != NULL && self->model->classes[self->detect_target] != NULL ) { + cached_target_name = self->model->classes[self->detect_target]; + } + wake_word = "" + std::to_string(obj_cnt) + " " + cached_target_name + " detected "; + } else if ( model_type == 2 ) { + std::string cached_target_name = "object"; + if( self->model != NULL && self->model->classes[self->detect_target] != NULL ) { + cached_target_name = self->model->classes[self->detect_target]; + } + wake_word = "" + std::to_string(obj_cnt) + " " + cached_target_name + " detected "; + } + printf("wake_word:%s\n", wake_word.c_str()); + Application::GetInstance().WakeWordInvoke(wake_word); + + // 进入冷却状态,标记需要开始冷却期;如下变量将在会话结束后被使用,等待回调恢复后开始计时 + self->detection_state = SscmaCamera::COOLDOWN; + self->need_start_cooldown = true; } } - xQueueSend(self->sscma_data_queue_, &data, 0); - // 注意:img 的释放由接收方负责 + break; + case (640+480): + + if (sscma_utils_fetch_image_from_reply(reply, &img, &img_size) == ESP_OK) + { + ESP_LOGI(TAG, "image_size: %d\n", img_size); + // 将数据通过队列发送出去 + SscmaData data; + data.img = (uint8_t*)img; + data.len = img_size; + + // 清空队列,保证只保存最新的数据 + SscmaData dummy; + while (xQueueReceive(self->sscma_data_queue_, &dummy, 0) == pdPASS) { + if (dummy.img) { + heap_caps_free(dummy.img); + } + } + xQueueSend(self->sscma_data_queue_, &data, 0); + // 注意:img 的释放由接收方负责 + } + break; + default: + ESP_LOGI(TAG, "unknown resolution"); + break; } }; callback.on_connect = [](sscma_client_handle_t client, const sscma_client_reply_t *reply, void *user_ctx) { @@ -148,6 +312,57 @@ SscmaCamera::SscmaCamera(esp_io_expander_handle_t io_exp_handle) { ESP_LOGE(TAG, "Failed to allocate memory for preview image"); return; } + + sscma_client_set_model(sscma_client_handle_, 4); + model_class_cnt = 0; + if (sscma_client_get_model(sscma_client_handle_, &model, true) == ESP_OK) { + printf("ID: %d\n", model->id ? model->id : -1); + printf("UUID: %s\n", model->uuid ? model->uuid : "N/A"); + printf("Name: %s\n", model->name ? model->name : "N/A"); + printf("Version: %s\n", model->ver ? model->ver : "N/A"); + printf("URL: %s\n", model->url ? model->url : "N/A"); + printf("Checksum: %s\n", model->checksum ? model->checksum : "N/A"); + printf("Classes:\n"); + if (model->classes[0] != NULL) + { + for (int i = 0; model->classes[i] != NULL; i++) + { + printf(" - %s\n", model->classes[i]); + model_class_cnt++; + } + } else { + printf(" N/A\n"); + } + } else { + printf("get model failed\n"); + } + + ESP_LOGI(TAG, "initialize mcp tools"); + InitializeMcpTools(); + + xTaskCreate([](void* arg) { + auto this_ = (SscmaCamera*)arg; + bool is_inference = false; + while (true) + { + if (this_->inference_en && Application::GetInstance().GetDeviceState() == kDeviceStateIdle ) { + if (!is_inference) { + ESP_LOGI(TAG, "Start inference (enable=1)"); + sscma_client_break(this_->sscma_client_handle_); + sscma_client_set_model(this_->sscma_client_handle_, 4); + sscma_client_set_sensor(this_->sscma_client_handle_, 1, 1, true); // 设置分辨率 416X416 + sscma_client_invoke(this_->sscma_client_handle_, -1, false, true); + is_inference = true; + } + } else if (is_inference && (!this_->inference_en || Application::GetInstance().GetDeviceState() != kDeviceStateIdle)) { + ESP_LOGI(TAG, "Stop inference (enable=%d state=%d)", this_->inference_en, Application::GetInstance().GetDeviceState()); + is_inference = false; + sscma_client_break(this_->sscma_client_handle_); + } + vTaskDelay(pdMS_TO_TICKS(200)); + } + }, "sscma_camera", 4096, this, 1, nullptr); + } SscmaCamera::~SscmaCamera() { @@ -179,6 +394,121 @@ SscmaCamera::~SscmaCamera() { } } +void SscmaCamera::InitializeMcpTools() { + + Settings settings("model", false); + detect_threshold = settings.GetInt("threshold", 75); + detect_invoke_interval_sec = settings.GetInt("interval", 8); + detect_duration_sec = settings.GetInt("duration", 2); + detect_target = settings.GetInt("target", 0); + inference_en = settings.GetInt("enable", 0); + + auto& mcp_server = McpServer::GetInstance(); + // 获取模型参数配置 + mcp_server.AddTool("self.model.param_get", + "获取模型参数配置\n" + " `threshold`: 检测置信度阈值 (0-100, 默认 75);\n" + " `interval`: 对话结束后的冷却时间,防止频繁打断 (默认 8 秒);\n" + " `duration`: 检测持续时间 (默认 2 秒);\n" + " `target`: 检测目标 (默认 0);", + PropertyList(), + [this](const PropertyList& properties) -> ReturnValue { + Settings settings("model", false); + int threshold = settings.GetInt("threshold", 75); + int interval = settings.GetInt("interval", 8); + int duration = settings.GetInt("duration", 2); + int target_type = settings.GetInt("target", 0); + + std::string result = "{\"threshold\":" + std::to_string(threshold) + + ",\"interval\":" + std::to_string(interval) + + ",\"duration\":" + std::to_string(duration) + + ",\"target_type\":" + std::to_string(target_type) + "}"; + return result; + }); + + + // 设置模型参数配置 + mcp_server.AddTool("self.model.param_set", + "模型参数设置\n" + " `threshold`: 检测置信度阈值 (单位百分比, 默认 75);" + " `interval`: 对话结束后的冷却时间,防止频繁打断 (单位秒,默认 8 秒);" + " `duration`: 检测持续时间 (单位秒,默认 2 秒);" + " `target`: 检测目标 (默认 0);", + PropertyList({ + Property("threshold", kPropertyTypeInteger, 75, 0, 100), + Property("interval", kPropertyTypeInteger, 8, 1, 60), + Property("duration", kPropertyTypeInteger, 2, 1, 60), + Property("target", kPropertyTypeInteger, 0, 0, this->model_class_cnt > 0 ? this->model_class_cnt - 1 : 0) + }), + [this](const PropertyList& properties) -> ReturnValue { + Settings settings("model", true); + try { + const Property& threshold_prop = properties["threshold"]; + int threshold = threshold_prop.value(); + settings.SetInt("threshold", threshold); + this->detect_threshold = threshold; + ESP_LOGI(TAG, "Set detection threshold to %d", threshold); + } catch (const std::runtime_error&) { + // threshold parameter not provided, skip + } + + try { + const Property& interval_prop = properties["interval"]; + int interval = interval_prop.value(); + settings.SetInt("interval", interval); + this->detect_invoke_interval_sec = interval; + ESP_LOGI(TAG, "Set detection interval to %d", interval); + } catch (const std::runtime_error&) { + // interval parameter not provided, skip + } + + try { + const Property& duration_prop = properties["duration"]; + int duration = duration_prop.value(); + settings.SetInt("duration", duration); + this->detect_duration_sec = duration; + } catch (const std::runtime_error&) { + // duration parameter not provided, skip + } + + try { + const Property& target_prop = properties["target"]; + int target = target_prop.value(); + settings.SetInt("target", target); + this->detect_target = target; + ESP_LOGI(TAG, "Set detection target to %d", target); + } catch (const std::runtime_error&) { + // target_type parameter not provided, skip + } + + return "{\"status\": \"success\", \"message\": \"Detection configuration updated\"}"; + }); + + // 推理开关获取 + mcp_server.AddTool("self.model.enable", + "控制推理开关\n" + " 读取/设置推理是否开启; 0=关闭, 1=开启\n" + "可选字段: `enable`\n", + PropertyList({ + Property("enable", kPropertyTypeInteger, inference_en, 0, 1) + }), + [this](const PropertyList& properties) -> ReturnValue { + Settings settings("model", true); + try { + const Property& enable_prop = properties["enable"]; + int en = enable_prop.value(); + settings.SetInt("enable", en); + this->inference_en = en; + ESP_LOGI(TAG, "Set inference enable to %d", en); + } catch (const std::runtime_error&) { + // enable not provided -> treat as query + } + // 返回当前配置 + int cur_en = settings.GetInt("enable", this->inference_en); + return std::string("{\"enable\":") + std::to_string(cur_en) + "}"; + }); +} + void SscmaCamera::SetExplainUrl(const std::string& url, const std::string& token) { explain_url_ = url; explain_token_ = token; @@ -194,8 +524,11 @@ bool SscmaCamera::Capture() { return false; } + if (sscma_client_set_sensor(sscma_client_handle_, 1, 3, true)) { + ESP_LOGE(TAG, "Failed to set sensor"); + return false; + } ESP_LOGI(TAG, "Capturing image..."); - // himax 有缓存数据,需要拍两张照片, 只获取最新的照片即可. if (sscma_client_sample(sscma_client_handle_, 2) ) { ESP_LOGE(TAG, "Failed to capture image from SSCMA client"); @@ -245,7 +578,19 @@ bool SscmaCamera::Capture() { // 显示预览图片 auto display = dynamic_cast(Board::GetInstance().GetDisplay()); if (display != nullptr) { - auto image = std::make_unique(&preview_image_); + uint16_t w = preview_image_.header.w; + uint16_t h = preview_image_.header.h; + size_t image_size = w * h * 2; + size_t stride = preview_image_.header.w * 2; + + uint8_t* data = (uint8_t*)heap_caps_malloc(image_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (data == nullptr) { + ESP_LOGE(TAG, "Failed to allocate memory for display image"); + return true; + } + memcpy(data, preview_image_.data, image_size); + + auto image = std::make_unique(data, image_size, w, h, stride, LV_COLOR_FORMAT_RGB565); display->SetPreviewImage(std::move(image)); } return true; diff --git a/main/boards/sensecap-watcher/sscma_camera.h b/main/boards/sensecap-watcher/sscma_camera.h index 731b8da0..e798508c 100644 --- a/main/boards/sensecap-watcher/sscma_camera.h +++ b/main/boards/sensecap-watcher/sscma_camera.h @@ -1,6 +1,7 @@ #ifndef SSCMA_CAMERA_H #define SSCMA_CAMERA_H +#include #include #include #include @@ -35,9 +36,31 @@ private: jpeg_dec_handle_t jpeg_dec_; jpeg_dec_io_t *jpeg_io_; jpeg_dec_header_info_t *jpeg_out_; + // 检测状态机 + enum DetectionState { + IDLE, // 空闲状态 + VALIDATING, // 验证中(连续检测3秒) + COOLDOWN // 冷却期(等待重新检测) + }; + + DetectionState detection_state = IDLE; + int64_t state_start_time = 0; + bool need_start_cooldown = false; // 是否需要开始冷却期 + int64_t last_detected_time = 0; // 验证期间最后一次检测到物体的时间 + + int detect_target = 0; + int detect_threshold = 75; + int detect_duration_sec = 2; // 检测持续时间2秒,确认人员持续存在 + int detect_invoke_interval_sec = 8; // 默认15秒冷却期,避免频繁开始会话 + int detect_debounce_sec = 1; // 验证期间人员离开的去抖动时间1秒 + int inference_en = 0; // 推理使能开关(0: 关闭, 1: 开启) + + sscma_client_model_t *model; + int model_class_cnt = 0; public: SscmaCamera(esp_io_expander_handle_t io_exp_handle); ~SscmaCamera(); + void InitializeMcpTools(); virtual void SetExplainUrl(const std::string& url, const std::string& token); virtual bool Capture(); @@ -45,6 +68,7 @@ public: virtual bool SetHMirror(bool enabled) override; virtual bool SetVFlip(bool enabled) override; virtual std::string Explain(const std::string& question); + }; #endif // ESP32_CAMERA_H