From 5d3f5971375bc037392f4d14314c1202b9f3daf9 Mon Sep 17 00:00:00 2001
From: Xiaoxia <terrence@tenclass.com>
Date: Thu, 4 Sep 2025 12:30:26 +0800
Subject: [PATCH] Bump to v1.9.0 (#1157)

* update v2 partition table readme

* feat: Add user only tool

* Add image cache

* smaller cache and buffer, more heap

* use MAIN_EVENT_CLOCK_TICK to avoid audio glitches

* fix: esp_psram_get_size not found in c3

* Bump to 1.9.0
---
 main/application.cc                |  36 +++++-----
 main/application.h                 |   4 +-
 main/audio/audio_service.cc        |   4 +-
 main/boards/common/esp32_camera.cc |   3 +
 main/display/lcd_display.cc        |  28 ++++++--
 main/display/oled_display.cc       |   1 -
 main/idf_component.yml             |   4 +-
 main/mcp_server.cc                 |  13 +++-
 main/mcp_server.h                  |  13 ++++
 partitions/v2/README.md            | 103 +++++++++++++++++++++++------
 sdkconfig.defaults                 |   6 ++
 sdkconfig.defaults.esp32s3         |   1 -
 12 files changed, 163 insertions(+), 53 deletions(-)

diff --git a/main/application.cc b/main/application.cc
index 1e882a44..adb4eb91 100644
--- a/main/application.cc
+++ b/main/application.cc
@@ -49,7 +49,7 @@ Application::Application() {
     esp_timer_create_args_t clock_timer_args = {
         .callback = [](void* arg) {
             Application* app = (Application*)arg;
-            app->OnClockTimer();
+            xEventGroupSetBits(app->event_group_, MAIN_EVENT_CLOCK_TICK);
         },
         .arg = this,
         .dispatch_method = ESP_TIMER_TASK,
@@ -496,6 +496,8 @@ void Application::Start() {
     });
     bool protocol_started = protocol_->Start();
 
+    // Print heap stats
+    SystemInfo::PrintHeapStats();
     SetDeviceState(kDeviceStateIdle);
 
     has_server_time_ = ota.HasServerTime();
@@ -506,23 +508,6 @@ void Application::Start() {
         // Play the success sound to indicate the device is ready
         audio_service_.PlaySound(Lang::Sounds::OGG_SUCCESS);
     }
-
-    // Print heap stats
-    SystemInfo::PrintHeapStats();
-}
-
-void Application::OnClockTimer() {
-    clock_ticks_++;
-
-    auto display = Board::GetInstance().GetDisplay();
-    display->UpdateStatusBar();
-
-    // Print the debug info every 10 seconds
-    if (clock_ticks_ % 10 == 0) {
-        // SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
-        // SystemInfo::PrintTaskList();
-        SystemInfo::PrintHeapStats();
-    }
 }
 
 // Add a async task to MainLoop
@@ -546,7 +531,9 @@ void Application::MainEventLoop() {
             MAIN_EVENT_SEND_AUDIO |
             MAIN_EVENT_WAKE_WORD_DETECTED |
             MAIN_EVENT_VAD_CHANGE |
+            MAIN_EVENT_CLOCK_TICK |
             MAIN_EVENT_ERROR, pdTRUE, pdFALSE, portMAX_DELAY);
+
         if (bits & MAIN_EVENT_ERROR) {
             SetDeviceState(kDeviceStateIdle);
             Alert(Lang::Strings::ERROR, last_error_message_.c_str(), "circle_xmark", Lang::Sounds::OGG_EXCLAMATION);
@@ -579,6 +566,19 @@ void Application::MainEventLoop() {
                 task();
             }
         }
+
+        if (bits & MAIN_EVENT_CLOCK_TICK) {
+            clock_ticks_++;
+            auto display = Board::GetInstance().GetDisplay();
+            display->UpdateStatusBar();
+        
+            // Print the debug info every 10 seconds
+            if (clock_ticks_ % 10 == 0) {
+                // SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
+                // SystemInfo::PrintTaskList();
+                SystemInfo::PrintHeapStats();
+            }
+        }
     }
 }
 
diff --git a/main/application.h b/main/application.h
index 0fb898bb..f5659d6b 100644
--- a/main/application.h
+++ b/main/application.h
@@ -16,12 +16,15 @@
 #include "audio_service.h"
 #include "device_state_event.h"
 
+
 #define MAIN_EVENT_SCHEDULE (1 << 0)
 #define MAIN_EVENT_SEND_AUDIO (1 << 1)
 #define MAIN_EVENT_WAKE_WORD_DETECTED (1 << 2)
 #define MAIN_EVENT_VAD_CHANGE (1 << 3)
 #define MAIN_EVENT_ERROR (1 << 4)
 #define MAIN_EVENT_CHECK_NEW_VERSION_DONE (1 << 5)
+#define MAIN_EVENT_CLOCK_TICK (1 << 6)
+
 
 enum AecMode {
     kAecOff,
@@ -83,7 +86,6 @@ private:
     void OnWakeWordDetected();
     void CheckNewVersion(Ota& ota);
     void ShowActivationCode(const std::string& code, const std::string& message);
-    void OnClockTimer();
     void SetListeningMode(ListeningMode mode);
 };
 
diff --git a/main/audio/audio_service.cc b/main/audio/audio_service.cc
index c1c7fb80..3081e6ad 100644
--- a/main/audio/audio_service.cc
+++ b/main/audio/audio_service.cc
@@ -111,7 +111,7 @@ void AudioService::Start() {
         AudioService* audio_service = (AudioService*)arg;
         audio_service->AudioOutputTask();
         vTaskDelete(NULL);
-    }, "audio_output", 2048 * 2, this, 3, &audio_output_task_handle_);
+    }, "audio_output", 2048 * 2, this, 4, &audio_output_task_handle_);
 #else
     /* Start the audio input task */
     xTaskCreate([](void* arg) {
@@ -125,7 +125,7 @@ void AudioService::Start() {
         AudioService* audio_service = (AudioService*)arg;
         audio_service->AudioOutputTask();
         vTaskDelete(NULL);
-    }, "audio_output", 2048, this, 3, &audio_output_task_handle_);
+    }, "audio_output", 2048, this, 4, &audio_output_task_handle_);
 #endif
 
     /* Start the opus codec task */
diff --git a/main/boards/common/esp32_camera.cc b/main/boards/common/esp32_camera.cc
index 38cd0bf7..68bde38a 100644
--- a/main/boards/common/esp32_camera.cc
+++ b/main/boards/common/esp32_camera.cc
@@ -89,6 +89,7 @@ bool Esp32Camera::Capture() {
         encoder_thread_.join();
     }
 
+    auto start_time = esp_timer_get_time();
     int frames_to_get = 2;
     // Try to get a stable frame
     for (int i = 0; i < frames_to_get; i++) {
@@ -101,6 +102,8 @@ bool Esp32Camera::Capture() {
             return false;
         }
     }
+    auto end_time = esp_timer_get_time();
+    ESP_LOGI(TAG, "Camera captured %d frames in %d ms", frames_to_get, int((end_time - start_time) / 1000));
 
     // 如果预览图片 buffer 为空，则跳过预览
     // 但仍返回 true，因为此时图像可以上传至服务器
diff --git a/main/display/lcd_display.cc b/main/display/lcd_display.cc
index 230303ce..5599a193 100644
--- a/main/display/lcd_display.cc
+++ b/main/display/lcd_display.cc
@@ -1,4 +1,6 @@
 #include "lcd_display.h"
+#include "assets/lang_config.h"
+#include "settings.h"
 
 #include <vector>
 #include <algorithm>
@@ -6,10 +8,8 @@
 #include <esp_log.h>
 #include <esp_err.h>
 #include <esp_lvgl_port.h>
-#include <esp_heap_caps.h>
-#include "assets/lang_config.h"
+#include <esp_psram.h>
 #include <cstring>
-#include "settings.h"
 
 #include "board.h"
 
@@ -102,10 +102,21 @@ SpiLcdDisplay::SpiLcdDisplay(esp_lcd_panel_io_handle_t panel_io, esp_lcd_panel_h
     ESP_LOGI(TAG, "Initialize LVGL library");
     lv_init();
 
+#if CONFIG_SPIRAM
+    // lv image cache, currently only PNG is supported
+    size_t psram_size_mb = esp_psram_get_size() / 1024 / 1024;
+    if (psram_size_mb >= 8) {
+        lv_image_cache_resize(2 * 1024 * 1024, true);
+        ESP_LOGI(TAG, "Use 2MB of PSRAM for image cache");
+    } else if (psram_size_mb >= 2) {
+        lv_image_cache_resize(512 * 1024, true);
+        ESP_LOGI(TAG, "Use 512KB of PSRAM for image cache");
+    }
+#endif
+
     ESP_LOGI(TAG, "Initialize LVGL port");
     lvgl_port_cfg_t port_cfg = ESP_LVGL_PORT_INIT_CONFIG();
     port_cfg.task_priority = 1;
-    port_cfg.timer_period_ms = 40;
     lvgl_port_init(&port_cfg);
 
     ESP_LOGI(TAG, "Adding LCD display");
@@ -816,8 +827,10 @@ void LcdDisplay::SetPreviewImage(const lv_img_dsc_t* img_dsc) {
     if (img_dsc != nullptr) {
         // 设置图片源并显示预览图片
         lv_image_set_src(preview_image_, img_dsc);
-        // zoom factor 0.5
-        lv_image_set_scale(preview_image_, 128 * width_ / img_dsc->header.w);
+        if (img_dsc->header.w > 0) {
+            // Scale the preview to half of the display width (in LVGL a scale of 256 means 100%)
+            lv_image_set_scale(preview_image_, 128 * width_ / img_dsc->header.w);
+        }
         lv_obj_remove_flag(preview_image_, LV_OBJ_FLAG_HIDDEN);
         // 隐藏emotion_label_
         if (emotion_label_ != nullptr) {
diff --git a/main/display/oled_display.cc b/main/display/oled_display.cc
index 8e19e4b9..d3fb3aaf 100644
--- a/main/display/oled_display.cc
+++ b/main/display/oled_display.cc
@@ -23,7 +23,6 @@ OledDisplay::OledDisplay(esp_lcd_panel_io_handle_t panel_io, esp_lcd_panel_handl
     lvgl_port_cfg_t port_cfg = ESP_LVGL_PORT_INIT_CONFIG();
     port_cfg.task_priority = 1;
     port_cfg.task_stack = 6144;
-    port_cfg.timer_period_ms = 40;
     lvgl_port_init(&port_cfg);
 
     ESP_LOGI(TAG, "Adding OLED display");
diff --git a/main/idf_component.yml b/main/idf_component.yml
index cd781afe..7810f423 100644
--- a/main/idf_component.yml
+++ b/main/idf_component.yml
@@ -15,8 +15,8 @@ dependencies:
   78/esp_lcd_nv3023: ~1.0.0
   78/esp-wifi-connect: ~2.5.2
   78/esp-opus-encoder: ~2.4.1
-  78/esp-ml307: ~3.3.0
-  78/xiaozhi-fonts: ~1.5.0
+  78/esp-ml307: ~3.3.1
+  78/xiaozhi-fonts: ~1.5.2
   espressif/led_strip: ~3.0.1
   espressif/esp_codec_dev: ~1.4.0
   espressif/esp-sr: ~2.1.5
diff --git a/main/mcp_server.cc b/main/mcp_server.cc
index df37729e..2d86e212 100644
--- a/main/mcp_server.cc
+++ b/main/mcp_server.cc
@@ -29,12 +29,17 @@ McpServer::~McpServer() {
 }
 
 void McpServer::AddCommonTools() {
-    // To speed up the response time, we add the common tools to the beginning of
+    // *Important* To speed up the response time, we add the common tools to the beginning of
     // the tools list to utilize the prompt cache.
+    // **重要** 为了提升响应速度，我们把常用的工具放在前面，利用 prompt cache 的特性。
+
     // Backup the original tools list and restore it after adding the common tools.
     auto original_tools = std::move(tools_);
     auto& board = Board::GetInstance();
 
+    // Do not add custom tools here.
+    // Custom tools must be added in the board's InitializeTools function.
+
     AddTool("self.get_device_status",
         "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\n"
         "Use this tool for: \n"
@@ -122,6 +127,12 @@ void McpServer::AddTool(const std::string& name, const std::string& description,
     AddTool(new McpTool(name, description, properties, callback));
 }
 
+void McpServer::AddUserOnlyTool(const std::string& name, const std::string& description, const PropertyList& properties, std::function<ReturnValue(const PropertyList&)> callback) {
+    auto tool = new McpTool(name, description, properties, callback);  // same construction as the AddTool(name, ...) overload
+    tool->set_user_only(true);  // McpTool::to_json() then adds annotations.audience = ["user"]
+    AddTool(tool);  // NOTE(review): presumably AddTool(McpTool*) takes ownership of the raw pointer -- confirm
+}
+
 void McpServer::ParseMessage(const std::string& message) {
     cJSON* json = cJSON_Parse(message.c_str());
     if (json == nullptr) {
diff --git a/main/mcp_server.h b/main/mcp_server.h
index 27ace329..ac2b936f 100644
--- a/main/mcp_server.h
+++ b/main/mcp_server.h
@@ -177,6 +177,7 @@ private:
     std::string description_;
     PropertyList properties_;
     std::function<ReturnValue(const PropertyList&)> callback_;
+    bool user_only_ = false;
 
 public:
     McpTool(const std::string& name, 
@@ -188,9 +189,11 @@ public:
         properties_(properties), 
         callback_(callback) {}
 
+    void set_user_only(bool user_only) { user_only_ = user_only; }  // when true, to_json() emits an "audience": ["user"] annotation
     inline const std::string& name() const { return name_; }
     inline const std::string& description() const { return description_; }
     inline const PropertyList& properties() const { return properties_; }
+    inline bool user_only() const { return user_only_; }  // whether the tool was flagged user-only (defaults to false)
 
     std::string to_json() const {
         std::vector<std::string> required = properties_.GetRequired();
@@ -214,6 +217,15 @@ public:
         }
         
         cJSON_AddItemToObject(json, "inputSchema", input_schema);
+
+        // Add audience annotation if the tool is user only (invisible to AI)
+        if (user_only_) {
+            cJSON *annotations = cJSON_CreateObject();
+            cJSON *audience = cJSON_CreateArray();
+            cJSON_AddItemToArray(audience, cJSON_CreateString("user"));
+            cJSON_AddItemToObject(annotations, "audience", audience);
+            cJSON_AddItemToObject(json, "annotations", annotations);
+        }
         
         char *json_str = cJSON_PrintUnformatted(json);
         std::string result(json_str);
@@ -259,6 +271,7 @@ public:
     void AddCommonTools();
     void AddTool(McpTool* tool);
     void AddTool(const std::string& name, const std::string& description, const PropertyList& properties, std::function<ReturnValue(const PropertyList&)> callback);
+    void AddUserOnlyTool(const std::string& name, const std::string& description, const PropertyList& properties, std::function<ReturnValue(const PropertyList&)> callback);
     void ParseMessage(const cJSON* json);
     void ParseMessage(const std::string& message);
 
diff --git a/partitions/v2/README.md b/partitions/v2/README.md
index a373e530..0b56851e 100644
--- a/partitions/v2/README.md
+++ b/partitions/v2/README.md
@@ -1,46 +1,107 @@
 # Version 2 Partition Table
 
-This version introduces significant improvements over v1 by adding an `assets` partition to support network-loadable content.
+This version introduces significant improvements over v1 by adding an `assets` partition to support network-loadable content and optimizing partition layouts for different flash sizes.
 
 ## Key Changes from v1
 
-### Added Assets Partition
-The v2 partition table includes a new `assets` partition that stores:
+### Major Improvements
+1. **Added Assets Partition**: New `assets` partition for network-loadable content
+2. **Replaced Model Partition**: The old `model` partition (960KB) is replaced with a larger `assets` partition
+3. **Optimized App Partitions**: Reduced application partition sizes to accommodate assets
+4. **Enhanced Flexibility**: Support for dynamic content updates without reflashing
+
+### Assets Partition Features
+The `assets` partition stores:
 - **Wake word models**: Customizable wake word models that can be loaded from the network
 - **Theme files**: Complete theming system including:
-  - Fonts
-  - Audio effects
-  - Background images
+  - Fonts (text and icon fonts)
+  - Audio effects and sound files
+  - Background images and UI elements
   - Custom emoji packs
+  - Language configuration files
+- **Dynamic Content**: All content can be updated over-the-air via HTTP downloads
 
-### Partition Layout Comparison
+## Partition Layout Comparison
 
-#### v1 Layout (16MB)
+### v1 Layout (16MB)
 - `nvs`: 16KB (non-volatile storage)
 - `otadata`: 8KB (OTA data)
 - `phy_init`: 4KB (PHY initialization data)
-- `model`: 960KB (model storage)
+- `model`: 960KB (model storage - fixed content)
 - `ota_0`: 6MB (application partition 0)
 - `ota_1`: 6MB (application partition 1)
 
-#### v2 Layout (16MB)
+### v2 Layout (16MB)
 - `nvs`: 16KB (non-volatile storage)
 - `otadata`: 8KB (OTA data)
 - `phy_init`: 4KB (PHY initialization data)
-- `model`: 960KB (model storage)
 - `ota_0`: 4MB (application partition 0)
 - `ota_1`: 4MB (application partition 1)
-- `assets`: 7MB (network-loadable assets)
+- `assets`: 8MB (network-loadable assets)
 
-### Benefits
+## Available Configurations
 
-1. **Dynamic Content**: Users can download and update wake word models and themes without reflashing
-2. **Reduced App Size**: Application partitions are smaller, allowing more space for assets
-3. **Customization**: Support for custom themes and wake words enhances user experience
-4. **Network Flexibility**: Assets can be updated independently of the main application
+### 8MB Flash Devices (`8m.csv`)
+- `nvs`: 16KB
+- `otadata`: 8KB
+- `phy_init`: 4KB
+- `ota_0`: 3MB
+- `ota_1`: 3MB
+- `assets`: 2MB
 
-### Available Configurations
+### 16MB Flash Devices (`16m.csv`) - Standard
+- `nvs`: 16KB
+- `otadata`: 8KB
+- `phy_init`: 4KB
+- `ota_0`: 4MB
+- `ota_1`: 4MB
+- `assets`: 8MB
 
-- `8m.csv`: For 8MB flash devices
-- `16m.csv`: For 16MB flash devices (standard)
-- `16m_c3.csv`: For 16MB flash devices with ESP32-C3 optimization 
\ No newline at end of file
+### 16MB Flash Devices (`16m_c3.csv`) - ESP32-C3 Optimized
+- `nvs`: 16KB
+- `otadata`: 8KB
+- `phy_init`: 4KB
+- `ota_0`: 4MB
+- `ota_1`: 4MB
+- `assets`: ~4MB (4000KB, limited by the number of available mmap pages)
+
+### 32MB Flash Devices (`32m.csv`)
+- `nvsfactory`: 200KB
+- `nvs`: 840KB
+- `otadata`: 8KB
+- `phy_init`: 4KB
+- `ota_0`: 4MB
+- `ota_1`: 4MB
+- `assets`: 16MB
+
+## Benefits
+
+1. **Dynamic Content Management**: Users can download and update wake word models, themes, and other assets without reflashing the device
+2. **Reduced App Size**: Application partitions are optimized, allowing more space for dynamic content
+3. **Enhanced Customization**: Support for custom themes, wake words, and language packs enhances user experience
+4. **Network Flexibility**: Assets can be updated independently of the main application firmware
+5. **Better Resource Utilization**: Efficient use of flash memory with configurable asset storage
+6. **OTA Asset Updates**: Assets can be updated over-the-air via HTTP downloads
+
+## Technical Details
+
+- **Partition Type**: Assets partition uses `spiffs` subtype for SPIFFS filesystem compatibility
+- **Memory Mapping**: Assets are memory-mapped for efficient access during runtime
+- **Checksum Validation**: Built-in integrity checking ensures asset data validity
+- **Progressive Download**: Assets can be downloaded progressively with progress tracking
+- **Fallback Support**: Graceful fallback to default assets if network updates fail
+
+## Migration from v1
+
+When upgrading from v1 to v2:
+1. **Backup Important Data**: Ensure any important data in the old `model` partition is backed up
+2. **Flash New Partition Table**: Use the appropriate v2 partition table for your flash size
+3. **Download Assets**: The device will automatically download required assets on first boot
+4. **Verify Functionality**: Ensure all features work correctly with the new partition layout
+
+## Usage Notes
+
+- The `assets` partition size varies by configuration to optimize for different flash sizes
+- ESP32-C3 devices use a smaller assets partition (4000KB, just under 4MB) because the number of available mmap pages in the system is limited
+- 32MB devices get the largest assets partition (16MB) for maximum content storage
+- All partition tables maintain proper alignment for optimal flash performance 
\ No newline at end of file
diff --git a/sdkconfig.defaults b/sdkconfig.defaults
index 93a47d1f..2fa1abbe 100644
--- a/sdkconfig.defaults
+++ b/sdkconfig.defaults
@@ -39,6 +39,10 @@ CONFIG_UART_ISR_IN_IRAM=y
 # Fix ESP_SSL error
 CONFIG_MBEDTLS_SSL_RENEGOTIATION=n
 
+# ESP32 Camera
+CONFIG_CAMERA_NO_AFFINITY=y
+CONFIG_CAMERA_DMA_BUFFER_SIZE_MAX=8192
+
 # LVGL 9.2.2
 
 CONFIG_LV_OS_NONE=y
@@ -49,6 +53,8 @@ CONFIG_LV_USE_CLIB_SPRINTF=y
 CONFIG_LV_USE_IMGFONT=y
 CONFIG_LV_USE_ASSERT_STYLE=y
 CONFIG_LV_USE_GIF=y
+CONFIG_LV_USE_LODEPNG=y
+CONFIG_LV_USE_TJPGD=y
 
 # Use compressed font
 CONFIG_LV_FONT_FMT_TXT_LARGE=y
diff --git a/sdkconfig.defaults.esp32s3 b/sdkconfig.defaults.esp32s3
index 40036e44..d1ec5589 100644
--- a/sdkconfig.defaults.esp32s3
+++ b/sdkconfig.defaults.esp32s3
@@ -13,7 +13,6 @@ CONFIG_SPIRAM_MEMTEST=n
 CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y
 
 CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
-CONFIG_ESP32S3_DATA_CACHE_64KB=y
 CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
 
 CONFIG_SR_WN_WN9_NIHAOXIAOZHI_TTS=y