Files
xiaozhi-esp32/main/Application.h

143 lines
3.8 KiB
C
Raw Normal View History

2024-08-31 18:00:23 +08:00
#ifndef _APPLICATION_H_
#define _APPLICATION_H_
2024-10-03 06:39:22 +08:00
#include <OpusEncoder.h>
#include <OpusResampler.h>
#include <WebSocket.h>
#include <opus.h>
#include <resampler_structs.h>
#include <freertos/event_groups.h>
#include <freertos/task.h>
2024-08-31 18:00:23 +08:00
#include <mutex>
2024-09-03 13:57:18 +08:00
#include <list>
2024-10-03 06:39:22 +08:00
#include <condition_variable>
2024-10-29 00:22:29 +08:00
#include "AudioDevice.h"
2024-10-03 06:39:22 +08:00
#include "Display.h"
2024-10-29 00:22:29 +08:00
#include "Board.h"
2024-10-03 06:39:22 +08:00
#include "FirmwareUpgrade.h"
#ifdef CONFIG_USE_AFE_SR
#include "WakeWordDetect.h"
#include "AudioProcessor.h"
#endif
#include "Button.h"
2024-08-31 18:00:23 +08:00
// NOTE(review): presumably bit flags stored in Application::event_group_ —
// their use is not visible in this header; confirm in Application.cpp.
#define DETECTION_RUNNING 1
#define COMMUNICATION_RUNNING 2
2024-09-25 03:44:28 +08:00
2024-10-30 06:58:29 +08:00
// Protocol revision number.
// NOTE(review): presumably corresponds to the "3" in BinaryProtocol3 — confirm.
#define PROTOCOL_VERSION 3
/**
 * Fixed header of a binary frame, immediately followed by its payload bytes.
 *
 * The struct is packed: the header is exactly 4 bytes (type, reserved,
 * payload_size) and `payload` begins at offset 4. `payload` is a flexible
 * array member (a compiler extension in C++), so it contributes nothing to
 * sizeof and its storage must be allocated beyond sizeof(BinaryProtocol3).
 *
 * NOTE(review): the byte order of payload_size and the meaning of `type`
 * values are not visible from this header — confirm against
 * Application::ParseBinaryProtocol3().
 */
struct BinaryProtocol3 {
    uint8_t type;
    uint8_t reserved;
    uint16_t payload_size;  // presumably the number of bytes in `payload` — confirm
    uint8_t payload[];      // flexible array member, starts right after the header
} __attribute__((packed));
2024-08-31 18:00:23 +08:00
2024-10-03 06:39:22 +08:00
/**
 * Kind of an AudioPacket. Enumerators are numbered consecutively from 0 in
 * declaration order (the alias below does not shift the following values).
 */
enum AudioPacketType {
    kAudioPacketTypeUnkonwn = 0,                        // historical misspelling, kept for existing callers
    kAudioPacketTypeUnknown = kAudioPacketTypeUnkonwn,  // correctly spelled alias with the same value
    kAudioPacketTypeStart,
    kAudioPacketTypeStop,
    kAudioPacketTypeData,
    kAudioPacketTypeSentenceStart,
    kAudioPacketTypeSentenceEnd
};

/**
 * One unit of audio-related data.
 *
 * A default-constructed packet has type kAudioPacketTypeUnkonwn, empty
 * text/opus/pcm and timestamp 0.
 */
struct AudioPacket {
    AudioPacketType type = kAudioPacketTypeUnkonwn;
    std::string text;
    std::vector<uint8_t> opus;  // NOTE(review): presumably Opus-encoded bytes — confirm
    std::vector<int16_t> pcm;   // NOTE(review): presumably decoded 16-bit PCM samples — confirm
    // Zero-initialised so a default-constructed packet never carries an
    // indeterminate value.
    uint32_t timestamp = 0;
};
2024-08-31 18:00:23 +08:00
/**
 * State value returned by Application::GetChatState() and accepted by
 * Application::SetChatState().
 *
 * Enumerators are implicitly numbered from 0 in declaration order;
 * kChatStateUnknown (0) is the initial value of Application::chat_state_.
 */
enum ChatState {
    kChatStateUnknown,
    kChatStateIdle,
    kChatStateConnecting,
    kChatStateListening,
    kChatStateSpeaking,
    kChatStateWakeWordDetected,
    kChatStateUpgrading
};
class Application {
public:
2024-09-10 05:58:56 +08:00
static Application& GetInstance() {
static Application instance;
return instance;
}
2024-08-31 18:00:23 +08:00
void Start();
2024-10-29 00:22:29 +08:00
ChatState GetChatState() const { return chat_state_; }
Display& GetDisplay() { return display_; }
void Schedule(std::function<void()> callback);
void SetChatState(ChatState state);
2024-10-30 06:58:29 +08:00
void Alert(const std::string&& title, const std::string&& message);
2024-10-31 05:57:13 +08:00
void AbortSpeaking();
2024-09-10 05:58:56 +08:00
// 删除拷贝构造函数和赋值运算符
Application(const Application&) = delete;
Application& operator=(const Application&) = delete;
2024-08-31 18:00:23 +08:00
private:
2024-09-10 05:58:56 +08:00
Application();
~Application();
2024-10-15 03:56:35 +08:00
Button boot_button_;
Button volume_up_button_;
2024-10-24 09:53:08 +08:00
Button volume_down_button_;
2024-10-29 00:22:29 +08:00
AudioDevice* audio_device_ = nullptr;
2024-10-24 09:53:08 +08:00
Display display_;
2024-10-03 06:39:22 +08:00
#ifdef CONFIG_USE_AFE_SR
WakeWordDetect wake_word_detect_;
AudioProcessor audio_processor_;
2024-10-01 14:16:12 +08:00
#endif
2024-09-05 17:22:01 +08:00
FirmwareUpgrade firmware_upgrade_;
2024-10-03 06:39:22 +08:00
std::mutex mutex_;
std::condition_variable_any cv_;
std::list<std::function<void()>> main_tasks_;
2024-10-01 14:16:12 +08:00
WebSocket* ws_client_ = nullptr;
2024-08-31 18:00:23 +08:00
EventGroupHandle_t event_group_;
2024-10-29 00:22:29 +08:00
volatile ChatState chat_state_ = kChatStateUnknown;
2024-10-03 06:39:22 +08:00
volatile bool break_speaking_ = false;
bool skip_to_end_ = false;
2024-10-29 00:22:29 +08:00
esp_timer_handle_t update_display_timer_ = nullptr;
2024-08-31 18:00:23 +08:00
// Audio encode / decode
2024-10-03 06:39:22 +08:00
TaskHandle_t audio_encode_task_ = nullptr;
2024-08-31 18:00:23 +08:00
StaticTask_t audio_encode_task_buffer_;
StackType_t* audio_encode_task_stack_ = nullptr;
2024-10-03 06:39:22 +08:00
std::list<std::vector<int16_t>> audio_encode_queue_;
std::list<AudioPacket*> audio_decode_queue_;
std::list<AudioPacket*> audio_play_queue_;
2024-08-31 18:00:23 +08:00
OpusEncoder opus_encoder_;
OpusDecoder* opus_decoder_ = nullptr;
int opus_duration_ms_ = 60;
2024-10-29 00:22:29 +08:00
int opus_decode_sample_rate_ = AUDIO_OUTPUT_SAMPLE_RATE;
2024-10-24 09:53:08 +08:00
OpusResampler input_resampler_;
OpusResampler output_resampler_;
2024-10-01 14:16:12 +08:00
TaskHandle_t check_new_version_task_ = nullptr;
StaticTask_t check_new_version_task_buffer_;
StackType_t* check_new_version_task_stack_ = nullptr;
2024-09-03 13:57:18 +08:00
2024-10-03 06:39:22 +08:00
void MainLoop();
2024-10-30 06:58:29 +08:00
BinaryProtocol3* AllocateBinaryProtocol3(const uint8_t* payload, size_t payload_size);
void ParseBinaryProtocol3(const char* data, size_t size);
2024-09-01 13:24:45 +08:00
void SetDecodeSampleRate(int sample_rate);
2024-08-31 18:00:23 +08:00
void StartWebSocketClient();
void CheckNewVersion();
2024-10-03 06:39:22 +08:00
2024-08-31 18:00:23 +08:00
void AudioEncodeTask();
2024-10-03 06:39:22 +08:00
void AudioPlayTask();
void HandleAudioPacket(AudioPacket* packet);
2024-10-30 06:58:29 +08:00
void PlayLocalFile(const char* data, size_t size);
2024-08-31 18:00:23 +08:00
};
#endif // _APPLICATION_H_