1. 优化了音频输入输出模块的内容
2. 新增了对模型动作组管理的接口,方便后续模型的动作管理
This commit is contained in:
@@ -10,6 +10,8 @@
|
||||
#include <QTimer>
|
||||
#include <QDir>
|
||||
#include <vector>
|
||||
#include <QScopedPointer>
|
||||
#include <QMutex>
|
||||
|
||||
/**
|
||||
* @brief 录音模块
|
||||
@@ -21,15 +23,15 @@
|
||||
class AudioInput : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
Q_DISABLE_COPY(AudioInput) // 禁用拷贝
|
||||
private:
|
||||
/**
|
||||
* @brief 构造函数
|
||||
* @param parent
|
||||
*/
|
||||
explicit AudioInput(QObject *parent = nullptr);
|
||||
static AudioInput* instance;
|
||||
|
||||
static QScopedPointer<AudioInput> instance;
|
||||
static QMutex mutex;
|
||||
public:
|
||||
/**
|
||||
* @brief 获取实例
|
||||
@@ -145,5 +147,6 @@ private:
|
||||
std::vector<qreal> m_rmsValues; /// RMS值vector
|
||||
qreal m_silenceThreshold = 1200; /// 静音阈值
|
||||
int m_silenceDuration = 1500; /// 静音持续时间
|
||||
qreal m_smoothRms = 0.0; /// 平滑RMS值(用于防止低频杂波突然打断静音检测)
|
||||
};
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <QAudioSink> // 音频输出组件, 用于原始数据播放
|
||||
#include <QUrl>
|
||||
#include <QBuffer>
|
||||
#include <QScopedPointer>
|
||||
#include <QMutex>
|
||||
|
||||
/**
|
||||
* @brief 音频播放模块
|
||||
@@ -23,6 +25,7 @@
|
||||
class AudioOutput : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
Q_DISABLE_COPY(AudioOutput) // 禁用拷贝
|
||||
private:
|
||||
/**
|
||||
* 构造函数私有化
|
||||
@@ -30,7 +33,8 @@ private:
|
||||
*/
|
||||
explicit AudioOutput(QObject *parent = nullptr); // 并不将本模块挂在对象树当中,因为本模块为单例类,内存自行管理
|
||||
|
||||
static AudioOutput *instance; // 单例类
|
||||
static QScopedPointer<AudioOutput> instance; // 单例类
|
||||
static QMutex mutex;
|
||||
public:
|
||||
static AudioOutput *getInstance();
|
||||
|
||||
|
||||
@@ -7,14 +7,18 @@
|
||||
#include <QtMath>
|
||||
#include <QtEndian> // 用于处理字节序
|
||||
|
||||
AudioInput *AudioInput::instance = nullptr;
|
||||
QScopedPointer<AudioInput> AudioInput::instance;
|
||||
QMutex AudioInput::mutex;
|
||||
AudioInput *AudioInput::getInstance()
|
||||
{
|
||||
// 懒汉式 依旧单线程无需加锁
|
||||
if (instance == nullptr) {
|
||||
instance = new AudioInput();
|
||||
if (instance.isNull()) {
|
||||
QMutexLocker locker(&mutex);
|
||||
if (instance.isNull()) {
|
||||
instance.reset(new AudioInput);
|
||||
}
|
||||
}
|
||||
return instance;
|
||||
return instance.data();
|
||||
}
|
||||
|
||||
AudioInput::AudioInput(QObject *parent) : QObject(parent)
|
||||
@@ -79,7 +83,7 @@ void AudioInput::startAudio()
|
||||
// 调大缓冲区以避免溢出
|
||||
m_audioSource->setBufferSize(128000);
|
||||
|
||||
// start() 返回一个 QIODevice,我们可以从中读取数据
|
||||
// start() 返回一个 QIODevice,可以从中读取数据
|
||||
m_ioDevice = m_audioSource->start();
|
||||
|
||||
if (m_ioDevice) {
|
||||
@@ -107,8 +111,8 @@ void AudioInput::stopAudio()
|
||||
if (!m_rawPCMData.isEmpty()) {
|
||||
wavData = generateWavHeader(m_rawPCMData.size());
|
||||
wavData.append(m_rawPCMData);
|
||||
|
||||
// 如果需要保存文件
|
||||
#ifdef QT_DEBUG
|
||||
// 如果需要保存文件(Debug下启用)
|
||||
if (!m_outputFilePath.isEmpty()) {
|
||||
QFile file(m_outputFilePath);
|
||||
if (file.open(QIODevice::WriteOnly)) {
|
||||
@@ -117,10 +121,9 @@ void AudioInput::stopAudio()
|
||||
qDebug() << "Saved WAV to:" << m_outputFilePath;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
m_rawPCMData.clear();
|
||||
}
|
||||
|
||||
isAutoRecording = false;
|
||||
isAutoThreshold = false;
|
||||
|
||||
@@ -138,18 +141,28 @@ void AudioInput::onReadyRead()
|
||||
QByteArray data = m_ioDevice->readAll();
|
||||
if (data.isEmpty()) return;
|
||||
|
||||
// 1. 保存原始 PCM 数据
|
||||
// 保存原始 PCM 数据
|
||||
m_rawPCMData.append(data);
|
||||
|
||||
// 2. 计算 RMS (仅用于分析,取最后一小段或者整体计算,这里计算当前块的RMS)
|
||||
m_rmsValue = calculateRMS(data);
|
||||
// 计算 RMS (仅用于分析,计算当前块的RMS)
|
||||
const qreal currentRms = calculateRMS(data);
|
||||
m_rmsValue = currentRms;
|
||||
// 计算平滑RMS (用于防止低频杂波突然打断静音检测)
|
||||
constexpr qreal alpha = 0.3; // 70% 历史权重, 30% 当前权重
|
||||
if (qFuzzyIsNull(m_smoothRms)) {
|
||||
// 如果是第一帧数据,直接赋值,避免从0开始慢慢爬升
|
||||
m_smoothRms = currentRms;
|
||||
} else {
|
||||
// 新值 = (旧值 * (1 - alpha)) + (当前值 * alpha)
|
||||
m_smoothRms = (m_smoothRms * (1.0 - alpha)) + (currentRms * alpha);
|
||||
}
|
||||
|
||||
// 3. 自动停止逻辑 (VAD)
|
||||
// 自动停止逻辑 (VAD)
|
||||
if (isAutoRecording) {
|
||||
// 输出 RMS 用于调试
|
||||
// qDebug() << "RMS:" << m_rmsValue;
|
||||
qDebug() << "Raw:" << currentRms << " Smooth:" << m_smoothRms;
|
||||
|
||||
if (m_rmsValue < m_silenceThreshold) {
|
||||
if (m_smoothRms < m_silenceThreshold) {
|
||||
// 静音状态
|
||||
if (!m_silenceTimer->isActive()) {
|
||||
m_silenceTimer->start(m_silenceDuration);
|
||||
@@ -161,10 +174,10 @@ void AudioInput::onReadyRead()
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 自动阈值计算逻辑
|
||||
// 自动阈值计算逻辑
|
||||
if (isAutoThreshold) {
|
||||
m_rmsValues.push_back(m_rmsValue);
|
||||
emit rmsRealValue(m_rmsValue);
|
||||
m_rmsValues.push_back(m_smoothRms);
|
||||
emit rmsRealValue(m_smoothRms);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,7 +185,7 @@ qreal AudioInput::calculateRMS(const QByteArray& buffer)
|
||||
{
|
||||
if (buffer.isEmpty()) return 0;
|
||||
|
||||
// 假设是 Int16 格式 (16位深)
|
||||
// 设定为 Int16 格式 (16位深)
|
||||
// 如果是 Stereo,数据排列是 L R L R...
|
||||
// 简单的 RMS 计算可以将所有通道数据视为一个长序列
|
||||
|
||||
@@ -242,7 +255,7 @@ void AudioInput::startAutoStopAudio(const qreal silenceThreshold, const int sile
|
||||
}
|
||||
|
||||
// 启动阈值计算
|
||||
void AudioInput::startAutoThresholdClu(int Duration)
|
||||
void AudioInput::startAutoThresholdClu(const int Duration)
|
||||
{
|
||||
isAutoThreshold = true;
|
||||
m_rmsValues.clear();
|
||||
@@ -250,19 +263,36 @@ void AudioInput::startAutoThresholdClu(int Duration)
|
||||
m_thresholdTimer->start(Duration);
|
||||
}
|
||||
|
||||
/**
|
||||
* 2025.12.30重构 Misaki
|
||||
* 在均值阈值计算的基础上增加了N倍标准差
|
||||
* 即阈值 = 均值 + N * 标准差(N取3)
|
||||
*/
|
||||
void AudioInput::thresholdTimeout()
|
||||
{
|
||||
isAutoThreshold = false;
|
||||
stopAudio(); // 内部会处理 stop
|
||||
|
||||
if (!m_rmsValues.empty()) {
|
||||
const double sum = std::accumulate(m_rmsValues.begin(), m_rmsValues.end(), 0.0);
|
||||
const double avg = sum / m_rmsValues.size();
|
||||
m_silenceThreshold = avg + 500.0; // 这里的 500 是经验值,可以根据需要调整
|
||||
emit thresholdCalculated(m_silenceThreshold);
|
||||
} else {
|
||||
if (m_rmsValues.empty()) {
|
||||
emit thresholdCalculated(0);
|
||||
return;
|
||||
}
|
||||
// 计算均值
|
||||
const double mean = std::accumulate(m_rmsValues.begin(), m_rmsValues.end(), 0.0) / m_rmsValues.size();
|
||||
// 计算标准差
|
||||
const double sq_sum = std::inner_product(m_rmsValues.begin(), m_rmsValues.end(), m_rmsValues.begin(), 0.0);
|
||||
double variance = (sq_sum / m_rmsValues.size()) - (mean * mean);
|
||||
// 防止浮点误差导致负数
|
||||
if (variance < 0) variance = 0;
|
||||
const double stdDev = std::sqrt(variance);
|
||||
// 阈值 = 均值 + 3 * 标准差
|
||||
const double bestThreshold = mean + 3 * stdDev;
|
||||
m_silenceThreshold = std::max(bestThreshold, 150.0);
|
||||
m_silenceThreshold = std::min(m_silenceThreshold, 30000.0);
|
||||
qDebug() << "Auto Threshold Calc -> Mean:" << mean
|
||||
<< " StdDev:" << stdDev
|
||||
<< " Result:" << m_silenceThreshold;
|
||||
emit thresholdCalculated(m_silenceThreshold);
|
||||
}
|
||||
|
||||
QByteArray AudioInput::generateWavHeader(const quint32 dataSize) const {
|
||||
@@ -285,7 +315,7 @@ QByteArray AudioInput::generateWavHeader(const quint32 dataSize) const {
|
||||
|
||||
header.numChannels = static_cast<quint16>(m_format.channelCount());
|
||||
header.sampleRate = static_cast<quint32>(m_format.sampleRate());
|
||||
header.bitsPerSample = 16; // 我们强制使用了 Int16
|
||||
header.bitsPerSample = 16; // 强制使用了 Int16
|
||||
|
||||
header.byteRate = header.sampleRate * header.numChannels * (header.bitsPerSample / 8);
|
||||
header.blockAlign = header.numChannels * (header.bitsPerSample / 8);
|
||||
|
||||
@@ -6,15 +6,20 @@
|
||||
#include <QMediaDevices>
|
||||
#include <QDataStream>
|
||||
|
||||
AudioOutput *AudioOutput::instance = nullptr;
|
||||
QScopedPointer<AudioOutput> AudioOutput::instance; // 使用QScopedPointer去管理单例,自动析构
|
||||
QMutex AudioOutput::mutex;
|
||||
|
||||
AudioOutput *AudioOutput::getInstance()
|
||||
{
|
||||
// 懒汉式(单线程播放,无需考虑加锁)
|
||||
if (instance == nullptr) {
|
||||
instance = new AudioOutput();
|
||||
if (instance.isNull()) { // 若未访问
|
||||
QMutexLocker locker(&mutex);
|
||||
if (instance.isNull()) {
|
||||
// 使用reset初始化
|
||||
instance.reset(new AudioOutput());
|
||||
}
|
||||
}
|
||||
return instance;
|
||||
return instance.data(); // 返回单例实例
|
||||
}
|
||||
|
||||
AudioOutput::AudioOutput(QObject *parent) : QObject(parent), mediaPlayer(nullptr), audioOutput(nullptr), audioSink(nullptr), audioBuffer(nullptr)
|
||||
@@ -39,7 +44,6 @@ AudioOutput::AudioOutput(QObject *parent) : QObject(parent), mediaPlayer(nullptr
|
||||
format.setSampleFormat(QAudioFormat::Int16); // 采样格式
|
||||
audioSink = new QAudioSink(QMediaDevices::defaultAudioOutput(), format, this);
|
||||
audioBuffer = new QBuffer(this);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -58,8 +58,8 @@ struct ControlDataPacket {
|
||||
*/
|
||||
class NetworkDO final : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
Q_DISABLE_COPY(NetworkDO) // 禁用拷贝
|
||||
Q_OBJECT
|
||||
Q_DISABLE_COPY(NetworkDO) // 禁用拷贝
|
||||
|
||||
public:
|
||||
// 单例访问点
|
||||
@@ -80,9 +80,9 @@ public:
|
||||
|
||||
signals:
|
||||
// 业务接收信号
|
||||
void audioPacketReceived(const AudioDataPacket& packet);
|
||||
void controlPacketReceived(const ControlDataPacket& packet);
|
||||
void errorOccurred(const QString& errorMsg);
|
||||
void audioPacketReceived(const AudioDataPacket& packet); // 音频数据准备完成信号
|
||||
void controlPacketReceived(const ControlDataPacket& packet); // 控制数据准备完成信号
|
||||
void errorOccurred(const QString& errorMsg); // 错误信号
|
||||
|
||||
public slots:
|
||||
// 接收底层 JSON 数据
|
||||
|
||||
@@ -59,11 +59,11 @@ public:
|
||||
void setGlobalFont(const QFont &newFont);
|
||||
|
||||
/**
|
||||
* 参数建议值:
|
||||
效果类型 gravity dampFactor holdDuration
|
||||
柔和下落 600.0f 0.85f 1.0f
|
||||
快速坠落 1200.0f 0.6f 0.3f
|
||||
弹性效果 900.0f 0.75f 0.8f
|
||||
* 参数建议值:<br>
|
||||
效果类型 gravity dampFactor holdDuration <br>
|
||||
柔和下落 600.0f 0.85f 1.0f <br>
|
||||
快速坠落 1200.0f 0.6f 0.3f <br>
|
||||
弹性效果 900.0f 0.75f 0.8f <br>
|
||||
真实物理模拟 980.0f 0.82f 0.5f
|
||||
*/
|
||||
void setHoldDuration(const float seconds) { defaultHoldDuration = seconds; }
|
||||
|
||||
@@ -83,7 +83,8 @@ AudioPage::AudioPage(QWidget* parent)
|
||||
|
||||
ElaScrollPageArea* audioInputProgressBarArea = new ElaScrollPageArea(this);
|
||||
QHBoxLayout* audioInputProgressBarLayout = new QHBoxLayout(audioInputProgressBarArea);
|
||||
ElaText* audioInputProgressBarText = new ElaText("录音阈值", this);
|
||||
ElaText* audioInputProgressBarText = new ElaText("静音检测阈值", this);
|
||||
audioInputProgressBarText->setToolTip("测试当前环境的静音阈值,用于对话中的静音检测");
|
||||
audioInputProgressBarText->setTextPixelSize(15);
|
||||
audioInputProgressBarLayout->addWidget(audioInputProgressBarText);
|
||||
audioInputProgressBarLayout->addWidget(audioInputProgressBar, 1);
|
||||
|
||||
Reference in New Issue
Block a user