327 lines
11 KiB
C++
327 lines
11 KiB
C++
// Copyright 2007-2023 The Mumble Developers. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license
|
|
// that can be found in the LICENSE file at the root of the
|
|
// Mumble source tree or at <https://www.mumble.info/LICENSE>.
|
|
|
|
#ifndef MUMBLE_MUMBLE_AUDIOINPUT_H_
|
|
#define MUMBLE_MUMBLE_AUDIOINPUT_H_
|
|
|
|
#include <QElapsedTimer>
|
|
#include <QObject>
|
|
#include <QThread>
|
|
|
|
#include <boost/array.hpp>
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include <cstdint>
|
|
#include <fstream>
|
|
#include <list>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <vector>
|
|
|
|
#include <speex/speex_echo.h>
|
|
#include <speex/speex_preprocess.h>
|
|
#include <speex/speex_resampler.h>
|
|
|
|
#include "Audio.h"
|
|
#include "AudioOutputToken.h"
|
|
#include "EchoCancelOption.h"
|
|
#include "MumbleProtocol.h"
|
|
#include "Settings.h"
|
|
#include "Timer.h"
|
|
|
|
class AudioInput;
|
|
struct OpusEncoder;
|
|
struct ReNameNoiseDenoiseState;
|
|
typedef boost::shared_ptr< AudioInput > AudioInputPtr;
|
|
|
|
/**
|
|
* A chunk of audio data to process
|
|
* This struct wraps pointers to two dynamically allocated arrays, containing
|
|
* PCM samples of microphone and speaker readback data (for echo cancellation).
|
|
* Does not handle pointer ownership, so you'll have to deallocate them yourself.
|
|
*/
|
|
struct AudioChunk {
|
|
AudioChunk() : mic(nullptr), speaker(nullptr) {}
|
|
explicit AudioChunk(short *mic) : mic(mic), speaker(nullptr) {}
|
|
AudioChunk(short *mic, short *speaker) : mic(mic), speaker(speaker) {}
|
|
bool empty() const { return mic == nullptr; }
|
|
|
|
short *mic; ///< Pointer to microphone samples
|
|
short *speaker; ///< Pointer to speaker samples, nullptr if echo cancellation is disabled
|
|
};
|
|
|
|
/*
|
|
* According to https://www.speex.org/docs/manual/speex-manual/node7.html
|
|
* "It is important that, at any time, any echo that is present in the input
|
|
* has already been sent to the echo canceller as echo_frame."
|
|
* Thus, we artificially introduce a small lag in the microphone by means of
|
|
* a queue, so as to be sure the speaker data always precedes the microphone.
|
|
*
|
|
* There are conflicting requirements for the queue:
|
|
* - it has to be small enough not to cause a noticeable lag in the voice
|
|
* - it has to be large enough not to force us to drop packets frequently
|
|
* when the addMic() and addEcho() callbacks are called in a jittery way
|
|
* - its fill level must be controlled so it does not operate towards zero
|
|
* elements size, as this would not provide the lag required for the
|
|
* echo canceller to work properly.
|
|
*
|
|
* The current implementation uses a 5 elements queue, with a control
|
|
* statemachine that introduces packet drops to control the fill level
|
|
* to at least 2 (plus or minus one) and less than 4 elements.
|
|
* With a 10ms chunk, this queue should introduce a ~20ms lag to the voice.
|
|
*/
|
|
class Resynchronizer {
|
|
public:
|
|
/**
|
|
* Add a microphone sample to the resynchronizer queue
|
|
* The resynchronizer may decide to drop the sample, and in that case
|
|
* the pointer will be deallocated not lo leak memory
|
|
*
|
|
* \param mic pointer to a dynamically allocated array with PCM data
|
|
*/
|
|
void addMic(short *mic);
|
|
|
|
/**
|
|
* Add a speaker sample to the resynchronizer
|
|
* The resynchronizer may decide to drop the sample, and in that case
|
|
* the pointer will be deallocated not lo leak memory
|
|
*
|
|
* \param mic pointer to a dynamically allocated array with PCM data
|
|
* \return If microphone data is available, the resynchronizer will return a
|
|
* valid audio chunk to encode, otherwise an empty chunk will be returned
|
|
*/
|
|
AudioChunk addSpeaker(short *speaker);
|
|
|
|
/**
|
|
* Reinitialize the resynchronizer, emptying the queue in the process.
|
|
*/
|
|
void reset();
|
|
|
|
/**
|
|
* \return the nominal lag that the resynchronizer tries to enforce on the
|
|
* microphone data, in order to make sure the speaker data is always passed
|
|
* first to the echo canceller
|
|
*/
|
|
int getNominalLag() const { return 2; }
|
|
|
|
~Resynchronizer();
|
|
|
|
bool bDebugPrintQueue = false; ///< Enables printing queue fill level stats
|
|
|
|
private:
|
|
/**
|
|
* Print queue level stats for debugging purposes
|
|
* \param mic used to distinguish between addMic() and addSpeaker()
|
|
*/
|
|
void printQueue(char who);
|
|
|
|
// TODO: there was a mutex (qmEcho), but can the callbacks be called concurrently?
|
|
mutable std::mutex m;
|
|
std::list< short * > micQueue; ///< Queue of microphone samples
|
|
enum { S0, S1a, S1b, S2, S3, S4a, S4b, S5 } state = S0; ///< Queue fill control statemachine
|
|
};
|
|
|
|
class AudioInputRegistrar {
|
|
private:
|
|
Q_DISABLE_COPY(AudioInputRegistrar)
|
|
public:
|
|
static QMap< QString, AudioInputRegistrar * > *qmNew;
|
|
static QString current;
|
|
static AudioInputPtr newFromChoice(QString choice = QString());
|
|
|
|
const QString name;
|
|
int priority;
|
|
|
|
/// A list of echo cancellation options available for this backend.
|
|
std::vector< EchoCancelOptionID > echoOptions;
|
|
|
|
AudioInputRegistrar(const QString &n, int priority = 0);
|
|
virtual ~AudioInputRegistrar();
|
|
virtual AudioInput *create() = 0;
|
|
virtual const QVariant getDeviceChoice() = 0;
|
|
virtual const QList< audioDevice > getDeviceChoices() = 0;
|
|
virtual void setDeviceChoice(const QVariant &, Settings &) = 0;
|
|
|
|
/// Check that given combination of echoOption and outputSystem combination is suitable for echo cancellation
|
|
virtual bool canEcho(EchoCancelOptionID echoOptionId, const QString &outputSystem) const = 0;
|
|
virtual bool canExclusive() const;
|
|
|
|
/**
|
|
* Check if Mumble's microphone access has been denied by the OS.
|
|
* Both Windows and macOS have builtin privacy safeguards that display a message asking for users'
|
|
* consent when apps are trying to use the microphone, and/or provide ways to deny the microphone
|
|
* access of some apps.
|
|
* This function should check if Mumble has the permission to use the microphone.
|
|
* Note: It is possible that this result could only be known after trying to initialize the audio backend.
|
|
* Generally, call this function after attempts to initialize the AudioInput have been made.
|
|
* @return true if microphone access is denied.
|
|
*/
|
|
virtual bool isMicrophoneAccessDeniedByOS() = 0;
|
|
};
|
|
|
|
class AudioInput : public QThread {
|
|
friend class AudioNoiseWidget;
|
|
friend class AudioEchoWidget;
|
|
friend class AudioStats;
|
|
friend class AudioInputDialog;
|
|
|
|
private:
|
|
Q_OBJECT
|
|
Q_DISABLE_COPY(AudioInput)
|
|
protected:
|
|
typedef enum { SampleShort, SampleFloat } SampleFormat;
|
|
typedef void (*inMixerFunc)(float *RESTRICT, const void *RESTRICT, unsigned int, unsigned int, quint64);
|
|
|
|
private:
|
|
bool bDebugDumpInput; ///< When true, dump pcm data to debug the echo canceller
|
|
std::ofstream outMic, outSpeaker, outProcessed; ///< Files to dump raw pcm data
|
|
|
|
SpeexResamplerState *srsMic, *srsEcho;
|
|
|
|
std::unique_ptr< Mumble::Protocol::byte[] > m_legacyBuffer;
|
|
Mumble::Protocol::UDPAudioEncoder< Mumble::Protocol::Role::Client > m_udpEncoder;
|
|
|
|
unsigned int iMicFilled, iEchoFilled;
|
|
inMixerFunc imfMic, imfEcho;
|
|
inMixerFunc chooseMixer(const unsigned int nchan, SampleFormat sf, quint64 mask);
|
|
void resetAudioProcessor();
|
|
|
|
OpusEncoder *opusState;
|
|
#ifdef USE_RENAMENOISE
|
|
ReNameNoiseDenoiseState *denoiseState;
|
|
#endif
|
|
bool selectCodec();
|
|
void selectNoiseCancel();
|
|
|
|
typedef boost::array< unsigned char, 960 > EncodingOutputBuffer;
|
|
|
|
int encodeOpusFrame(short *source, int size, EncodingOutputBuffer &buffer);
|
|
|
|
QElapsedTimer qetLastMuteCue;
|
|
|
|
AudioOutputToken m_activeAudioCue;
|
|
|
|
protected:
|
|
Mumble::Protocol::AudioCodec m_codec;
|
|
SampleFormat eMicFormat, eEchoFormat;
|
|
|
|
unsigned int iMicChannels, iEchoChannels;
|
|
unsigned int iMicFreq, iEchoFreq;
|
|
unsigned int iMicLength, iEchoLength;
|
|
unsigned int iMicSampleSize, iEchoSampleSize;
|
|
unsigned int iEchoMCLength, iEchoFrameSize;
|
|
quint64 uiMicChannelMask, uiEchoChannelMask;
|
|
|
|
bool bEchoMulti;
|
|
Settings::NoiseCancel noiseCancel;
|
|
// Standard microphone sample rate (samples/s)
|
|
static const unsigned int iSampleRate = SAMPLE_RATE;
|
|
/// Based the sample rate, 48,000 samples/s = 48 samples/ms.
|
|
/// For each 10 ms, this yields 480 samples. This corresponds numerically with the calculation:
|
|
/// iFrameSize = 48000 / 100 = 480 samples, allowing a consistent 10ms of audio data per frame.
|
|
static const int iFrameSize = SAMPLE_RATE / 100;
|
|
|
|
QMutex qmSpeex;
|
|
SpeexPreprocessState *sppPreprocess;
|
|
SpeexEchoState *sesEcho;
|
|
|
|
/// bResetEncoder is a flag that notifies
|
|
/// our encoder functions that the encoder
|
|
/// needs to be reset.
|
|
bool bResetEncoder;
|
|
|
|
/// Encoded audio rate in bit/s
|
|
int iAudioQuality;
|
|
bool bAllowLowDelay;
|
|
/// Number of 10ms audio "frames" per packet (!= frames in packet)
|
|
int iAudioFrames;
|
|
|
|
/// The minimum time in ms that has to pass between the playback of two consecutive mute cues.
|
|
static constexpr unsigned int MUTE_CUE_DELAY = 5000;
|
|
|
|
float *pfMicInput;
|
|
float *pfEchoInput;
|
|
|
|
Resynchronizer resync;
|
|
std::vector< short > opusBuffer;
|
|
|
|
void encodeAudioFrame(AudioChunk chunk);
|
|
void addMic(const void *data, unsigned int nsamp);
|
|
void addEcho(const void *data, unsigned int nsamp);
|
|
|
|
volatile bool bRunning;
|
|
volatile bool bPreviousVoice;
|
|
volatile bool previousPTT;
|
|
|
|
int iFrameCounter;
|
|
int iSilentFrames;
|
|
int iHoldFrames;
|
|
int iBufferedFrames;
|
|
|
|
QList< QByteArray > qlFrames;
|
|
void flushCheck(const QByteArray &, bool terminator, std::int32_t voiceTargetID);
|
|
|
|
void initializeMixer();
|
|
|
|
static void adjustBandwidth(int bitspersec, int &bitrate, int &frames, bool &allowLowDelay);
|
|
|
|
bool bUserIsMuted;
|
|
|
|
signals:
|
|
void doDeaf();
|
|
void doMute();
|
|
void doMuteCue();
|
|
/// A signal emitted if audio input is being encountered
|
|
///
|
|
/// @param inputPCM The encountered input PCM
|
|
/// @param sampleCount The amount of samples in the input
|
|
/// @param channelCount The amount of channels in the input
|
|
/// @param sampleRate The used sample rate in Hz
|
|
/// @param isSpeech Whether Mumble considers the input to be speech
|
|
void audioInputEncountered(short *inputPCM, unsigned int sampleCount, unsigned int channelCount,
|
|
unsigned int sampleRate, bool isSpeech);
|
|
|
|
public:
|
|
typedef enum { ActivityStateIdle, ActivityStateReturnedFromIdle, ActivityStateActive } ActivityState;
|
|
|
|
ActivityState activityState;
|
|
|
|
bool bResetProcessor;
|
|
|
|
Timer tIdle;
|
|
|
|
int iBitrate;
|
|
float dPeakSpeaker, dPeakSignal, dMaxMic, dPeakMic, dPeakCleanMic;
|
|
float fSpeechProb;
|
|
|
|
static int getNetworkBandwidth(int bitrate, int frames);
|
|
static void setMaxBandwidth(int bitspersec);
|
|
|
|
/// Construct an AudioInput.
|
|
///
|
|
/// This constructor is only ever called by Audio::startInput(), and is guaranteed
|
|
/// to be called on the application's main thread.
|
|
AudioInput();
|
|
|
|
/// Destroy an AudioInput.
|
|
///
|
|
/// This destructor is only ever called by Audio::stopInput() and Audio::stop(),
|
|
/// and is guaranteed to be called on the application's main thread.
|
|
~AudioInput() Q_DECL_OVERRIDE;
|
|
void run() Q_DECL_OVERRIDE = 0;
|
|
virtual bool isAlive() const;
|
|
bool isTransmitting() const;
|
|
|
|
void updateUserMuteDeafState(const ClientUser *user);
|
|
|
|
protected:
|
|
virtual void onUserMutedChanged();
|
|
|
|
public slots:
|
|
void onUserMuteDeafStateChanged();
|
|
};
|
|
|
|
#endif
|