Ever wanted to build a performant audio capture system, but didn't know where to start? It definitely happened to me.
Since there aren't many modern C++ implementations of this out there, I decided to share what I've learned while building one and give you a starting point based on what I've managed to build!
DISCLAIMER: Familiarity with OOP, basic C++ principles, general programming concepts, and gRPC is advised!
The motivation
This is part of a much larger project I'm working on. The full source code is linked at the end of the post! 😊
The CMake configuration
Every modern C/C++ application requires some kind of build configuration. I chose CMake since it's perfect for cross-platform compilation!
# Define the minimum required version
# and the project version
cmake_minimum_required(VERSION 3.16)
project(audio_capture VERSION 1.0)
# Set the C++ standard
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Include additional common CMake code for working with gRPC
include(./cmake/common.cmake)
# Include FetchContent for fetching code and files remotely.
include(FetchContent)
# Find all .proto files in the protos directory
file(GLOB_RECURSE PROTO_FILES "${CMAKE_SOURCE_DIR}/protos/*.proto")
# Create lists to store generated files
set(PROTO_SRCS)
set(PROTO_HDRS)
set(GRPC_SRCS)
set(GRPC_HDRS)
# Generate protocol buffer and gRPC code for each .proto file
foreach(proto_file ${PROTO_FILES})
get_filename_component(proto_path "${proto_file}" PATH)
get_filename_component(proto_name "${proto_file}" NAME_WE)
list(APPEND PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.cc")
list(APPEND PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.h")
list(APPEND GRPC_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.grpc.pb.cc")
list(APPEND GRPC_HDRS "${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.grpc.pb.h")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.h"
"${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.grpc.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.grpc.pb.h"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out=generate_mock_code=false:"${CMAKE_CURRENT_BINARY_DIR}" # Don't generate *_mock.grpc.pb.h test mocks
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${proto_file}"
DEPENDS "${proto_file}")
endforeach()
# Create protocol buffer library
add_library(audio_proto
${GRPC_SRCS}
${GRPC_HDRS}
${PROTO_SRCS}
${PROTO_HDRS})
target_link_libraries(audio_proto
absl::check
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF})
# Configure PortAudio
FetchContent_Declare(
portaudio
GIT_REPOSITORY https://github.com/PortAudio/portaudio.git
GIT_TAG v19.7.0
)
# Configure PortAudio build options
set(PA_BUILD_SHARED OFF CACHE BOOL "")
set(PA_BUILD_TESTS OFF CACHE BOOL "")
set(PA_BUILD_EXAMPLES OFF CACHE BOOL "")
# Make PortAudio available
FetchContent_MakeAvailable(portaudio)
# Configure spdlog
FetchContent_Declare(
spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git
GIT_TAG v1.15.1
)
FetchContent_MakeAvailable(spdlog)
# Find FFTW3
find_package(PkgConfig REQUIRED)
pkg_check_modules(FFTW3 REQUIRED fftw3)
# Include directories
include_directories(
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_SOURCE_DIR}/includes
${FFTW3_INCLUDE_DIRS}
${portaudio_SOURCE_DIR}/include
)
# Create main executable
file(GLOB_RECURSE SRC_FILES src/*.cc)
add_executable(audio_capture ${SRC_FILES})
# Link dependencies
target_link_libraries(audio_capture PRIVATE
audio_proto
portaudio_static
spdlog::spdlog
${FFTW3_LIBRARIES}
absl::check
absl::flags
absl::flags_parse
absl::log
${_REFLECTION}
${_GRPC_GRPCPP}
${_PROTOBUF_LIBPROTOBUF}
)
# Platform-specific configuration
if(WIN32)
target_link_libraries(audio_capture PRIVATE winmm)
elseif(UNIX AND NOT APPLE)
find_package(Threads REQUIRED)
target_link_libraries(audio_capture PRIVATE
pthread
fftw3
)
endif()
# Add FFTW3 library directories so the linker can find libfftw3
target_link_directories(audio_capture PRIVATE ${FFTW3_LIBRARY_DIRS})
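The variables used above — _PROTOBUF_PROTOC, _GRPC_CPP_PLUGIN_EXECUTABLE, _REFLECTION, _GRPC_GRPCPP and _PROTOBUF_LIBPROTOBUF — come from cmake/common.cmake, which is based on the helper shipped with the gRPC C++ examples. The real file is in the repository; a minimal sketch of what it has to provide, assuming gRPC, Protobuf and Abseil are already installed and discoverable via find_package, looks roughly like this:
# cmake/common.cmake (minimal sketch, assuming a pre-installed gRPC/Protobuf/Abseil)
find_package(Threads REQUIRED)
find_package(absl CONFIG REQUIRED)      # provides the absl:: targets linked above
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
# Names referenced by the main CMakeLists.txt
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
set(_REFLECTION gRPC::grpc++_reflection)
set(_GRPC_GRPCPP gRPC::grpc++)
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)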
The project architecture
The whole project architecture is quite simple, maintainable and made to scale.
|-- CMakeLists.txt
|-- Makefile
|-- cmake
| `-- common.cmake
|-- includes
| |-- audio_capture.h
| `-- logger.h
|-- protos
| `-- device_stream.proto
`-- src
|-- audio_capture.cc
|-- logger.cc
`-- main.cc
The logger
I've used spdlog for the beautiful logging output that comes with it!
Here is the .h definition (all of the source code is on GitHub):
#pragma once
#include <cstddef>
#include <memory>
#include <spdlog/sinks/basic_file_sink.h>
#include <spdlog/spdlog.h>
#include <string>
class Logger {
public:
static void init();
static std::shared_ptr<Logger> get();
static constexpr size_t queue_items_max = 8192;
static constexpr size_t backing_thread_count = 1;
static constexpr size_t max_file_size = 10 * 1024 * 1024;
static constexpr size_t max_files = 5;
// constexpr std::string can't be a static data member, so store the path as a C string
static constexpr const char *logs_path = "logs/app.log";
static void info(const std::string &message) {
logger_->info(message);
}
static void warn(const std::string &message) {
logger_->warn(message);
}
static void error(const std::string &message) {
logger_->error(message);
}
private:
static std::shared_ptr<spdlog::logger> logger_;
};
The logger implementation:
#include "logger.h"
#include "spdlog/async.h"
#include "spdlog/common.h"
#include "spdlog/sinks/rotating_file_sink.h"
#include "spdlog/sinks/stdout_color_sinks.h"
#include "spdlog/spdlog.h"
#include <cstdlib> // for exit / EXIT_FAILURE
#include <iostream>
#include <memory>
// Initialize logger as nullptr
std::shared_ptr<spdlog::logger> Logger::logger_ = nullptr;
void Logger::init() {
try {
spdlog::init_thread_pool(queue_items_max, backing_thread_count);
auto console_sink = std::make_shared<spdlog::sinks::stdout_color_sink_mt>();
auto file_rotating_sink =
std::make_shared<spdlog::sinks::rotating_file_sink_mt>(
logs_path, max_file_size, max_files);
console_sink->set_level(spdlog::level::info);
file_rotating_sink->set_level(spdlog::level::debug);
logger_ = std::make_shared<spdlog::async_logger>(
"main_logger",
spdlog::sinks_init_list{console_sink, file_rotating_sink},
spdlog::thread_pool(), spdlog::async_overflow_policy::block);
spdlog::set_default_logger(logger_);
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S] [%^%l%$] [thread %t] %v");
#ifdef NDEBUG
spdlog::set_level(spdlog::level::info);
#else
spdlog::set_level(spdlog::level::debug);
#endif
spdlog::info("Logger initialized successfully!");
} catch (const spdlog::spdlog_ex &ex) {
std::cerr << "Log initialization failed: " << ex.what() << std::endl;
exit(EXIT_FAILURE);
}
}
std::shared_ptr<Logger> Logger::get() {
static std::shared_ptr<Logger> instance = std::make_shared<Logger>();
return instance;
}
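Once Logger::init() has run (in this project it's the first thing main does), any translation unit that includes logger.h can log through the static helpers. A minimal usage sketch — the spdlog::shutdown() call at the end is optional, but it flushes the async queue before the process exits:
#include "logger.h"
#include <spdlog/spdlog.h>
int main() {
  Logger::init();                    // set up the async console + rotating-file sinks once
  Logger::info("capture starting");  // goes to stdout and logs/app.log
  Logger::warn("no input device selected yet");
  Logger::error("something went wrong");
  spdlog::shutdown();                // optional: flush the async queue before exiting
  return 0;
}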
The protobuf gRPC messages & RPCs
The current implementation is very simple and straightforward. Both the server and the client share the same .proto file, and they communicate over a client-side stream.
syntax = "proto3";
package audio_stream;
service AudioStream {
rpc StreamAudio(stream AudioChunk) returns (StreamResponse);
}
message AudioChunk {
bytes audio_data = 1;
repeated double spectral_data = 2;
double energy = 3;
double zero_crossings = 4;
double speech_band_energy = 5;
bool voice_detected = 6;
int64 timestamp = 7;
}
message StreamResponse {
bool success = 1;
string response = 2;
}
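Running protoc with the gRPC plugin (that's what the add_custom_command in the CMake section does) turns this file into device_stream.pb.h/.cc and device_stream.grpc.pb.h/.cc. Each message becomes a plain C++ class with typed setters and getters — the same API you'll see used in sendAudioStream later. A tiny sketch of that generated API (the helper function is just for illustration):
#include <string>
#include "device_stream.pb.h"  // generated from device_stream.proto
// Illustration only: build an AudioChunk the same way sendAudioStream does later
audio_stream::AudioChunk makeExampleChunk(const std::string &rawBytes) {
  audio_stream::AudioChunk chunk;
  chunk.set_audio_data(rawBytes);  // proto "bytes" maps to std::string
  chunk.add_spectral_data(0.42);   // "repeated double" gets add_/size() accessors
  chunk.set_energy(0.0);
  chunk.set_voice_detected(false);
  chunk.set_timestamp(0);
  return chunk;
}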
The audio capture system
The audio capture system is implemented using PortAudio (portaudio.h), a library for cross-platform audio development!
Here is my implementation of the .h file (header):
#pragma once
#include "portaudio.h"
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <vector>
#include "device_stream.grpc.pb.h" // gRPC generated header
// Forward declaration of Logger
class Logger;
// AudioCapture class for handling audio streaming
class AudioCapture {
public:
AudioCapture();
~AudioCapture();
static constexpr size_t sample_rate = 44100; // Audio sample rate
static constexpr size_t frames_per_buffer = 512; // Frames per buffer
static constexpr int numInputChannels = 1; // Number of input channels (mono)
static constexpr int numOutputChannels = 0; // Number of output channels (no output)
// Starts the audio stream
bool start();
// Stops the audio stream
void stop();
// Checks if the stream is running
bool isRunning() const;
// Retrieves available audio devices
std::unordered_map<size_t, std::shared_ptr<const PaDeviceInfo>> getDevices();
// Opens the audio stream with the specified device index
void openAudioStream(int deviceIndex);
// Gets the total number of audio devices
int getDeviceCount() const;
// Creates stream parameters for a given device and number of channels
std::unique_ptr<PaStreamParameters> createStreamParameters(int device, int numChannels);
// Sends the audio stream to the gRPC server
void sendAudioStream(std::shared_ptr<audio_stream::AudioStream::Stub> stub);
// Closes the audio stream
void closeAudioStream();
// Retrieves the next audio chunk from the queue
std::vector<float> getNextAudioChunk();
private:
// Audio callback function for PortAudio; the signature must match PaStreamCallback,
// whose frame-count parameter is unsigned long (not size_t)
static int audioCallback(const void *input, void *output, unsigned long frameCount,
                         const PaStreamCallbackTimeInfo *timeInfo,
                         PaStreamCallbackFlags statusFlags, void *userData);
// Queue to hold audio chunks
std::queue<std::vector<float>> audioQueue;
// Mutex and condition variable for thread synchronization
std::mutex queueMutex;
std::condition_variable queueCondition;
// Atomic flag to manage the running state of the capture
std::atomic<bool> running_;
// Stream parameters for input and output
std::unique_ptr<PaStreamParameters> inputParameters;
std::unique_ptr<PaStreamParameters> outputParameters;
// Logger for logging events
std::shared_ptr<Logger> logger_;
// Pointer to the PortAudio stream
PaStream *stream_;
};
The implementation of audio capture:
// audio_capture.cc
#include "audio_capture.h"
#include "device_stream.pb.h"
#include "logger.h"
#include "portaudio.h"
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <grpcpp/client_context.h>
#include <grpcpp/support/sync_stream.h>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <utility>
#include <vector>
static void handleAudioError(PaError err, std::shared_ptr<Logger> logger) {
if (err != paNoError) {
logger->error("PortAudio error: " + std::string(Pa_GetErrorText(err)));
}
}
AudioCapture::AudioCapture()
    // Initializer list follows the member declaration order to avoid -Wreorder warnings
    : running_(false), logger_(std::make_shared<Logger>()), stream_(nullptr) {
  PaError err = Pa_Initialize();
  handleAudioError(err, logger_);
}
AudioCapture::~AudioCapture() {
PaError err = Pa_Terminate();
handleAudioError(err, logger_);
}
bool AudioCapture::start() {
if (running_)
return false;
handleAudioError(Pa_OpenDefaultStream(
&stream_, numInputChannels, numOutputChannels, paFloat32,
sample_rate, frames_per_buffer, audioCallback, this),
logger_);
PaError err = Pa_StartStream(stream_);
if (err != paNoError) {
logger_->error("Failed to start audio stream: " +
std::string(Pa_GetErrorText(err)));
return false;
}
running_ = true;
return true;
}
void AudioCapture::stop() {
  if (!running_) {
    return;
  }
  handleAudioError(Pa_StopStream(stream_), logger_);
  Pa_CloseStream(stream_);
  running_ = false;
  // Wake any thread blocked in getNextAudioChunk() so it can observe running_ == false
  queueCondition.notify_all();
}
bool AudioCapture::isRunning() const { return running_; }
int AudioCapture::getDeviceCount() const {
PaDeviceIndex deviceCount = Pa_GetDeviceCount();
return static_cast<int>(deviceCount);
}
std::vector<float> AudioCapture::getNextAudioChunk() {
std::unique_lock<std::mutex> lock(queueMutex);
queueCondition.wait(lock,
[this] { return !audioQueue.empty() || !running_; });
if (!audioQueue.empty()) {
std::vector<float> chunk = std::move(audioQueue.front());
audioQueue.pop();
return chunk;
}
return std::vector<float>();
}
void AudioCapture::closeAudioStream() {
  if (!stream_) {
    return;
  }
  if (Pa_IsStreamActive(stream_) == 1) {
    Pa_StopStream(stream_);
  }
  Pa_CloseStream(stream_);
  stream_ = nullptr;
}
void AudioCapture::openAudioStream(int deviceIndex) {
if (stream_) {
if (Pa_IsStreamActive(stream_) == 1) {
handleAudioError(Pa_StopStream(stream_), logger_);
}
Pa_CloseStream(stream_);
stream_ = nullptr;
}
// Reset parameters
inputParameters.reset();
outputParameters.reset();
  if (deviceIndex < 0 || deviceIndex >= getDeviceCount()) {
    logger_->error("Invalid device index");
    return;
  }
  const PaDeviceInfo *deviceInfo = Pa_GetDeviceInfo(deviceIndex);
  // Check the returned device info (not the index) before dereferencing it
  if (!deviceInfo) {
    logger_->error("Failed to get device info");
    return;
  }
logger_->info("Device Info: ");
logger_->info("Name: " + std::string(deviceInfo->name));
logger_->info("Max Input Channels: " +
std::to_string(deviceInfo->maxInputChannels));
logger_->info("Max Output Channels: " +
std::to_string(deviceInfo->maxOutputChannels));
logger_->info("Default Sample Rate: " +
std::to_string(deviceInfo->defaultSampleRate));
std::optional<PaStreamParameters> inputParams;
std::optional<PaStreamParameters> outputParams;
if (deviceInfo->maxInputChannels > 0) {
inputParameters =
createStreamParameters(deviceIndex, deviceInfo->maxInputChannels);
inputParams = *inputParameters;
}
if (deviceInfo->maxOutputChannels > 0) {
outputParameters =
createStreamParameters(deviceIndex, deviceInfo->maxOutputChannels);
outputParams = *outputParameters;
}
double deviceSampleRate = deviceInfo->defaultSampleRate;
handleAudioError(
Pa_OpenStream(&stream_, inputParams ? &*inputParams : nullptr,
outputParams ? &*outputParams : nullptr, deviceSampleRate,
frames_per_buffer, paClipOff, audioCallback, this),
logger_);
  std::string deviceName = std::string(deviceInfo->name);
  logger_->info("Stream successfully opened for device " + deviceName);
  handleAudioError(Pa_StartStream(stream_), logger_);
}
std::unique_ptr<PaStreamParameters>
AudioCapture::createStreamParameters(int device, int numChannels) {
auto parameters = std::make_unique<PaStreamParameters>();
const PaDeviceInfo *deviceInfo = Pa_GetDeviceInfo(device);
if (!deviceInfo) {
logger_->error("Failed to get device information for device: " +
std::to_string(device));
return nullptr;
}
int channels = std::min(numChannels, deviceInfo->maxInputChannels);
parameters->device = device;
parameters->channelCount = channels;
parameters->sampleFormat = paFloat32;
parameters->suggestedLatency = deviceInfo->defaultHighInputLatency;
parameters->hostApiSpecificStreamInfo = nullptr;
return parameters;
}
std::unordered_map<size_t, std::shared_ptr<const PaDeviceInfo>>
AudioCapture::getDevices() {
std::unordered_map<size_t, std::shared_ptr<const PaDeviceInfo>> devices;
PaDeviceIndex deviceCount = Pa_GetDeviceCount();
if (deviceCount < 0) {
logger_->error("Failed to retrieve PortAudio device count: " +
std::to_string(deviceCount));
return devices;
}
for (size_t i = 0; i < static_cast<size_t>(deviceCount); i++) {
const PaDeviceInfo *deviceInfo = Pa_GetDeviceInfo(i);
if (deviceInfo) {
devices.emplace(i, std::make_shared<const PaDeviceInfo>(*deviceInfo));
} else {
logger_->warn("Warning: Failed to get device info for device " +
std::to_string(i));
}
}
return devices;
}
int AudioCapture::audioCallback(const void *input, void *,
                                unsigned long framesPerBuffer,
                                const PaStreamCallbackTimeInfo *,
                                PaStreamCallbackFlags, void *userData) {
  if (!input) {
    return paContinue;
  }
  auto *self = static_cast<AudioCapture *>(userData);
  // Copy the incoming samples; this assumes a mono input stream (one sample per frame)
  std::vector<float> buffer(static_cast<const float *>(input),
                            static_cast<const float *>(input) + framesPerBuffer);
{
std::lock_guard<std::mutex> lock(self->queueMutex);
self->audioQueue.push(buffer);
}
self->queueCondition.notify_one();
return paContinue;
}
void AudioCapture::sendAudioStream(
std::shared_ptr<audio_stream::AudioStream::Stub> stub) {
grpc::ClientContext context;
audio_stream::StreamResponse response; // Define the response here
// Pass the response pointer as the second argument to StreamAudio
std::unique_ptr<grpc::ClientWriter<audio_stream::AudioChunk>> writer(
stub->StreamAudio(&context, &response));
if (!writer) {
logger_->error("Failed to create gRPC writer.");
return;
}
// Sending audio chunks in a stream
while (isRunning()) {
std::vector<float> chunk = getNextAudioChunk();
if (!chunk.empty()) {
audio_stream::AudioChunk audioChunk;
// Convert float vector to byte array (audio data)
std::string audioData(reinterpret_cast<const char *>(chunk.data()),
chunk.size() * sizeof(float));
audioChunk.set_audio_data(audioData);
// Fill in other fields like spectral data, energy, etc.
audioChunk.add_spectral_data(0.0); // Example spectral data
audioChunk.set_energy(0.0); // Example energy
audioChunk.set_zero_crossings(0.0); // Example zero crossings
audioChunk.set_speech_band_energy(0.0); // Example speech band energy
audioChunk.set_voice_detected(false); // Example voice detection
audioChunk.set_timestamp(0); // Example timestamp
if (!writer->Write(audioChunk)) {
logger_->error("Failed to send audio chunk.");
break;
}
}
// Optional: sleep to avoid overloading the server
std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Adjust as necessary
}
writer->WritesDone();
// Finish the RPC; the server fills in `response`, which was bound in StreamAudio() above
grpc::Status status = writer->Finish();
if (!status.ok()) {
logger_->error("gRPC failed: " + status.error_message());
} else {
// Optionally handle the StreamResponse from the server
logger_->info("Audio stream successfully sent and received response.");
// If you want to log response details, you can use `response` here.
}
}
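One thing to keep in mind: sendAudioStream blocks its calling thread — it keeps pulling chunks from the queue for as long as isRunning() returns true. In the main below I simply call it last, but if you need the main thread free for other work you could push the streaming onto a worker thread. A rough sketch under that assumption (the helper below is hypothetical, not part of the repository):
#include <memory>
#include <thread>
#include "audio_capture.h"
#include "device_stream.grpc.pb.h"
// Hypothetical helper: run the gRPC streaming loop on a worker thread so the caller stays free
void streamInBackground(AudioCapture &capture,
                        std::shared_ptr<audio_stream::AudioStream::Stub> stub) {
  std::thread worker([&capture, stub] {
    capture.sendAudioStream(stub);  // returns once capture.stop() flips running_
  });
  // ... do other work on the calling thread ...
  capture.stop();  // stops PortAudio and wakes getNextAudioChunk()
  worker.join();
}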
The main
The main executable then connects to a gRPC server (written in Golang), which is out of scope for this blog post, so I will include its source code in the GitHub repository.
#include "audio_capture.h"
#include "device_stream.grpc.pb.h"
#include "logger.h"
#include <cstdlib>
#include <grpcpp/grpcpp.h>
int main() {
Logger::init();
auto logger = Logger::get();
AudioCapture audioCapture; // Only one instance is needed
auto devices = audioCapture.getDevices();
if (devices.empty()) {
logger->error("No audio devices found.");
return EXIT_FAILURE;
}
logger->info("Available Audio Devices");
for (const auto &[index, deviceInfo] : devices) {
std::string deviceName =
"Device " + std::to_string(index) + ": " + deviceInfo->name + "\n";
logger->info(deviceName);
logger->warn("Opening audio stream on device " + std::to_string(index));
audioCapture.openAudioStream(index);
}
// Create a gRPC channel to the server
std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel(
"localhost:50051", grpc::InsecureChannelCredentials());
// Create the gRPC client stub
std::shared_ptr<audio_stream::AudioStream::Stub> stub =
audio_stream::AudioStream::NewStub(channel);
// Start capturing audio
audioCapture.start();
// Send audio stream to the external gRPC server
audioCapture.sendAudioStream(stub);
return 0;
}
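A final note on the channel: grpc::CreateChannel doesn't actually connect — the connection is established lazily when the first RPC goes out, so a missing server only shows up as a failed Write() later. If you'd rather fail fast, you could wait for the connection right after creating the channel. A small sketch, assuming a 5-second timeout is acceptable (the helper name is mine, not from the repo):
#include <chrono>
#include <memory>
#include <grpcpp/grpcpp.h>
// Hypothetical helper: block until the channel is connected or the timeout expires.
// grpc::CreateChannel is lazy, so without something like this the first Write()
// in sendAudioStream is what actually triggers the TCP connection.
bool waitForServer(const std::shared_ptr<grpc::Channel> &channel,
                   std::chrono::seconds timeout = std::chrono::seconds(5)) {
  return channel->WaitForConnected(std::chrono::system_clock::now() + timeout);
}
Calling waitForServer(channel) right after grpc::CreateChannel in main and returning EXIT_FAILURE when it comes back false keeps the error close to its cause.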
Running and building the code
For running and building the code I use a simple Makefile with a few commands:
# CMake generator and build-type flags
CMAKE_FLAGS=-G Ninja -DCMAKE_BUILD_TYPE=Release
BUILD_DIR=build
NUM_CORES=$(shell nproc)
.PHONY: build run run_detailed rerun remove rebuild
build:
	@mkdir -p $(BUILD_DIR)
	@cd $(BUILD_DIR) && cmake $(CMAKE_FLAGS) .. && cmake --build . -j$(NUM_CORES)
run:
	@cd $(BUILD_DIR) && ./audio_capture 2>/dev/null
run_detailed:
	@cd $(BUILD_DIR) && ./audio_capture
rerun: rebuild run
remove:
	@rm -rf $(BUILD_DIR)
rebuild:
	@cd $(BUILD_DIR) && cmake --build . --clean-first -j$(NUM_CORES) || { rm -rf $(BUILD_DIR) && make build; }
Running the code should then be as easy as make <target>, for example make build followed by make run.
Thank you for reading this far ⭐
I want to thank you, the reader, for making it this far! Here is the link to the repository with all of the source code 😊!