Intro
Especially when video is not used with WebRTC, I can't tell who is speaking.
So this time I try to determine it from each client's volume.
Examples
Sharing connected client names
Because WebRTC itself has no mechanism for sharing client names, I will share them through SSE (Server-Sent Events).
sseClient.go
...
type ClientName struct {
    Name string `json:"name"`
}
type ClientNames struct {
    Names []ClientName `json:"names"`
}
...
sseHub.go
...
func (h *SSEHub) run() {
    ...
    for {
        select {
        case client := <-h.register:
            h.clients[client] = true
            signalPeerConnections(h)
            sendClientNames(h)
        case client := <-h.unregister:
            if _, ok := h.clients[client]; ok {
                delete(h.clients, client)
                signalPeerConnections(h)
                sendClientNames(h)
            }
        case track := <-h.addTrack:
            ...
        }
    }
}
...
func sendClientNames(h *SSEHub) {
    names := ClientNames{
        Names: make([]ClientName, len(h.clients)),
    }
    i := 0
    for ps := range h.clients {
        names.Names[i] = ClientName{
            Name: ps.client.userName,
        }
        i += 1
    }
    message, err := NewClientNameMessageJSON(names)
    if err != nil {
        log.Printf("Error sendClientNames Message: %s", err.Error())
        return
    }
    // Broadcast the name list to every connected SSE client.
    for ps := range h.clients {
        flusher, ok := ps.client.w.(http.Flusher)
        if !ok {
            continue
        }
        fmt.Fprintf(ps.client.w, "data: %s\n\n", message)
        flusher.Flush()
    }
}
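On the browser side, these messages can be received with an EventSource. Below is a minimal sketch; the endpoint path "/sse", the plain ClientNames payload, and the mainView variable are assumptions for illustration (the real server wraps the JSON with NewClientNameMessageJSON, whose exact format is not shown here).
// Types mirroring the JSON tags of the Go structs above.
type ClientName = { name: string };
type ClientNames = { names: ClientName[] };

// Assumed to exist elsewhere (see MainView in main.view.ts below).
declare const mainView: { updateClientNames(names: ClientNames): void };

const eventSource = new EventSource("/sse");
eventSource.onmessage = (event: MessageEvent<string>) => {
  // Each "data: ..." block written by sendClientNames arrives as one message.
  const names = JSON.parse(event.data) as ClientNames;
  mainView.updateClientNames(names);
};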
main.view.ts
...
type ConnectedClient = {
  name: ClientName,
  element: HTMLElement,
};
export class MainView {
  ...
  private clientArea: HTMLElement;
  private connectedClients: ConnectedClient[];
  public constructor() {
    ...
    this.clientArea = document.getElementById("client_names") as HTMLElement;
    this.connectedClients = new Array<ConnectedClient>();
  }
...
  public updateClientNames(names: ClientNames): void {
    if(names == null) {
      console.warn("updateClientNames was called with null");
      return;
    }
    // Keep clients that are still connected and remove the elements of those that left.
    const newClients = new Array<ConnectedClient>();
    for(const c of this.connectedClients) {
      const clientName = c.name.name;
      if(names.names.some(n => n.name === clientName)) {
        newClients.push(c);
      } else {
        this.clientArea.removeChild(c.element);
      }
    }
    // Add an element for every newly connected client.
    for(const n of names.names) {
      if(this.connectedClients.some(c => c.name.name === n.name) === false) {
        const newElement = document.createElement("div");
        newElement.textContent = n.name;
        this.clientArea.appendChild(newElement);
        newClients.push({
          name: n,
          element: newElement,
        });
      }
    }
    this.connectedClients = newClients;
  }
...
Getting audio level
Audio levels can be obtained in several ways, and from either local or remote media stream tracks.
Because retrieving them from the remote media stream tracks would require the same processing once for every connection, I decided to retrieve them from the local media stream track this time.
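For illustration, here is a small sketch of the difference (the single peerConnection mirrors the controller shown later and is assumed to already exist): the local level can be read from the one track returned by getUserMedia, while on the remote side there is one receiver per connected client, so the same work would be repeated for each of them.
// Assumed to exist, as in webrtc.controller.ts below.
declare const peerConnection: RTCPeerConnection;

async function listAudioTracks(): Promise<void> {
  // Local side: a single audio track, no matter how many clients are connected.
  const localStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const localAudioTrack = localStream.getAudioTracks()[0];
  console.log("local:", localAudioTrack.label);

  // Remote side: one RTCRtpReceiver per remote client, so reading levels here
  // would have to be repeated once per connection.
  for (const receiver of peerConnection.getReceivers()) {
    if (receiver.track.kind === "audio") {
      console.log("remote:", receiver.track.id);
    }
  }
}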
Getting audio level by "RTCPeerConnection.getStats()"
I can get statistics from "RTCPeerConnection.getStats()", and the "media-source" report for an audio media stream track includes its audio level.
An example report looks like this:
0: "RTCAudioSource_1"
1:
audioLevel: 0.15381328775902586
echoReturnLoss: -30
echoReturnLossEnhancement: 0.17551203072071075
id: "RTCAudioSource_1"
kind: "audio"
timestamp: 1659880489574
totalAudioEnergy: 0.06016985176246171
totalSamplesDuration: 2.1399999999999983
trackIdentifier: "f987f34e-ef52-4a27-a73e-910f00bfd090"
type: "media-source"
webrtc.controller.ts
...
  public init(videoUsed: boolean) {
    ...
    let audioTrack: MediaStreamTrack|null = null;
    navigator.mediaDevices.getUserMedia({ video: videoUsed, audio: true })
      .then(stream => {
        this.webcamStream = stream;
        const audios = this.webcamStream.getAudioTracks();
        for(const a of audios) {
          audioTrack = a;
        }
      });
    setInterval(() => {
      if(this.peerConnection == null ||
        this.peerConnection.connectionState !== "connected") {
        return;
      }
      this.peerConnection.getStats(audioTrack).then((stats) => {
        // Iterating an RTCStatsReport yields [id, statObject] pairs,
        // so the inner loop sees the id string and the stat object in turn.
        for(const report of stats) {
          for(const r of report) {
            const audioLevel = this.getAudioLevel(r);
            if(audioLevel != null &&
              audioLevel > 0.0) {
              // If the threshold established between 0 and 1 is exceeded,
              // it is considered to be talking.
              console.log(audioLevel);
            }
          }
        }
      });
    }, 500);
  }
...
  private getAudioLevel(stat: any): number|null {
    if(stat == null ||
      typeof stat !== "object") {
      return null;
    }
    if(!("kind" in stat) ||
      stat.kind !== "audio" ||
      !("audioLevel" in stat)) {
      return null;
    }
    if(typeof stat.audioLevel === "number") {
      return stat.audioLevel;
    }
    if(typeof stat.audioLevel === "string") {
      const parsedResult = parseFloat(stat.audioLevel);
      if(isNaN(parsedResult) === false) {
        return parsedResult;
      }
    }
    return null;
  }
}
- Identifiers for WebRTC's Statistics API - W3C
- WebRTC 1.0: Real-Time Communication Between Browsers - W3C
- WebRTC Statistics API - MDN
Because this code is redundant and runs on the main thread, I chose a different approach.
AudioWorkletNode and AudioWorkletProcessor
Audio levels can also be obtained with "AudioWorkletNode" and "AudioWorkletProcessor".
They provide custom audio nodes that run on the "AudioWorkletGlobalScope".
To use them, I need to add a JavaScript file separate from the one that runs in the main global scope.
For the implementation, I referred to the "GoogleChromeLabs" sample.
volume-measurer-processor.js
// This code is based on GoogleChromeLabs/web-audio-samples (Copyright (c) 2022 The Chromium Authors) for reference
// https://github.com/GoogleChromeLabs/web-audio-samples
/* global currentTime */
const FRAME_INTERVAL = 1 / 60;
/**
 * Measure microphone volume.
 *
 * @class VolumeMeasurer
 * @extends AudioWorkletProcessor
 */
class VolumeMeasurer extends AudioWorkletProcessor {
  constructor() {
    super();
    this._lastUpdate = currentTime;
  }
  calculateRMS(inputChannelData) {
    // Calculate the squared sum.
    let sum = 0;
    // The value of "inputChannelData.length" is 128 by default.
    for (let i = 0; i < inputChannelData.length; i++) {
      sum += inputChannelData[i] * inputChannelData[i];
    }
    // Calculate the RMS (Root Mean Square) level.
    return Math.sqrt(sum / inputChannelData.length);
  }
  // "outputs" and "parameters" can be omitted.
  process(inputs) {
    // This example only handles a mono channel.
    const inputChannelData = inputs[0][0];
    // Skip frames where no input is connected yet.
    if (inputChannelData === undefined) {
      return true;
    }
    // Calculate and post the RMS level about every 16 ms.
    if (currentTime - this._lastUpdate > FRAME_INTERVAL) {
      const volume = this.calculateRMS(inputChannelData);
      this.port.postMessage(volume);
      this._lastUpdate = currentTime;
    }
    return true;
  }
}
registerProcessor("volume-measurer", VolumeMeasurer);
webrtc.controller.ts
...
export class WebRtcController {
  private webcamStream: MediaStream | null = null;
  private peerConnection: RTCPeerConnection | null = null;
  ...
  private localAudioContext: AudioContext;
  private localAudioNode: MediaStreamAudioSourceNode|null = null;
  public constructor() {
    this.localVideo = document.getElementById("local_video") as HTMLVideoElement;
    this.localAudioContext = new AudioContext();
  }
  public init(videoUsed: boolean) {
    ...
    navigator.mediaDevices.getUserMedia({ video: videoUsed, audio: true })
      .then(async stream => {
        this.webcamStream = stream;
        // AudioWorkletProcessor subclasses must be added as modules before creating an AudioWorkletNode.
        await this.localAudioContext.audioWorklet.addModule("./js/volume-measurer-processor.js");
        // Create a MediaStreamAudioSourceNode and connect an AudioWorkletNode to use the AudioWorkletProcessor subclass.
        this.localAudioNode = this.localAudioContext.createMediaStreamSource(stream);
        const volumeMeterNode = new AudioWorkletNode(this.localAudioContext, "volume-measurer");
        // The main global scope and the AudioWorkletGlobalScope communicate through "postMessage" and "onmessage".
        volumeMeterNode.port.onmessage = async ({data}) => {
          if(this.peerConnection?.connectionState === "connected") {
            // If the threshold established between 0 and 1 is exceeded,
            // it is considered to be talking.
            if(data > 0.05) {
              console.log(`talking V:${data}`);
            }
          }
        };
        this.localAudioNode.connect(volumeMeterNode).connect(this.localAudioContext.destination);
      });
  }
  ...
  public connect() {
    ...
    this.peerConnection.onconnectionstatechange = () => {
      if(this.peerConnection?.connectionState === "connected") {
        // start VolumeMeasurer
        this.localAudioContext.resume();
      } else {
        // stop VolumeMeasurer
        this.localAudioContext.suspend();
      }
    };
    ...
  }
}
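The sample above only writes the raw RMS value to the console, and because the worklet posts a value roughly every frame, a bare threshold comparison flickers during short pauses. Below is a minimal sketch of one way to smooth it into a stable "talking" flag; the threshold, the hold time, and the TalkingDetector class itself are illustrative assumptions, not part of the original sample.
// Hypothetical helper: turns the raw RMS values posted by the worklet into a
// debounced "talking" flag. The flag switches on as soon as a value exceeds
// the threshold and only switches off after holdMs milliseconds of silence.
class TalkingDetector {
  private lastLoudAt = Number.NEGATIVE_INFINITY;
  private talking = false;

  public constructor(
    private threshold: number = 0.05,
    private holdMs: number = 500,
    private onChange: (talking: boolean) => void = () => {}) {
  }

  public update(rms: number): void {
    const now = performance.now();
    if (rms > this.threshold) {
      this.lastLoudAt = now;
    }
    const next = now - this.lastLoudAt < this.holdMs;
    if (next !== this.talking) {
      this.talking = next;
      this.onChange(next);
    }
  }
}

// In volumeMeterNode.port.onmessage, detector.update(data) would replace the
// direct "data > 0.05" comparison; onChange could, for example, toggle a CSS
// class on the local client's name element in MainView.
const detector = new TalkingDetector(0.05, 500, talking => console.log(`talking: ${talking}`));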