Skip to content

Commit 26d7725

Browse files
authored
Audio visualization helpers (#474)
1 parent 4a73d39 commit 26d7725

File tree

7 files changed

+363
-51
lines changed

7 files changed

+363
-51
lines changed

Sources/LiveKit/Convenience/AudioProcessing.swift

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,122 @@ public extension Sequence where Iterator.Element == AudioLevel {
9696
peak: totalSums.peakSum / Float(count))
9797
}
9898
}
99+
100+
/// Turns incoming PCM audio into a fixed number of normalized, smoothed
/// frequency-band levels (each in `0...1`) suitable for driving a visualizer.
///
/// Samples from the first channel are accumulated in a ring buffer; whenever the
/// ring buffer can supply a block, an FFT is run on it and the resulting
/// decibel magnitudes are grouped into `bandsCount` bands, normalized against
/// `minDB...maxDB`, optionally re-arranged so the loudest bands sit in the
/// middle, and finally blended with the previous output for smoother motion.
public class AudioVisualizeProcessor {
    /// Number of samples handed to the FFT per analysis block.
    static let bufferSize = 1024

    // MARK: - Public

    /// Lowest frequency (Hz) included in the bands.
    public let minFrequency: Float
    /// Highest frequency (Hz) included in the bands.
    public let maxFrequency: Float
    /// Decibel level that maps to `0.0` in the output.
    public let minDB: Float
    /// Decibel level that maps to `1.0` in the output.
    public let maxDB: Float
    /// Number of bands returned by `process(pcmBuffer:)` (never negative).
    public let bandsCount: Int
    /// When `true`, bands are sorted by level and the loudest are placed in the middle.
    public let isCentered: Bool
    /// Input to the easing curve that controls how quickly bands follow new values.
    public let smoothingFactor: Float

    // MARK: - Private

    /// Most recently returned band levels; the starting point for the next smoothing step.
    private var bands: [Float]
    private let ringBuffer = RingBuffer<Float>(size: AudioVisualizeProcessor.bufferSize)
    private let processor: FFTProcessor

    /// Creates a processor.
    ///
    /// - Parameters:
    ///   - minFrequency: Lowest frequency (Hz) to visualize.
    ///   - maxFrequency: Highest frequency (Hz) to visualize.
    ///   - minDB: Decibel level mapped to `0.0`.
    ///   - maxDB: Decibel level mapped to `1.0`.
    ///   - bandsCount: Number of output bands. Negative values are treated as `0`.
    ///   - isCentered: Whether to arrange the loudest bands in the middle.
    ///   - smoothingFactor: Smoothing factor for smoother transitions.
    public init(minFrequency: Float = 10,
                maxFrequency: Float = 8000,
                minDB: Float = -32.0,
                maxDB: Float = 32.0,
                bandsCount: Int = 100,
                isCentered: Bool = false,
                smoothingFactor: Float = 0.3)
    {
        // A negative count would trap when sizing the band arrays.
        let safeBandsCount = max(0, bandsCount)

        self.minFrequency = minFrequency
        self.maxFrequency = maxFrequency
        self.minDB = minDB
        self.maxDB = maxDB
        self.bandsCount = safeBandsCount
        self.isCentered = isCentered
        self.smoothingFactor = smoothingFactor

        processor = FFTProcessor(bufferSize: Self.bufferSize)
        bands = [Float](repeating: 0.0, count: safeBandsCount)
    }

    /// Feeds a PCM buffer into the processor and returns updated band levels.
    ///
    /// - Parameter pcmBuffer: Audio to analyze. Only the first channel is used.
    /// - Returns: `bandsCount` values in `0...1`, or `nil` when the buffer cannot be
    ///   converted to Float32 or the ring buffer has no block to hand out yet.
    public func process(pcmBuffer: AVAudioPCMBuffer) -> [Float]? {
        guard let pcmBuffer = pcmBuffer.convert(toCommonFormat: .pcmFormatFloat32) else { return nil }
        guard let floatChannelData = pcmBuffer.floatChannelData else { return nil }

        // Copy the first channel's samples into the ring buffer.
        let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength)))
        ringBuffer.write(floats)

        // Get a full-size buffer if available, otherwise return.
        guard let buffer = ringBuffer.read() else { return nil }

        // Process FFT and compute frequency bands (magnitudes are in decibels).
        let fftRes = processor.process(buffer: buffer)
        let computedBands = fftRes.computeBands(
            minFrequency: minFrequency,
            maxFrequency: maxFrequency,
            bandsCount: bandsCount,
            sampleRate: Float(pcmBuffer.format.sampleRate)
        )

        var normalizedBands = computedBands.magnitudes.map { _normalize(decibels: $0) }

        // If centering is enabled, rearrange the normalized bands.
        if isCentered {
            normalizedBands.sort(by: >)
            normalizedBands = centerBands(normalizedBands)
        }

        // The eased factor only depends on `smoothingFactor`, so compute it once per call.
        let easedFactor = _easeInOutCubic(t: smoothingFactor)
        bands = zip(bands, normalizedBands).map { old, new in
            _smoothTransition(from: old, to: new, easedFactor: easedFactor)
        }

        return bands
    }

    /// Maps a decibel value onto `0...1` relative to `minDB...maxDB`.
    ///
    /// Uses `decibels - minDB` (not `decibels + abs(minDB)`) so a positive `minDB`
    /// is handled correctly, and avoids dividing by a non-positive range.
    private func _normalize(decibels: Float) -> Float {
        let headroom = maxDB - minDB
        guard headroom > 0 else {
            // Degenerate range: behave as a simple threshold at `maxDB`.
            return decibels >= maxDB ? 1.0 : 0.0
        }
        return min(1.0, max(0, (decibels - minDB) / headroom))
    }

    /// Centers the sorted bands by placing higher values in the middle.
    ///
    /// Values at even positions fill slots from the middle towards the end;
    /// values at odd positions fill slots from the middle towards the start.
    private func centerBands(_ sortedBands: [Float]) -> [Float] {
        var centeredBands = [Float](repeating: 0, count: sortedBands.count)
        var leftIndex = sortedBands.count / 2
        var rightIndex = leftIndex

        for (index, value) in sortedBands.enumerated() {
            if index % 2 == 0 {
                // Place value to the right
                centeredBands[rightIndex] = value
                rightIndex += 1
            } else {
                // Place value to the left
                leftIndex -= 1
                centeredBands[leftIndex] = value
            }
        }

        return centeredBands
    }

    /// Moves `oldValue` towards `newValue` by the already-eased fraction `easedFactor`.
    private func _smoothTransition(from oldValue: Float, to newValue: Float, easedFactor: Float) -> Float {
        let delta = newValue - oldValue
        return oldValue + delta * easedFactor
    }

    /// Easing function: ease-in-out cubic
    private func _easeInOutCubic(t: Float) -> Float {
        t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2
    }
}

Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,31 +39,29 @@ public protocol AudioCustomProcessingDelegate {
3939
func audioProcessingRelease()
4040
}
4141

42-
class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessingDelegate {
42+
class AudioCustomProcessingDelegateAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioCustomProcessingDelegate {
4343
// MARK: - Public
4444

4545
public var target: AudioCustomProcessingDelegate? { _state.target }
4646

47-
// MARK: - Internal
48-
49-
let audioRenderers = MulticastDelegate<AudioRenderer>(label: "AudioRenderer")
50-
5147
// MARK: - Private
5248

5349
private struct State {
5450
weak var target: AudioCustomProcessingDelegate?
5551
}
5652

57-
private var _state: StateSync<State>
58-
59-
init(target: AudioCustomProcessingDelegate? = nil) {
60-
_state = StateSync(State(target: target))
61-
}
53+
private var _state = StateSync(State())
6254

6355
public func set(target: AudioCustomProcessingDelegate?) {
6456
_state.mutate { $0.target = target }
6557
}
6658

59+
init() {
60+
super.init(label: "AudioCustomProcessingDelegateAdapter")
61+
}
62+
63+
// MARK: - AudioCustomProcessingDelegate
64+
6765
func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) {
6866
target?.audioProcessingInitialize(sampleRate: sampleRateHz, channels: channels)
6967
}
@@ -73,24 +71,12 @@ class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessing
7371
target?.audioProcessingProcess(audioBuffer: lkAudioBuffer)
7472

7573
// Convert to pcmBuffer and notify only if an audioRenderer is added.
76-
if audioRenderers.isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() {
77-
audioRenderers.notify { $0.render(pcmBuffer: pcmBuffer) }
74+
if isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() {
75+
notify { $0.render(pcmBuffer: pcmBuffer) }
7876
}
7977
}
8078

8179
func audioProcessingRelease() {
8280
target?.audioProcessingRelease()
8381
}
84-
85-
// Proxy the equality operators
86-
87-
override func isEqual(_ object: Any?) -> Bool {
88-
guard let other = object as? AudioCustomProcessingDelegateAdapter else { return false }
89-
return target === other.target
90-
}
91-
92-
override var hash: Int {
93-
guard let target else { return 0 }
94-
return ObjectIdentifier(target).hashValue
95-
}
9682
}

Sources/LiveKit/Protocols/AudioRenderer.swift

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,17 @@ public protocol AudioRenderer {
2929
func render(pcmBuffer: AVAudioPCMBuffer)
3030
}
3131

32-
class AudioRendererAdapter: NSObject, LKRTCAudioRenderer {
33-
private weak var target: AudioRenderer?
34-
private let targetHashValue: Int
32+
class AudioRendererAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioRenderer {
33+
//
34+
typealias Delegate = AudioRenderer
3535

36-
init(target: AudioRenderer) {
37-
self.target = target
38-
targetHashValue = ObjectIdentifier(target).hashValue
36+
init() {
37+
super.init(label: "AudioRendererAdapter")
3938
}
4039

41-
func render(pcmBuffer: AVAudioPCMBuffer) {
42-
target?.render(pcmBuffer: pcmBuffer)
43-
}
40+
// MARK: - LKRTCAudioRenderer
4441

45-
// Proxy the equality operators
46-
override func isEqual(_ object: Any?) -> Bool {
47-
guard let other = object as? AudioRendererAdapter else { return false }
48-
return targetHashValue == other.targetHashValue
49-
}
50-
51-
override var hash: Int {
52-
targetHashValue
42+
func render(pcmBuffer: AVAudioPCMBuffer) {
43+
notify { $0.render(pcmBuffer: pcmBuffer) }
5344
}
5445
}
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Copyright 2024 LiveKit
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Accelerate
18+
import AVFoundation
19+
20+
extension Float {
    /// Half of the receiver. When the receiver is a sample rate in Hz,
    /// this is the corresponding Nyquist frequency.
    var nyquistFrequency: Float {
        return self * 0.5
    }
}
23+
24+
/// Per-band output of grouping FFT magnitudes into frequency bands.
public struct FFTComputeBandsResult {
    /// Number of bands.
    let count: Int
    /// One magnitude per band.
    let magnitudes: [Float]
    /// One representative frequency per band.
    let frequencies: [Float]

    /// Creates a result from its three components.
    init(count: Int, magnitudes: [Float], frequencies: [Float]) {
        self.count = count
        self.magnitudes = magnitudes
        self.frequencies = frequencies
    }
}
29+
30+
/// Holds the per-bin magnitudes produced by an FFT and groups them into bands.
public class FFTResult {
    /// One magnitude per FFT bin, covering 0 Hz up to the Nyquist frequency.
    public let magnitudes: [Float]

    init(magnitudes: [Float]) {
        self.magnitudes = magnitudes
    }

    /// Groups the bins between `minFrequency` and `maxFrequency` into `bandsCount`
    /// bands, averaging the magnitudes that fall inside each band.
    ///
    /// Out-of-range input no longer traps: frequencies are clamped to
    /// `0...nyquist`, an inverted range collapses to an empty range, and bin
    /// indices never exceed `magnitudes.count`.
    ///
    /// - Parameters:
    ///   - minFrequency: Lower bound of the analyzed range, in Hz.
    ///   - maxFrequency: Upper bound of the analyzed range, in Hz (capped at Nyquist).
    ///   - bandsCount: Number of bands to produce. Values `<= 0` yield an empty result.
    ///   - sampleRate: Sample rate of the analyzed audio, in Hz.
    /// - Returns: Band magnitudes and the mid frequency of each band. When there are
    ///   no magnitudes or `sampleRate` is not positive, every band is `0.0`.
    func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult {
        let bandTotal = max(0, bandsCount)
        var bandMagnitudes = [Float](repeating: 0.0, count: bandTotal)
        var bandFrequencies = [Float](repeating: 0.0, count: bandTotal)

        let binCount = magnitudes.count
        // Nothing meaningful can be computed without bins or a valid sample rate.
        guard bandTotal > 0, binCount > 0, sampleRate > 0 else {
            return FFTComputeBandsResult(count: bandTotal, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
        }

        let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency)
        let magLowerRange = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate)
        // Never let the upper bin fall below the lower one (inverted frequency range).
        let magUpperRange = max(magLowerRange, _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate))
        let ratio = Float(magUpperRange - magLowerRange) / Float(bandTotal)

        // Width of a single FFT bin in Hz; invariant across bands.
        let bandwidth = sampleRate.nyquistFrequency / Float(binCount)

        return magnitudes.withUnsafeBufferPointer { magnitudesPtr in
            for i in 0 ..< bandTotal {
                let magsStartIdx = min(binCount, magLowerRange + Int((Float(i) * ratio).rounded(.down)))
                let magsEndIdx = min(binCount, magLowerRange + Int((Float(i + 1) * ratio).rounded(.down)))

                let count = magsEndIdx - magsStartIdx
                if count > 0, let base = magnitudesPtr.baseAddress {
                    // Average of all bins that fall inside this band.
                    var sum: Float = 0
                    vDSP_sve(base + magsStartIdx, 1, &sum, vDSP_Length(count))
                    bandMagnitudes[i] = sum / Float(count)
                } else {
                    // Band is narrower than one bin: reuse the bin it starts in,
                    // clamped so an index equal to `binCount` stays in bounds.
                    bandMagnitudes[i] = magnitudesPtr[min(magsStartIdx, binCount - 1)]
                }

                // Compute average frequency
                bandFrequencies[i] = (bandwidth * Float(magsStartIdx) + bandwidth * Float(magsEndIdx)) / 2
            }

            return FFTComputeBandsResult(count: bandTotal, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
        }
    }

    /// Converts a frequency in Hz to a bin index in `0...magnitudes.count`.
    ///
    /// NaN and non-positive positions map to `0`; positions at or beyond the
    /// last bin map to `magnitudes.count`.
    @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> Int {
        let position = Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency
        guard !position.isNaN, position > 0 else { return 0 }
        guard position < Float(magnitudes.count) else { return magnitudes.count }
        return Int(position)
    }
}
73+
74+
/// Runs a windowed, real-input forward FFT over fixed-size sample blocks using
/// vDSP and reports the per-bin magnitudes in decibels.
class FFTProcessor {
    /// Window applied to the samples before the transform.
    public enum WindowType {
        case none
        case hanning
        case hamming
    }

    /// Number of samples expected by `process(buffer:)`.
    public let bufferSize: vDSP_Length
    public let windowType: WindowType

    private let bufferHalfSize: vDSP_Length
    private let bufferLog2Size: vDSP_Length
    private var window: [Float] = []
    private var fftSetup: FFTSetup
    private var realBuffer: [Float]
    private var imaginaryBuffer: [Float]
    /// Reference value that corresponds to 0 dB in the decibel conversion.
    private let zeroDBReference: Float = 1.0

    /// Creates a processor for blocks of `bufferSize` samples.
    ///
    /// - Parameters:
    ///   - bufferSize: Block length in samples. Must be a power of two and at least 2;
    ///     other sizes would be silently truncated to `2^floor(log2(size))` by the
    ///     radix-2 FFT and produce a partly meaningless spectrum.
    ///   - windowType: Window function applied before the transform.
    init(bufferSize: Int, windowType: WindowType = .hanning) {
        precondition(bufferSize >= 2 && bufferSize & (bufferSize - 1) == 0,
                     "FFT buffer size must be a power of two (>= 2).")

        self.bufferSize = vDSP_Length(bufferSize)
        self.windowType = windowType

        bufferHalfSize = vDSP_Length(bufferSize / 2)
        // Exact log2 for a power of two, without going through floating point.
        bufferLog2Size = vDSP_Length(bufferSize.trailingZeroBitCount)

        realBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize))
        imaginaryBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize))
        window = [Float](repeating: 1.0, count: Int(bufferSize))

        guard let setup = vDSP_create_fftsetup(bufferLog2Size, FFTRadix(FFT_RADIX2)) else {
            preconditionFailure("Unable to create vDSP FFT setup.")
        }
        fftSetup = setup

        switch windowType {
        case .none:
            // Keep the all-ones window (no shaping).
            break
        case .hanning:
            vDSP_hann_window(&window, vDSP_Length(bufferSize), Int32(vDSP_HANN_NORM))
        case .hamming:
            vDSP_hamm_window(&window, vDSP_Length(bufferSize), 0)
        }
    }

    deinit {
        vDSP_destroy_fftsetup(fftSetup)
    }

    /// Applies the window, runs the forward FFT and converts magnitudes to decibels.
    ///
    /// - Parameter buffer: Exactly `bufferSize` samples.
    /// - Returns: An `FFTResult` with `bufferSize / 2` decibel magnitudes.
    func process(buffer: [Float]) -> FFTResult {
        precondition(buffer.count == Int(bufferSize), "Input buffer size mismatch.")

        var windowedBuffer = [Float](repeating: 0.0, count: Int(bufferSize))
        vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, bufferSize)

        let halfCount = Int(bufferHalfSize)

        // `baseAddress` is non-nil below because the initializer guarantees
        // `bufferSize >= 2`, so none of the arrays are empty.
        return realBuffer.withUnsafeMutableBufferPointer { realPtr -> FFTResult in
            imaginaryBuffer.withUnsafeMutableBufferPointer { imagPtr -> FFTResult in
                var complexBuffer = DSPSplitComplex(realp: realPtr.baseAddress!, imagp: imagPtr.baseAddress!)

                // View the real samples as interleaved complex pairs and split them.
                // `withMemoryRebound` scopes the reinterpretation to this call instead
                // of permanently rebinding the array's storage to `DSPComplex`.
                windowedBuffer.withUnsafeBufferPointer { bufferPtr in
                    bufferPtr.baseAddress!.withMemoryRebound(to: DSPComplex.self, capacity: halfCount) { complexPtr in
                        vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, bufferHalfSize)
                    }
                }

                vDSP_fft_zrip(fftSetup, &complexBuffer, 1, bufferLog2Size, FFTDirection(FFT_FORWARD))

                var magnitudes = [Float](repeating: 0.0, count: halfCount)
                vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, bufferHalfSize)

                // Convert magnitudes to decibels in place through a single mutable
                // pointer, rather than passing the array as both input and inout.
                var reference = zeroDBReference
                magnitudes.withUnsafeMutableBufferPointer { magPtr in
                    vDSP_vdbcon(magPtr.baseAddress!, 1, &reference, magPtr.baseAddress!, 1, bufferHalfSize, 1)
                }

                return FFTResult(magnitudes: magnitudes)
            }
        }
    }
}

0 commit comments

Comments
 (0)