Commit 485e76d

macOS screen share audio (livekit#561)
1 parent 76b6deb commit 485e76d

5 files changed: +227 −34 lines
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
/*
 * Copyright 2025 LiveKit
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

@preconcurrency import AVFoundation

#if swift(>=5.9)
internal import LiveKitWebRTC
#else
@_implementationOnly import LiveKitWebRTC
#endif

public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
    public var next: (any AudioEngineObserver)? {
        get { _state.next }
        set { _state.mutate { $0.next = newValue } }
    }

    /// Adjust the volume of captured app audio. Range is 0.0 ~ 1.0.
    public var appVolume: Float {
        get { _state.read { $0.appMixerNode.outputVolume } }
        set { _state.mutate { $0.appMixerNode.outputVolume = newValue } }
    }

    /// Adjust the volume of microphone audio. Range is 0.0 ~ 1.0.
    public var micVolume: Float {
        get { _state.read { $0.micMixerNode.outputVolume } }
        set { _state.mutate { $0.micMixerNode.outputVolume = newValue } }
    }

    // MARK: - Internal

    var appAudioNode: AVAudioPlayerNode {
        _state.read { $0.appNode }
    }

    var micAudioNode: AVAudioPlayerNode {
        _state.read { $0.micNode }
    }

    var isConnected: Bool {
        _state.read { $0.isConnected }
    }

    struct State {
        var next: (any AudioEngineObserver)?

        // AppAudio
        public let appNode = AVAudioPlayerNode()
        public let appMixerNode = AVAudioMixerNode()

        // Not connected for device rendering mode.
        public let micNode = AVAudioPlayerNode()
        public let micMixerNode = AVAudioMixerNode()

        public var isConnected: Bool = false
    }

    let _state = StateSync(State())

    public init() {}

    public func setNext(_ handler: any AudioEngineObserver) {
        next = handler
    }

    public func engineDidCreate(_ engine: AVAudioEngine) {
        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
        }

        engine.attach(appNode)
        engine.attach(appMixerNode)
        engine.attach(micNode)
        engine.attach(micMixerNode)

        // Invoke next
        next?.engineDidCreate(engine)
    }

    public func engineWillRelease(_ engine: AVAudioEngine) {
        // Invoke next
        next?.engineWillRelease(engine)

        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
        }

        engine.detach(appNode)
        engine.detach(appMixerNode)
        engine.detach(micNode)
        engine.detach(micMixerNode)
    }

    public func engineWillConnectInput(_ engine: AVAudioEngine, src: AVAudioNode?, dst: AVAudioNode, format: AVAudioFormat, context: [AnyHashable: Any]) {
        // Get the main mixer
        guard let mainMixerNode = context[kRTCAudioEngineInputMixerNodeKey] as? AVAudioMixerNode else {
            // If failed to get main mixer, call next and return.
            next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context)
            return
        }

        // Read nodes from state lock.
        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
        }

        // TODO: Investigate if possible to get this format prior to starting screen capture.
        // <AVAudioFormat 0x600003055180: 2 ch, 48000 Hz, Float32, deinterleaved>
        let appAudioNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                                               sampleRate: format.sampleRate, // Assume same sample rate
                                               channels: 2,
                                               interleaved: false)

        log("Connecting app -> appMixer -> mainMixer")
        // appAudio -> appAudioMixer -> mainMixer
        engine.connect(appNode, to: appMixerNode, format: appAudioNodeFormat)
        engine.connect(appMixerNode, to: mainMixerNode, format: format)

        // src is not null if device rendering mode.
        if let src {
            log("Connecting src (device) to micMixer -> mainMixer")
            // mic (device) -> micMixer -> mainMixer
            engine.connect(src, to: micMixerNode, format: format)
        }

        // TODO: Investigate if possible to get this format prior to starting screen capture.
        let micNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
                                          sampleRate: format.sampleRate, // Assume same sample rate
                                          channels: 1, // Mono
                                          interleaved: false)

        log("Connecting micAudio (player) to micMixer -> mainMixer")
        // mic (player) -> micMixer -> mainMixer
        engine.connect(micNode, to: micMixerNode, format: micNodeFormat)
        // Always connect micMixer to mainMixer
        engine.connect(micMixerNode, to: mainMixerNode, format: format)

        _state.mutate { $0.isConnected = true }

        // Invoke next
        next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context)
    }
}
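
Usage note (not part of the commit): the two public volume properties are the intended runtime controls. A minimal sketch, assuming the observer is reached through AudioManager.shared.mixer as wired up in AudioManager.swift below:

import LiveKit

// Duck captured app audio under the microphone while someone is speaking.
func duckAppAudio() {
    let mixer = AudioManager.shared.mixer
    mixer.appVolume = 0.25 // captured app audio at quarter volume
    mixer.micVolume = 1.0  // microphone at full volume
}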

Sources/LiveKit/Convenience/AudioProcessing.swift

Lines changed: 11 additions & 0 deletions
@@ -56,6 +56,17 @@ public extension LKAudioBuffer {
         }
     }

+public extension CMSampleBuffer {
+    func toAVAudioPCMBuffer() -> AVAudioPCMBuffer? {
+        let format = AVAudioFormat(cmAudioFormatDescription: formatDescription!)
+        let numSamples = AVAudioFrameCount(numSamples)
+        let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: numSamples)!
+        pcmBuffer.frameLength = numSamples
+        CMSampleBufferCopyPCMDataIntoAudioBufferList(self, at: 0, frameCount: Int32(numSamples), into: pcmBuffer.mutableAudioBufferList)
+        return pcmBuffer
+    }
+}
+
 public extension AVAudioPCMBuffer {
     /// Computes Peak and Linear Scale RMS Value (Average) for all channels.
     func audioLevels() -> [AudioLevel] {
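
Note: toAVAudioPCMBuffer() force-unwraps formatDescription and the AVAudioPCMBuffer initializer, so it traps on malformed buffers. A defensive variant of the same conversion (a sketch, not in this commit) could fail gracefully instead:

import AVFoundation
import CoreMedia

extension CMSampleBuffer {
    // Hypothetical safer variant: returns nil instead of trapping when the
    // buffer has no format description or the PCM copy fails.
    func toAVAudioPCMBufferOrNil() -> AVAudioPCMBuffer? {
        guard let formatDescription else { return nil }
        let format = AVAudioFormat(cmAudioFormatDescription: formatDescription)
        let frameCount = AVAudioFrameCount(numSamples)
        guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { return nil }
        pcmBuffer.frameLength = frameCount
        let status = CMSampleBufferCopyPCMDataIntoAudioBufferList(
            self, at: 0, frameCount: Int32(frameCount), into: pcmBuffer.mutableAudioBufferList
        )
        return status == noErr ? pcmBuffer : nil
    }
}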

Sources/LiveKit/Track/AudioManager.swift

Lines changed: 4 additions & 2 deletions
@@ -258,6 +258,8 @@ public class AudioManager: Loggable {
         _state.mutate { $0.engineObservers = engineObservers }
     }

+    public let mixer = DefaultMixerAudioObserver()
+
     /// Set to `true` to enable legacy mic mute mode.
     ///
     /// - Default: Uses `AVAudioEngine`'s `isVoiceProcessingInputMuted` internally.
@@ -333,9 +335,9 @@ public class AudioManager: Loggable {

     init() {
         #if os(iOS) || os(visionOS) || os(tvOS)
-        let engineObservers: [any AudioEngineObserver] = [DefaultAudioSessionObserver()]
+        let engineObservers: [any AudioEngineObserver] = [DefaultAudioSessionObserver(), mixer]
         #else
-        let engineObservers: [any AudioEngineObserver] = []
+        let engineObservers: [any AudioEngineObserver] = [mixer]
         #endif
         _state = StateSync(State(engineObservers: engineObservers))
         _admDelegateAdapter.audioManager = self
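
Since the mixer now ships in the default engineObservers chain on all platforms, additional observers can be chained behind it with the setNext API from the new file. A brief sketch (myObserver is a hypothetical, pre-existing AudioEngineObserver):

import LiveKit

// Hypothetical: have the built-in mixer forward engine callbacks to your own observer.
let mixer = AudioManager.shared.mixer
mixer.setNext(myObserver)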

Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift

Lines changed: 49 additions & 32 deletions
@@ -98,9 +98,16 @@ public class MacOSScreenCapturer: VideoCapturer {
         configuration.pixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange
         configuration.showsCursor = options.showCursor

+        if #available(macOS 13.0, *) {
+            configuration.capturesAudio = options.appAudio
+        }
+
         // Why does SCStream hold strong reference to delegate?
         let stream = SCStream(filter: filter, configuration: configuration, delegate: nil)
         try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: nil)
+        if #available(macOS 13.0, *) {
+            try stream.addStreamOutput(self, type: .audio, sampleHandlerQueue: nil)
+        }
         try await stream.startCapture()

         _screenCapturerState.mutate { $0.scStream = stream }
@@ -200,7 +207,6 @@ extension MacOSScreenCapturer {
 @available(macOS 12.3, *)
 extension MacOSScreenCapturer: SCStreamOutput {
     public func stream(_: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
-
                        of outputType: SCStreamOutputType)
     {
         guard case .started = captureState else {
@@ -211,40 +217,51 @@ extension MacOSScreenCapturer: SCStreamOutput {
         // Return early if the sample buffer is invalid.
         guard sampleBuffer.isValid else { return }

-        guard case .screen = outputType else { return }
-
-        // Retrieve the array of metadata attachments from the sample buffer.
-        guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer,
-                                                                             createIfNecessary: false) as? [[SCStreamFrameInfo: Any]],
-              let attachments = attachmentsArray.first else { return }
-
-        // Validate the status of the frame. If it isn't `.complete`, return nil.
-        guard let statusRawValue = attachments[SCStreamFrameInfo.status] as? Int,
-              let status = SCFrameStatus(rawValue: statusRawValue),
-              status == .complete else { return }
-
-        // Retrieve the content rectangle, scale, and scale factor.
-        guard let contentRectDict = attachments[.contentRect],
-              let contentRect = CGRect(dictionaryRepresentation: contentRectDict as! CFDictionary),
-              // let contentScale = attachments[.contentScale] as? CGFloat,
-              let scaleFactor = attachments[.scaleFactor] as? CGFloat else { return }
-
-        // Schedule resend timer
-        let newTimer = Task.detached(priority: .utility) { [weak self] in
-            while true {
-                try? await Task.sleep(nanoseconds: UInt64(1 * 1_000_000_000))
-                if Task.isCancelled { break }
-                guard let self else { break }
-                try await self._capturePreviousFrame()
+        if case .audio = outputType {
+            guard let pcm = sampleBuffer.toAVAudioPCMBuffer() else { return }
+            let mixer = AudioManager.shared.mixer
+            let node = mixer.appAudioNode
+            guard mixer.isConnected, let engine = node.engine, engine.isRunning else { return }
+
+            node.scheduleBuffer(pcm)
+            if !node.isPlaying {
+                node.play()
             }
-        }

-        _screenCapturerState.mutate {
-            $0.resendTimer?.cancel()
-            $0.resendTimer = newTimer
-        }
+        } else if case .screen = outputType {
+            // Retrieve the array of metadata attachments from the sample buffer.
+            guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer,
+                                                                                 createIfNecessary: false) as? [[SCStreamFrameInfo: Any]],
+                  let attachments = attachmentsArray.first else { return }
+
+            // Validate the status of the frame. If it isn't `.complete`, return nil.
+            guard let statusRawValue = attachments[SCStreamFrameInfo.status] as? Int,
+                  let status = SCFrameStatus(rawValue: statusRawValue),
+                  status == .complete else { return }
+
+            // Retrieve the content rectangle, scale, and scale factor.
+            guard let contentRectDict = attachments[.contentRect],
+                  let contentRect = CGRect(dictionaryRepresentation: contentRectDict as! CFDictionary),
+                  // let contentScale = attachments[.contentScale] as? CGFloat,
+                  let scaleFactor = attachments[.scaleFactor] as? CGFloat else { return }
+
+            // Schedule resend timer
+            let newTimer = Task.detached(priority: .utility) { [weak self] in
+                while true {
+                    try? await Task.sleep(nanoseconds: UInt64(1 * 1_000_000_000))
+                    if Task.isCancelled { break }
+                    guard let self else { break }
+                    try await self._capturePreviousFrame()
+                }
+            }

-        capture(sampleBuffer, contentRect: contentRect, scaleFactor: scaleFactor)
+            _screenCapturerState.mutate {
+                $0.resendTimer?.cancel()
+                $0.resendTimer = newTimer
+            }
+
+            capture(sampleBuffer, contentRect: contentRect, scaleFactor: scaleFactor)
+        }
     }
 }
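
For reference, the ScreenCaptureKit calls the capturer now makes when app audio is requested, shown in isolation (a sketch assuming macOS 13+; the function name and parameters are illustrative, not part of the commit):

import ScreenCaptureKit

@available(macOS 13.0, *)
func makeAudioCapableStream(filter: SCContentFilter,
                            output: SCStreamOutput) throws -> SCStream {
    let configuration = SCStreamConfiguration()
    configuration.capturesAudio = true // opt in to app audio (macOS 13+)

    let stream = SCStream(filter: filter, configuration: configuration, delegate: nil)
    // Register the same output object for both video frames and audio buffers.
    try stream.addStreamOutput(output, type: .screen, sampleHandlerQueue: nil)
    try stream.addStreamOutput(output, type: .audio, sampleHandlerQueue: nil)
    return stream
}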

Sources/LiveKit/Types/Options/ScreenShareCaptureOptions.swift

Lines changed: 7 additions & 0 deletions
@@ -28,6 +28,9 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
     @objc
     public let showCursor: Bool

+    @objc
+    public let appAudio: Bool
+
     /// Use broadcast extension for screen capture (iOS only).
     ///
     /// If a broadcast extension has been properly configured, this defaults to `true`.
@@ -49,12 +52,14 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
     public init(dimensions: Dimensions = .h1080_169,
                 fps: Int = 30,
                 showCursor: Bool = true,
+                appAudio: Bool = false,
                 useBroadcastExtension: Bool = defaultToBroadcastExtension,
                 includeCurrentApplication: Bool = false)
     {
         self.dimensions = dimensions
         self.fps = fps
         self.showCursor = showCursor
+        self.appAudio = appAudio
         self.useBroadcastExtension = useBroadcastExtension
         self.includeCurrentApplication = includeCurrentApplication
     }
@@ -66,6 +71,7 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
         return dimensions == other.dimensions &&
             fps == other.fps &&
             showCursor == other.showCursor &&
+            appAudio == other.appAudio &&
             useBroadcastExtension == other.useBroadcastExtension &&
             includeCurrentApplication == other.includeCurrentApplication
     }
@@ -75,6 +81,7 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
         hasher.combine(dimensions)
         hasher.combine(fps)
         hasher.combine(showCursor)
+        hasher.combine(appAudio)
         hasher.combine(useBroadcastExtension)
         hasher.combine(includeCurrentApplication)
         return hasher.finalize()
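
Putting it together, enabling app audio from application code is a one-line opt-in on the capture options. A sketch (how the options reach the capturer depends on your publish path, e.g. via RoomOptions.defaultScreenShareCaptureOptions if you use that route):

import LiveKit

let screenShareOptions = ScreenShareCaptureOptions(
    dimensions: .h1080_169,
    fps: 30,
    showCursor: true,
    appAudio: true // new in this commit; defaults to false
)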
