Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions Sources/Scripta/MeetingRecorder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ final class MeetingRecorder: NSObject, ObservableObject {
}
}

whisperEngine.language = recognitionLanguage.components(separatedBy: "-").first?.lowercased() ?? "en"
whisperEngine.onTranscript = { [weak self] text in
guard let self, self.state == .recording, !self.micMuted else { return }
self.appendWhisperTranscript(text)
Expand Down Expand Up @@ -309,6 +310,10 @@ final class MeetingRecorder: NSObject, ObservableObject {
mplog("System audio: buffer #\(systemBufferCount) frames=\(pcm.frameLength) rate=\(pcm.format.sampleRate) ch=\(pcm.format.channelCount) fmt=\(pcm.format.commonFormat.rawValue)")
}

// Write original quality (48 kHz) to the audio file.
writeSystemAudio(pcm)

// Downsample to 16 kHz only for SFSpeech recognition.
let buffer: AVAudioPCMBuffer
if pcm.format.sampleRate == recognitionFormat.sampleRate &&
pcm.format.channelCount == recognitionFormat.channelCount &&
Expand All @@ -319,8 +324,6 @@ final class MeetingRecorder: NSObject, ObservableObject {
buffer = converted
}

writeSystemAudio(buffer)

if !systemRecognitionStarted && state == .recording {
systemRecognitionStarted = true
DispatchQueue.main.async { [weak self] in
Expand Down Expand Up @@ -676,9 +679,9 @@ final class MeetingRecorder: NSObject, ObservableObject {

private static let audioFileSettings: [String: Any] = [
AVFormatIDKey: kAudioFormatMPEG4AAC,
AVSampleRateKey: 16_000,
AVSampleRateKey: 48_000,
AVNumberOfChannelsKey: 1,
AVEncoderBitRateKey: 48_000,
AVEncoderBitRateKey: 128_000,
]

private func startAudioWriters() {
Expand Down Expand Up @@ -728,7 +731,7 @@ final class MeetingRecorder: NSObject, ObservableObject {
output.frameLength = outFrames

guard let outPtr = output.floatChannelData?[0] else { return }
if abs(ratio - 1.0) < 0.001 && buffer.format.channelCount == 1 {
if abs(ratio - 1.0) < 0.001 {
memcpy(outPtr, ch0, Int(frames) * MemoryLayout<Float>.size)
} else {
let srcCount = Int(frames)
Expand Down
2 changes: 1 addition & 1 deletion Sources/Scripta/SystemAudioCapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ final class SystemAudioCapture: NSObject, SCStreamDelegate, SCStreamOutput {
let config = SCStreamConfiguration()
config.capturesAudio = true
config.excludesCurrentProcessAudio = true
config.sampleRate = 16_000
config.sampleRate = 48_000
config.channelCount = 1
config.queueDepth = 8
config.width = 2
Expand Down
3 changes: 2 additions & 1 deletion Sources/Scripta/WhisperEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ final class WhisperEngine {
private var isProcessing = false

var onTranscript: ((String) -> Void)?
var language: String = "en"

static let defaultModelName = "ggml-base.bin"

Expand Down Expand Up @@ -127,7 +128,7 @@ final class WhisperEngine {
params.print_special = false
params.no_context = true
params.single_segment = false
let langStr = strdup("en")
let langStr = strdup(self.language)
params.language = UnsafePointer(langStr)
params.n_threads = 4

Expand Down