62 lines
2.5 KiB
Swift
62 lines
2.5 KiB
Swift
import AVFoundation
|
|
import Commander
|
|
import Foundation
|
|
import Speech
|
|
import Swabble
|
|
|
|
@MainActor
|
|
struct TranscribeCommand: ParsableCommand {
|
|
@Argument(help: "Path to audio/video file") var inputFile: String = ""
|
|
@Option(name: .long("locale"), help: "Locale identifier", parsing: .singleValue) var locale: String = Locale.current
|
|
.identifier
|
|
@Flag(help: "Censor etiquette-sensitive content") var censor: Bool = false
|
|
@Option(name: .long("output"), help: "Output file path") var outputFile: String?
|
|
@Option(name: .long("format"), help: "Output format txt|srt") var format: String = "txt"
|
|
@Option(name: .long("max-length"), help: "Max sentence length for srt") var maxLength: Int = 40
|
|
|
|
static var commandDescription: CommandDescription {
|
|
CommandDescription(
|
|
commandName: "transcribe",
|
|
abstract: "Transcribe a media file locally")
|
|
}
|
|
|
|
init() {}
|
|
|
|
init(parsed: ParsedValues) {
|
|
self.init()
|
|
if let positional = parsed.positional.first { self.inputFile = positional }
|
|
if let loc = parsed.options["locale"]?.last { self.locale = loc }
|
|
if parsed.flags.contains("censor") { self.censor = true }
|
|
if let out = parsed.options["output"]?.last { self.outputFile = out }
|
|
if let fmt = parsed.options["format"]?.last { self.format = fmt }
|
|
if let len = parsed.options["maxLength"]?.last, let intVal = Int(len) { self.maxLength = intVal }
|
|
}
|
|
|
|
mutating func run() async throws {
|
|
let fileURL = URL(fileURLWithPath: inputFile)
|
|
let audioFile = try AVAudioFile(forReading: fileURL)
|
|
|
|
let outputFormat = OutputFormat(rawValue: format) ?? .txt
|
|
|
|
let transcriber = SpeechTranscriber(
|
|
locale: Locale(identifier: locale),
|
|
transcriptionOptions: censor ? [.etiquetteReplacements] : [],
|
|
reportingOptions: [],
|
|
attributeOptions: outputFormat.needsAudioTimeRange ? [.audioTimeRange] : [])
|
|
let analyzer = SpeechAnalyzer(modules: [transcriber])
|
|
try await analyzer.start(inputAudioFile: audioFile, finishAfterFile: true)
|
|
|
|
var transcript: AttributedString = ""
|
|
for try await result in transcriber.results {
|
|
transcript += result.text
|
|
}
|
|
|
|
let output = outputFormat.text(for: transcript, maxLength: self.maxLength)
|
|
if let path = outputFile {
|
|
try output.write(to: URL(fileURLWithPath: path), atomically: false, encoding: .utf8)
|
|
} else {
|
|
print(output)
|
|
}
|
|
}
|
|
}
|