feat: add talk mode across nodes

This commit is contained in:
Peter Steinberger
2025-12-29 23:21:05 +01:00
parent 6927b0fb8d
commit 20d7882033
26 changed files with 3087 additions and 0 deletions

View File

@@ -0,0 +1,194 @@
import Foundation
public struct TalkDirective: Equatable, Sendable {
public var voiceId: String?
public var modelId: String?
public var speed: Double?
public var rateWPM: Int?
public var stability: Double?
public var similarity: Double?
public var style: Double?
public var speakerBoost: Bool?
public var seed: Int?
public var normalize: String?
public var language: String?
public var outputFormat: String?
public var latencyTier: Int?
public var once: Bool?
public init(
voiceId: String? = nil,
modelId: String? = nil,
speed: Double? = nil,
rateWPM: Int? = nil,
stability: Double? = nil,
similarity: Double? = nil,
style: Double? = nil,
speakerBoost: Bool? = nil,
seed: Int? = nil,
normalize: String? = nil,
language: String? = nil,
outputFormat: String? = nil,
latencyTier: Int? = nil,
once: Bool? = nil)
{
self.voiceId = voiceId
self.modelId = modelId
self.speed = speed
self.rateWPM = rateWPM
self.stability = stability
self.similarity = similarity
self.style = style
self.speakerBoost = speakerBoost
self.seed = seed
self.normalize = normalize
self.language = language
self.outputFormat = outputFormat
self.latencyTier = latencyTier
self.once = once
}
}
public struct TalkDirectiveParseResult: Equatable, Sendable {
public let directive: TalkDirective?
public let stripped: String
public let unknownKeys: [String]
public init(directive: TalkDirective?, stripped: String, unknownKeys: [String]) {
self.directive = directive
self.stripped = stripped
self.unknownKeys = unknownKeys
}
}
public enum TalkDirectiveParser {
public static func parse(_ text: String) -> TalkDirectiveParseResult {
let normalized = text.replacingOccurrences(of: "\r\n", with: "\n")
var lines = normalized.split(separator: "\n", omittingEmptySubsequences: false)
guard !lines.isEmpty else { return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: []) }
guard let firstNonEmpty = lines.firstIndex(where: { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty })
else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
let head = lines[firstNonEmpty].trimmingCharacters(in: .whitespacesAndNewlines)
guard head.hasPrefix("{"), head.hasSuffix("}") else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
guard let data = head.data(using: .utf8),
let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
let speakerBoost = boolValue(json, keys: ["speaker_boost", "speakerBoost"])
?? boolValue(json, keys: ["no_speaker_boost", "noSpeakerBoost"]).map { !$0 }
let directive = TalkDirective(
voiceId: stringValue(json, keys: ["voice", "voice_id", "voiceId"]),
modelId: stringValue(json, keys: ["model", "model_id", "modelId"]),
speed: doubleValue(json, keys: ["speed"]),
rateWPM: intValue(json, keys: ["rate", "wpm"]),
stability: doubleValue(json, keys: ["stability"]),
similarity: doubleValue(json, keys: ["similarity", "similarity_boost", "similarityBoost"]),
style: doubleValue(json, keys: ["style"]),
speakerBoost: speakerBoost,
seed: intValue(json, keys: ["seed"]),
normalize: stringValue(json, keys: ["normalize", "apply_text_normalization"]),
language: stringValue(json, keys: ["lang", "language_code", "language"]),
outputFormat: stringValue(json, keys: ["output_format", "format"]),
latencyTier: intValue(json, keys: ["latency", "latency_tier", "latencyTier"]),
once: boolValue(json, keys: ["once"]))
let hasDirective = [
directive.voiceId,
directive.modelId,
directive.speed.map { "\($0)" },
directive.rateWPM.map { "\($0)" },
directive.stability.map { "\($0)" },
directive.similarity.map { "\($0)" },
directive.style.map { "\($0)" },
directive.speakerBoost.map { "\($0)" },
directive.seed.map { "\($0)" },
directive.normalize,
directive.language,
directive.outputFormat,
directive.latencyTier.map { "\($0)" },
directive.once.map { "\($0)" },
].contains { $0 != nil }
guard hasDirective else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
let knownKeys = Set([
"voice", "voice_id", "voiceid",
"model", "model_id", "modelid",
"speed", "rate", "wpm",
"stability", "similarity", "similarity_boost", "similarityboost",
"style",
"speaker_boost", "speakerboost",
"no_speaker_boost", "nospeakerboost",
"seed",
"normalize", "apply_text_normalization",
"lang", "language_code", "language",
"output_format", "format",
"latency", "latency_tier", "latencytier",
"once",
])
let unknownKeys = json.keys.filter { !knownKeys.contains($0.lowercased()) }.sorted()
lines.remove(at: firstNonEmpty)
if firstNonEmpty < lines.count {
let next = lines[firstNonEmpty].trimmingCharacters(in: .whitespacesAndNewlines)
if next.isEmpty {
lines.remove(at: firstNonEmpty)
}
}
let stripped = lines.joined(separator: "\n")
return TalkDirectiveParseResult(directive: directive, stripped: stripped, unknownKeys: unknownKeys)
}
private static func stringValue(_ dict: [String: Any], keys: [String]) -> String? {
for key in keys {
if let value = dict[key] as? String {
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmed.isEmpty { return trimmed }
}
}
return nil
}
private static func doubleValue(_ dict: [String: Any], keys: [String]) -> Double? {
for key in keys {
if let value = dict[key] as? Double { return value }
if let value = dict[key] as? Int { return Double(value) }
if let value = dict[key] as? String, let parsed = Double(value) { return parsed }
}
return nil
}
private static func intValue(_ dict: [String: Any], keys: [String]) -> Int? {
for key in keys {
if let value = dict[key] as? Int { return value }
if let value = dict[key] as? Double { return Int(value) }
if let value = dict[key] as? String, let parsed = Int(value) { return parsed }
}
return nil
}
private static func boolValue(_ dict: [String: Any], keys: [String]) -> Bool? {
for key in keys {
if let value = dict[key] as? Bool { return value }
if let value = dict[key] as? String {
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
if ["true", "yes", "1"].contains(trimmed) { return true }
if ["false", "no", "0"].contains(trimmed) { return false }
}
}
return nil
}
}

View File

@@ -0,0 +1,62 @@
import XCTest
@testable import ClawdisKit
final class TalkDirectiveTests: XCTestCase {
func testParsesDirectiveAndStripsLine() {
let text = """
{"voice":"abc123","once":true}
Hello there.
"""
let result = TalkDirectiveParser.parse(text)
XCTAssertEqual(result.directive?.voiceId, "abc123")
XCTAssertEqual(result.directive?.once, true)
XCTAssertEqual(result.stripped, "Hello there.")
}
func testIgnoresNonDirective() {
let text = "Hello world."
let result = TalkDirectiveParser.parse(text)
XCTAssertNil(result.directive)
XCTAssertEqual(result.stripped, text)
}
func testKeepsDirectiveLineIfNoRecognizedFields() {
let text = """
{"unknown":"value"}
Hello.
"""
let result = TalkDirectiveParser.parse(text)
XCTAssertNil(result.directive)
XCTAssertEqual(result.stripped, text)
}
func testParsesExtendedOptions() {
let text = """
{"voice_id":"v1","model_id":"m1","rate":200,"stability":0.5,"similarity":0.8,"style":0.2,"speaker_boost":true,"seed":1234,"normalize":"auto","lang":"en","output_format":"mp3_44100_128"}
Hello.
"""
let result = TalkDirectiveParser.parse(text)
XCTAssertEqual(result.directive?.voiceId, "v1")
XCTAssertEqual(result.directive?.modelId, "m1")
XCTAssertEqual(result.directive?.rateWPM, 200)
XCTAssertEqual(result.directive?.stability, 0.5)
XCTAssertEqual(result.directive?.similarity, 0.8)
XCTAssertEqual(result.directive?.style, 0.2)
XCTAssertEqual(result.directive?.speakerBoost, true)
XCTAssertEqual(result.directive?.seed, 1234)
XCTAssertEqual(result.directive?.normalize, "auto")
XCTAssertEqual(result.directive?.language, "en")
XCTAssertEqual(result.directive?.outputFormat, "mp3_44100_128")
XCTAssertEqual(result.stripped, "Hello.")
}
func testTracksUnknownKeys() {
let text = """
{"voice":"abc","mystery":"value","extra":1}
Hi.
"""
let result = TalkDirectiveParser.parse(text)
XCTAssertEqual(result.directive?.voiceId, "abc")
XCTAssertEqual(result.unknownKeys, ["extra", "mystery"])
}
}