Files
clawdbot/apps/shared/ClawdbotKit/Sources/ClawdbotKit/TalkDirective.swift
2026-01-04 14:38:51 +00:00

202 lines
7.7 KiB
Swift

import Foundation
public struct TalkDirective: Equatable, Sendable {
public var voiceId: String?
public var modelId: String?
public var speed: Double?
public var rateWPM: Int?
public var stability: Double?
public var similarity: Double?
public var style: Double?
public var speakerBoost: Bool?
public var seed: Int?
public var normalize: String?
public var language: String?
public var outputFormat: String?
public var latencyTier: Int?
public var once: Bool?
public init(
voiceId: String? = nil,
modelId: String? = nil,
speed: Double? = nil,
rateWPM: Int? = nil,
stability: Double? = nil,
similarity: Double? = nil,
style: Double? = nil,
speakerBoost: Bool? = nil,
seed: Int? = nil,
normalize: String? = nil,
language: String? = nil,
outputFormat: String? = nil,
latencyTier: Int? = nil,
once: Bool? = nil)
{
self.voiceId = voiceId
self.modelId = modelId
self.speed = speed
self.rateWPM = rateWPM
self.stability = stability
self.similarity = similarity
self.style = style
self.speakerBoost = speakerBoost
self.seed = seed
self.normalize = normalize
self.language = language
self.outputFormat = outputFormat
self.latencyTier = latencyTier
self.once = once
}
}
public struct TalkDirectiveParseResult: Equatable, Sendable {
public let directive: TalkDirective?
public let stripped: String
public let unknownKeys: [String]
public init(directive: TalkDirective?, stripped: String, unknownKeys: [String]) {
self.directive = directive
self.stripped = stripped
self.unknownKeys = unknownKeys
}
}
public enum TalkDirectiveParser {
public static func parse(_ text: String) -> TalkDirectiveParseResult {
let normalized = text.replacingOccurrences(of: "\r\n", with: "\n")
var lines = normalized.split(separator: "\n", omittingEmptySubsequences: false)
guard !lines.isEmpty else { return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: []) }
guard let firstNonEmptyIndex =
lines.firstIndex(where: { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty })
else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
var firstNonEmpty = firstNonEmptyIndex
if firstNonEmpty > 0 {
lines.removeSubrange(0..<firstNonEmpty)
firstNonEmpty = 0
}
let head = lines[firstNonEmpty].trimmingCharacters(in: .whitespacesAndNewlines)
guard head.hasPrefix("{"), head.hasSuffix("}") else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
guard let data = head.data(using: .utf8),
let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
let speakerBoost = self.boolValue(json, keys: ["speaker_boost", "speakerBoost"])
?? self.boolValue(json, keys: ["no_speaker_boost", "noSpeakerBoost"]).map { !$0 }
let directive = TalkDirective(
voiceId: stringValue(json, keys: ["voice", "voice_id", "voiceId"]),
modelId: stringValue(json, keys: ["model", "model_id", "modelId"]),
speed: doubleValue(json, keys: ["speed"]),
rateWPM: intValue(json, keys: ["rate", "wpm"]),
stability: doubleValue(json, keys: ["stability"]),
similarity: doubleValue(json, keys: ["similarity", "similarity_boost", "similarityBoost"]),
style: doubleValue(json, keys: ["style"]),
speakerBoost: speakerBoost,
seed: intValue(json, keys: ["seed"]),
normalize: stringValue(json, keys: ["normalize", "apply_text_normalization"]),
language: stringValue(json, keys: ["lang", "language_code", "language"]),
outputFormat: stringValue(json, keys: ["output_format", "format"]),
latencyTier: intValue(json, keys: ["latency", "latency_tier", "latencyTier"]),
once: boolValue(json, keys: ["once"]))
let hasDirective = [
directive.voiceId,
directive.modelId,
directive.speed.map { "\($0)" },
directive.rateWPM.map { "\($0)" },
directive.stability.map { "\($0)" },
directive.similarity.map { "\($0)" },
directive.style.map { "\($0)" },
directive.speakerBoost.map { "\($0)" },
directive.seed.map { "\($0)" },
directive.normalize,
directive.language,
directive.outputFormat,
directive.latencyTier.map { "\($0)" },
directive.once.map { "\($0)" },
].contains { $0 != nil }
guard hasDirective else {
return TalkDirectiveParseResult(directive: nil, stripped: text, unknownKeys: [])
}
let knownKeys = Set([
"voice", "voice_id", "voiceid",
"model", "model_id", "modelid",
"speed", "rate", "wpm",
"stability", "similarity", "similarity_boost", "similarityboost",
"style",
"speaker_boost", "speakerboost",
"no_speaker_boost", "nospeakerboost",
"seed",
"normalize", "apply_text_normalization",
"lang", "language_code", "language",
"output_format", "format",
"latency", "latency_tier", "latencytier",
"once",
])
let unknownKeys = json.keys.filter { !knownKeys.contains($0.lowercased()) }.sorted()
lines.remove(at: firstNonEmpty)
if firstNonEmpty < lines.count {
let next = lines[firstNonEmpty].trimmingCharacters(in: .whitespacesAndNewlines)
if next.isEmpty {
lines.remove(at: firstNonEmpty)
}
}
let stripped = lines.joined(separator: "\n")
return TalkDirectiveParseResult(directive: directive, stripped: stripped, unknownKeys: unknownKeys)
}
private static func stringValue(_ dict: [String: Any], keys: [String]) -> String? {
for key in keys {
if let value = dict[key] as? String {
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines)
if !trimmed.isEmpty { return trimmed }
}
}
return nil
}
private static func doubleValue(_ dict: [String: Any], keys: [String]) -> Double? {
for key in keys {
if let value = dict[key] as? Double { return value }
if let value = dict[key] as? Int { return Double(value) }
if let value = dict[key] as? String, let parsed = Double(value) { return parsed }
}
return nil
}
private static func intValue(_ dict: [String: Any], keys: [String]) -> Int? {
for key in keys {
if let value = dict[key] as? Int { return value }
if let value = dict[key] as? Double { return Int(value) }
if let value = dict[key] as? String, let parsed = Int(value) { return parsed }
}
return nil
}
private static func boolValue(_ dict: [String: Any], keys: [String]) -> Bool? {
for key in keys {
if let value = dict[key] as? Bool { return value }
if let value = dict[key] as? String {
let trimmed = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
if ["true", "yes", "1"].contains(trimmed) { return true }
if ["false", "no", "0"].contains(trimmed) { return false }
}
}
return nil
}
}