feat: add xAI voice bot with Grok integration

- Add pkg/xai WebSocket client for xAI Voice Agent API
- Add cmd/voicebot TeamSpeak bot with voice bridge
- Proper 20ms audio timing with buffered queue
- Clear audio queue on user interruption (VAD)
2026-01-15 22:38:39 +01:00
parent 02318b1490
commit aa8c0dbcbc
7 changed files with 677 additions and 0 deletions
--- a/pkg/xai/types.go
+++ b/pkg/xai/types.go
@@ -0,0 +1,136 @@
+package xai
+
+// Message types for xAI Voice Agent WebSocket API
+
+// ClientMessage is the base for messages sent to the server; it carries only the "type" discriminator.
+type ClientMessage struct {
+	Type string `json:"type"` // event type name, e.g. "session.update"
+}
+
+// SessionUpdate configures the voice session; the settings to apply travel in the Session field.
+type SessionUpdate struct {
+	Type    string  `json:"type"`    // "session.update"
+	Session Session `json:"session"` // always serialized (no omitempty), even when every field inside is empty
+}
+
+type Session struct {
+	Voice         string         `json:"voice,omitempty"`          // voice name (see the Voice* constants); left out of the JSON when empty
+	Instructions  string         `json:"instructions,omitempty"`   // presumably the prompt/instructions for the model — confirm against the API docs; left out when empty
+	TurnDetection *TurnDetection `json:"turn_detection,omitempty"` // turn-detection settings; a nil pointer is left out of the JSON rather than sent as null
+	Audio         *AudioConfig   `json:"audio,omitempty"`          // input/output audio formats; left out when nil
+}
+
+type TurnDetection struct {
+	Type string `json:"type"` // "server_vad". NOTE(review): a Go string cannot marshal as JSON null, so "null" is not expressible through this field — confirm how turn detection is meant to be disabled.
+}
+
+type AudioConfig struct {
+	Input  *AudioFormatConfig `json:"input,omitempty"`  // format of audio sent to the server; left out of the JSON when nil
+	Output *AudioFormatConfig `json:"output,omitempty"` // format of audio received from the server; left out of the JSON when nil
+}
+
+type AudioFormatConfig struct {
+	Format AudioFormat `json:"format"` // wraps the format so it serializes as a nested "format" object; always emitted
+}
+
+type AudioFormat struct {
+	Type string `json:"type"` // encoding identifier: "audio/pcm", "audio/pcmu", "audio/pcma"
+	Rate int    `json:"rate"` // sample rate, presumably in Hz: 8000, 16000, 24000, 48000, etc.
+}
+
+// InputAudioBufferAppend sends one chunk of audio data to the server.
+type InputAudioBufferAppend struct {
+	Type  string `json:"type"`  // "input_audio_buffer.append"
+	Audio string `json:"audio"` // Base64 encoded PCM; the caller must encode before assigning (this is a plain string field)
+}
+
+// ResponseCreate requests a response from the model.
+type ResponseCreate struct {
+	Type     string           `json:"type"`     // "response.create"
+	Response ResponseSettings `json:"response"` // per-response options; always serialized (no omitempty)
+}
+
+type ResponseSettings struct {
+	Modalities []string `json:"modalities"` // e.g. ["text", "audio"]; no omitempty, so a nil slice marshals as JSON null
+}
+
+// ConversationItemCreate creates a new conversation item; the item itself is carried in Item.
+type ConversationItemCreate struct {
+	Type string           `json:"type"` // "conversation.item.create"
+	Item ConversationItem `json:"item"` // the item to add; always serialized (no omitempty)
+}
+
+type ConversationItem struct {
+	Type    string        `json:"type"`    // "message"
+	Role    string        `json:"role"`    // "user", "assistant"
+	Content []ItemContent `json:"content"` // content parts of the item; no omitempty, so a nil slice marshals as JSON null
+}
+
+type ItemContent struct {
+	Type string `json:"type"`           // "input_text", "input_audio". NOTE(review): this struct has no field for audio data, so only text content can be populated here.
+	Text string `json:"text,omitempty"` // text payload; left out of the JSON when empty
+}
+
+// =============================================================================
+// Server Messages
+// =============================================================================
+
+// ServerMessage is the base for messages received from the server; it holds only the fields shared by the server message types below.
+type ServerMessage struct {
+	Type    string `json:"type"`               // event type name, e.g. "response.done"; presumably decoded first to pick the concrete type — confirm at the call site
+	EventID string `json:"event_id,omitempty"` // stays "" when the key is absent; omitempty only matters if this struct is re-marshaled
+}
+
+// SessionUpdated confirms session configuration and carries a Session payload from the server.
+type SessionUpdated struct {
+	Type    string  `json:"type"`     // "session.updated"
+	EventID string  `json:"event_id"` // event identifier assigned by the server
+	Session Session `json:"session"`  // presumably the configuration now in effect — confirm against the API docs
+}
+
+// ResponseOutputAudioDelta contains one chunk of audio data from the model.
+type ResponseOutputAudioDelta struct {
+	Type    string `json:"type"`     // "response.output_audio.delta"
+	EventID string `json:"event_id"` // event identifier assigned by the server
+	Delta   string `json:"delta"`    // Base64 encoded PCM; the consumer must decode it (this is a plain string field)
+}
+
+// ResponseDone indicates the response is complete; only the type and event ID are decoded.
+type ResponseDone struct {
+	Type    string `json:"type"`     // "response.done"
+	EventID string `json:"event_id"` // event identifier assigned by the server
+}
+
+// InputAudioBufferSpeechStarted indicates that voice activity detection (VAD) detected the start of speech.
+type InputAudioBufferSpeechStarted struct {
+	Type    string `json:"type"`     // "input_audio_buffer.speech_started"
+	EventID string `json:"event_id"` // event identifier assigned by the server
+}
+
+// InputAudioBufferSpeechStopped indicates that voice activity detection (VAD) detected the end of speech.
+type InputAudioBufferSpeechStopped struct {
+	Type    string `json:"type"`     // "input_audio_buffer.speech_stopped"
+	EventID string `json:"event_id"` // event identifier assigned by the server
+}
+
+// ErrorMessage represents an error from the server; the details are nested under Error.
+type ErrorMessage struct {
+	Type    string    `json:"type"`     // "error"
+	EventID string    `json:"event_id"` // event identifier assigned by the server
+	Error   ErrorInfo `json:"error"`    // error details decoded from the nested "error" object
+}
+
+type ErrorInfo struct {
+	Type    string `json:"type"`    // error category as reported by the server — exact value set not shown here; see the API docs
+	Code    string `json:"code"`    // presumably a machine-readable error code — confirm against the API docs
+	Message string `json:"message"` // presumably a human-readable description — confirm against the API docs
+}
+
+// Available voices, as untyped string constants; presumably the accepted values for Session.Voice — confirm against the API docs.
+const (
+	VoiceAra = "Ara"
+	VoiceRex = "Rex"
+	VoiceSal = "Sal"
+	VoiceEve = "Eve"
+	VoiceLeo = "Leo"
+)