package xai // Message types for xAI Voice Agent WebSocket API // ClientMessage is the base for messages sent to the server type ClientMessage struct { Type string `json:"type"` } // SessionUpdate configures the voice session type SessionUpdate struct { Type string `json:"type"` // "session.update" Session Session `json:"session"` } type Session struct { Voice string `json:"voice,omitempty"` Instructions string `json:"instructions,omitempty"` TurnDetection *TurnDetection `json:"turn_detection,omitempty"` Audio *AudioConfig `json:"audio,omitempty"` } type TurnDetection struct { Type string `json:"type"` // "server_vad" or null } type AudioConfig struct { Input *AudioFormatConfig `json:"input,omitempty"` Output *AudioFormatConfig `json:"output,omitempty"` } type AudioFormatConfig struct { Format AudioFormat `json:"format"` } type AudioFormat struct { Type string `json:"type"` // "audio/pcm", "audio/pcmu", "audio/pcma" Rate int `json:"rate"` // 8000, 16000, 24000, 48000, etc. } // InputAudioBufferAppend sends audio data to the server type InputAudioBufferAppend struct { Type string `json:"type"` // "input_audio_buffer.append" Audio string `json:"audio"` // Base64 encoded PCM } // ResponseCreate requests a response from the model type ResponseCreate struct { Type string `json:"type"` // "response.create" Response ResponseSettings `json:"response"` } type ResponseSettings struct { Modalities []string `json:"modalities"` // ["text", "audio"] } // ConversationItemCreate creates a new conversation item type ConversationItemCreate struct { Type string `json:"type"` // "conversation.item.create" Item ConversationItem `json:"item"` } type ConversationItem struct { Type string `json:"type"` // "message" Role string `json:"role"` // "user", "assistant" Content []ItemContent `json:"content"` } type ItemContent struct { Type string `json:"type"` // "input_text", "input_audio" Text string `json:"text,omitempty"` } // ============================================================================= // Server Messages // ============================================================================= // ServerMessage is the base for messages received from the server type ServerMessage struct { Type string `json:"type"` EventID string `json:"event_id,omitempty"` } // SessionUpdated confirms session configuration type SessionUpdated struct { Type string `json:"type"` // "session.updated" EventID string `json:"event_id"` Session Session `json:"session"` } // ResponseOutputAudioDelta contains audio data from the model type ResponseOutputAudioDelta struct { Type string `json:"type"` // "response.output_audio.delta" EventID string `json:"event_id"` Delta string `json:"delta"` // Base64 encoded PCM } // ResponseDone indicates the response is complete type ResponseDone struct { Type string `json:"type"` // "response.done" EventID string `json:"event_id"` } // InputAudioBufferSpeechStarted indicates VAD detected speech start type InputAudioBufferSpeechStarted struct { Type string `json:"type"` // "input_audio_buffer.speech_started" EventID string `json:"event_id"` } // InputAudioBufferSpeechStopped indicates VAD detected speech stop type InputAudioBufferSpeechStopped struct { Type string `json:"type"` // "input_audio_buffer.speech_stopped" EventID string `json:"event_id"` } // ErrorMessage represents an error from the server type ErrorMessage struct { Type string `json:"type"` // "error" EventID string `json:"event_id"` Error ErrorInfo `json:"error"` } type ErrorInfo struct { Type string `json:"type"` Code string `json:"code"` Message string `json:"message"` } // Available voices const ( VoiceAra = "Ara" VoiceRex = "Rex" VoiceSal = "Sal" VoiceEve = "Eve" VoiceLeo = "Leo" )