feat: Implement core voicebot functionality with TeamSpeak 3 and xAI integration.

This commit is contained in:
Jose Luis Montañes Ojados
2026-01-16 10:39:27 +01:00
parent aa8c0dbcbc
commit fb17813dcb
10 changed files with 460 additions and 287 deletions

View File

@@ -1,2 +1,5 @@
# Launcher for the voicebot via `go run`.
#
# Prepends the msys64 mingw64 bin directory to PATH and points PKG_CONFIG_PATH at
# its pkgconfig directory (presumably required to build native dependencies - confirm).
$env:PATH = "D:\esto_al_path\msys64\mingw64\bin;$env:PATH"
$env:PKG_CONFIG_PATH = "D:\esto_al_path\msys64\mingw64\lib\pkgconfig"

# SECURITY: the xAI API key must never be stored in a committed script.
# Provide it through the XAI_API_KEY environment variable (shell profile, secret
# manager, or a git-ignored local file). The key that used to be hard-coded here
# has been published in the repository history and must be revoked and rotated.
if ([string]::IsNullOrWhiteSpace($env:XAI_API_KEY)) {
    throw "XAI_API_KEY is not set. Define it in your environment before running this script."
}

# NOTE(review): both launch lines are kept exactly as found. The second one only
# starts after the first process exits - confirm whether both are intended.
go run ./cmd/voicebot --server localhost:9987 --nickname GrokBot --voice Ara
go run ./cmd/voicebot --server localhost:9987 --nickname Eva --voice Ara --greeting "Hola!"

2
bot2.ps1 Normal file
View File

@@ -0,0 +1,2 @@
# Launches a second voicebot instance (nickname Adam, voice Rex) against the local server.
#
# SECURITY: the xAI API key must never be stored in a committed script.
# Provide it through the XAI_API_KEY environment variable (shell profile, secret
# manager, or a git-ignored local file). The key that used to be hard-coded here
# has been published in the repository history and must be revoked and rotated.
if ([string]::IsNullOrWhiteSpace($env:XAI_API_KEY)) {
    throw "XAI_API_KEY is not set. Define it in your environment before running this script."
}

# The greeting is a single space on purpose: the bot trims it and treats a blank
# greeting as "do not greet".
go run ./cmd/voicebot --server localhost:9987 --nickname Adam --voice Rex --greeting " "

View File

@@ -5,6 +5,7 @@ import (
"log"
"os"
"os/signal"
"strings"
"sync"
"syscall"
"time"
@@ -29,16 +30,19 @@ type Bot struct {
apiKey string
voice string
prompt string
greeting string // Optional greeting when user joins
selfID uint16 // Our own ClientID
sessions map[uint16]*VoiceSession
sessionsMu sync.RWMutex
startTime time.Time
}
func main() {
serverAddr := flag.String("server", "127.0.0.1:9987", "TeamSpeak 3 Server Address")
nickname := flag.String("nickname", "GrokBot", "Bot nickname")
voice := flag.String("voice", xai.VoiceAra, "xAI voice (Ara, Rex, Sal, Eve, Leo)")
greeting := flag.String("greeting", "Saluda brevemente al usuario que acaba de unirse.", "Greeting message (empty to disable)")
flag.Parse()
apiKey := os.Getenv("XAI_API_KEY")
@@ -55,7 +59,9 @@ func main() {
apiKey: apiKey,
voice: *voice,
prompt: "Eres Grok, un asistente de voz amigable y útil. Responde de forma concisa y natural.",
greeting: *greeting,
sessions: make(map[uint16]*VoiceSession),
startTime: time.Now(),
}
// Create TeamSpeak client
@@ -83,7 +89,7 @@ func main() {
}
// Create xAI session for this user
go bot.createSession(e.ClientID, e.Nickname)
go bot.createSession(e.ClientID, e.Nickname, bot.greeting)
})
bot.ts3.On(ts3client.EventClientLeft, func(e *ts3client.ClientLeftEvent) {
@@ -95,10 +101,9 @@ func main() {
bot.ts3.On(ts3client.EventAudio, func(e *ts3client.AudioEvent) {
// Forward audio from TeamSpeak to all xAI sessions
// In a real implementation, you'd want to track which user
// is speaking and only send to their session
// Forward audio ONLY to the sender's session
bot.sessionsMu.RLock()
for _, session := range bot.sessions {
if session, ok := bot.sessions[e.SenderID]; ok {
if session.XAI != nil && session.XAI.IsConnected() {
session.XAI.SendAudio(e.PCM)
}
@@ -113,33 +118,64 @@ func main() {
})
// Handle shutdown
shutdownDone := make(chan struct{})
go func() {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
<-sigChan
log.Println("Cerrando...")
// Close all xAI sessions
// Close all xAI sessions and audio senders first
bot.sessionsMu.Lock()
for _, session := range bot.sessions {
// Close audio sender first
select {
case <-session.done:
// Already closed
default:
close(session.done)
}
// Then close xAI
if session.XAI != nil {
session.XAI.Close()
}
}
bot.sessionsMu.Unlock()
// Wait for audio senders to stop
time.Sleep(200 * time.Millisecond)
// Now disconnect from TeamSpeak
bot.ts3.Disconnect()
// os.Exit(0)
// Signal main that we are done
close(shutdownDone)
}()
// Start global audio mixer
go bot.runAudioMixer(shutdownDone)
// Connect to TeamSpeak
if err := bot.ts3.Connect(); err != nil {
// If connect returns error, check if it's because we're shutting down
select {
case <-shutdownDone:
// Normal shutdown
log.Println("Conexión cerrada por shutdown")
default:
log.Fatalf("Error de conexión: %v", err)
}
}
// Wait for shutdown to complete if we returned from Connect cleanly
log.Println("Esperando confirmación final de shutdown...")
<-shutdownDone
log.Println("Shutdown completado. Saliendo.")
os.Exit(0)
}
// createSession creates a new xAI voice session for a user
func (b *Bot) createSession(clientID uint16, nickname string) {
func (b *Bot) createSession(clientID uint16, nickname string, greeting string) {
log.Printf("[Session] Creando sesión xAI para %s...", nickname)
// Create session with audio queue
@@ -152,7 +188,8 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
}
// Start audio sender goroutine with proper 20ms timing
go b.audioSender(session)
// Global audio mixer handles sending
// go b.audioSender(session)
xaiClient := xai.New(b.apiKey)
@@ -184,6 +221,8 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
// Clear audio queue when user starts speaking (interruption)
xaiClient.OnSpeechStarted(func() {
// Disable queue clearing for now to prevent cutting off greetings due to sensitive VAD
/*
b.sessionsMu.Lock()
// Clear the buffer
session.AudioBuffer = session.AudioBuffer[:0]
@@ -192,7 +231,8 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
<-session.AudioQueue
}
b.sessionsMu.Unlock()
log.Printf("[Session] Audio queue cleared (user interruption)")
*/
log.Printf("[Session] Speech started by %s (VAD) - Ignoring interruption to ensure playback", nickname)
})
// Connect to xAI
@@ -218,26 +258,74 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
b.sessionsMu.Unlock()
log.Printf("[Session] ✓ Sesión xAI activa para %s", nickname)
// Send greeting to start conversation (if configured)
// Send greeting to start conversation (if configured)
if strings.TrimSpace(greeting) != "" {
// Only greet if we are past the startup grace period (3 seconds)
// This prevents "Greeting Storm" when joining a channel with existing users
if time.Since(b.startTime) > 3*time.Second {
go func() {
time.Sleep(500 * time.Millisecond) // Small delay for session to stabilize
if err := xaiClient.SendText(greeting); err != nil {
log.Printf("[Session] Error enviando saludo: %v", err)
}
}()
} else {
log.Printf("[Session] Omitiendo saludo inicial para %s (sesión existente detectada en arranque)", nickname)
}
}
}
// audioSender sends audio frames to TeamSpeak with proper 20ms timing
func (b *Bot) audioSender(session *VoiceSession) {
// runAudioMixer mixes audio from all active sessions and sends it to TeamSpeak
func (b *Bot) runAudioMixer(stop <-chan struct{}) {
ticker := time.NewTicker(20 * time.Millisecond)
defer ticker.Stop()
mixedFrame := make([]int16, 960)
for {
select {
case <-session.done:
case <-stop:
return
case <-ticker.C:
hasAudio := false
// Zero output buffer
for i := range mixedFrame {
mixedFrame[i] = 0
}
b.sessionsMu.RLock()
for _, session := range b.sessions {
// Try to get a frame from the queue
select {
case frame := <-session.AudioQueue:
if err := b.ts3.SendAudio(frame); err != nil {
log.Printf("[Session] Error enviando audio: %v", err)
hasAudio = true
// Mix (Sum and Clamp)
for i := 0; i < 960; i++ {
if i >= len(frame) {
break
}
val := int32(mixedFrame[i]) + int32(frame[i])
if val > 32767 {
val = 32767
}
if val < -32768 {
val = -32768
}
mixedFrame[i] = int16(val)
}
default:
// No frame available, that's ok
// No audio from this session
}
}
b.sessionsMu.RUnlock()
if hasAudio {
if err := b.ts3.SendAudio(mixedFrame); err != nil {
log.Printf("[Mixer] Error sending audio: %v", err)
}
}
}
}

View File

@@ -2,6 +2,7 @@ package client
import (
"log"
"sync"
"time"
"go-ts/pkg/protocol"
@@ -47,6 +48,7 @@ type Client struct {
// Audio
VoiceDecoders map[uint16]*opus.Decoder // Map VID (sender ID) to decoder
VoiceEncoder *opus.Encoder // Encoder for outgoing audio
VoiceEncoderMu sync.Mutex // Protects VoiceEncoder
// Event handler for public API
eventHandler EventHandler

View File

@@ -154,8 +154,12 @@ func (c *Client) handleCommand(pkt *protocol.Packet) error {
log.Printf("Command: %s", cmdStr)
// Parse Command
cmd, args := protocol.ParseCommand([]byte(cmdStr))
// Parse Commands (possibly multiple piped items)
commands := protocol.ParseCommands([]byte(cmdStr))
for _, command := range commands {
cmd := command.Name
args := command.Params
switch cmd {
case "initivexpand2":
@@ -412,6 +416,7 @@ func (c *Client) handleCommand(pkt *protocol.Packet) error {
default:
log.Printf("Unhandled command: %s Args: %v", cmd, args)
}
} // End for loop
return nil
}

View File

@@ -117,6 +117,10 @@ func (c *Client) SendVoice(pcm []int16) error {
channels := 1
codec := uint8(CodecOpusVoice)
// Protect shared encoder
c.VoiceEncoderMu.Lock()
defer c.VoiceEncoderMu.Unlock()
// Get or Create Encoder
if c.VoiceEncoder == nil {
var err error

View File

@@ -23,6 +23,31 @@ func ParseCommand(data []byte) (string, map[string]string) {
return cmd, args
}
// ParseCommands parses a response that may contain several items separated
// by a pipe (|) and returns one Command per item.
//
// Only the first item carries the command name; every later item holds
// parameters only, so it is reported under the first item's name.
func ParseCommands(data []byte) []*Command {
	parts := strings.Split(string(data), "|")
	out := make([]*Command, 0, len(parts))

	// The leading item is a regular command: name followed by parameters.
	name, firstParams := ParseCommand([]byte(parts[0]))
	out = append(out, &Command{Name: name, Params: firstParams})

	// ParseCommand expects a command name as the first token, so a placeholder
	// name is prepended to each remaining item; the placeholder is discarded
	// and only the parsed parameters are kept.
	for i := 1; i < len(parts); i++ {
		padded := "CMD " + strings.TrimSpace(parts[i])
		_, params := ParseCommand([]byte(padded))
		out = append(out, &Command{Name: name, Params: params})
	}

	return out
}
// Unescape TS3 string
func Unescape(s string) string {
r := strings.NewReplacer(

View File

@@ -132,9 +132,11 @@ func (c *Client) Connect() error {
err := c.internal.Connect(c.address)
if err != nil {
c.emit(EventDisconnected, &DisconnectedEvent{Reason: err.Error()})
log.Printf("[TS3Client] Connect returning with error: %v", err)
return err
}
log.Printf("[TS3Client] Connect returning cleanly")
return nil
}
@@ -154,18 +156,24 @@ func (c *Client) ConnectAsync() <-chan error {
// Disconnect closes the connection gracefully.
//
// When an internal client exists it sends a disconnect command with the
// message "leaving", sleeps so the packet can go out, stops the internal loop
// and closes the underlying connection if there is one. Regardless of that,
// it then marks the client as not connected and emits EventDisconnected with
// the reason "client disconnect".
func (c *Client) Disconnect() {
log.Println("[Disconnect] Starting disconnect sequence...")
if c.internal != nil {
// Tell the server we are leaving before tearing anything down.
log.Println("[Disconnect] Sending disconnect command...")
c.sendDisconnect("leaving")
// Small delay to allow the packet to be sent.
// NOTE(review): this 100 ms sleep is immediately followed by a 1000 ms sleep;
// it looks like a leftover from before the longer wait was added - confirm
// whether both are intended.
time.Sleep(100 * time.Millisecond)
// Wait for the packet to be sent and ACKed - the internal loop must still be
// running at this point, which is why Stop is only called afterwards.
log.Println("[Disconnect] Waiting for disconnect to be processed...")
time.Sleep(1000 * time.Millisecond)
// Stop the internal loop.
log.Println("[Disconnect] Stopping internal loop...")
c.internal.Stop()
// Close the underlying connection, if one was ever established.
if c.internal.Conn != nil {
log.Println("[Disconnect] Closing connection...")
c.internal.Conn.Close()
}
}
// Update state and notify listeners even when there was no internal client.
c.connected = false
log.Println("[Disconnect] Done")
c.emit(EventDisconnected, &DisconnectedEvent{Reason: "client disconnect"})
}

View File

@@ -129,6 +129,35 @@ func (c *Client) SendAudio(pcm []int16) error {
return c.sendJSON(msg)
}
// SendText sends a text message to trigger a Grok response.
//
// Two messages are written in order: a "conversation.item.create" carrying
// the text as a user message, then a "response.create" asking for a reply
// with the "text" and "audio" modalities. If the first send fails its error
// is returned and the response request is not sent; otherwise the result of
// the second send is returned.
func (c *Client) SendText(text string) error {
	// Step 1: add the text to the conversation as a user item.
	userItem := ConversationItem{
		Type:    "message",
		Role:    "user",
		Content: []ItemContent{{Type: "input_text", Text: text}},
	}
	err := c.sendJSON(ConversationItemCreate{
		Type: "conversation.item.create",
		Item: userItem,
	})
	if err != nil {
		return err
	}

	// Step 2: explicitly request a response to the item just created.
	settings := ResponseSettings{Modalities: []string{"text", "audio"}}
	return c.sendJSON(ResponseCreate{
		Type:     "response.create",
		Response: settings,
	})
}
// Close closes the WebSocket connection
func (c *Client) Close() {
c.mu.Lock()
@@ -179,6 +208,13 @@ func (c *Client) receiveLoop() {
_, message, err := c.conn.ReadMessage()
if err != nil {
// Check if closed intentionally
select {
case <-c.done:
return
default:
}
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
log.Println("[xAI] Connection closed normally")
} else {

Binary file not shown.