feat: Implement core voicebot functionality with TeamSpeak 3 and xAI integration.
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -25,20 +26,23 @@ type VoiceSession struct {
|
||||
|
||||
// Bot manages the TeamSpeak connection and xAI sessions
|
||||
type Bot struct {
|
||||
ts3 *ts3client.Client
|
||||
apiKey string
|
||||
voice string
|
||||
prompt string
|
||||
ts3 *ts3client.Client
|
||||
apiKey string
|
||||
voice string
|
||||
prompt string
|
||||
greeting string // Optional greeting when user joins
|
||||
|
||||
selfID uint16 // Our own ClientID
|
||||
sessions map[uint16]*VoiceSession
|
||||
sessionsMu sync.RWMutex
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
func main() {
|
||||
serverAddr := flag.String("server", "127.0.0.1:9987", "TeamSpeak 3 Server Address")
|
||||
nickname := flag.String("nickname", "GrokBot", "Bot nickname")
|
||||
voice := flag.String("voice", xai.VoiceAra, "xAI voice (Ara, Rex, Sal, Eve, Leo)")
|
||||
greeting := flag.String("greeting", "Saluda brevemente al usuario que acaba de unirse.", "Greeting message (empty to disable)")
|
||||
flag.Parse()
|
||||
|
||||
apiKey := os.Getenv("XAI_API_KEY")
|
||||
@@ -52,10 +56,12 @@ func main() {
|
||||
log.Printf("Voice: %s", *voice)
|
||||
|
||||
bot := &Bot{
|
||||
apiKey: apiKey,
|
||||
voice: *voice,
|
||||
prompt: "Eres Grok, un asistente de voz amigable y útil. Responde de forma concisa y natural.",
|
||||
sessions: make(map[uint16]*VoiceSession),
|
||||
apiKey: apiKey,
|
||||
voice: *voice,
|
||||
prompt: "Eres Grok, un asistente de voz amigable y útil. Responde de forma concisa y natural.",
|
||||
greeting: *greeting,
|
||||
sessions: make(map[uint16]*VoiceSession),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
// Create TeamSpeak client
|
||||
@@ -83,7 +89,7 @@ func main() {
|
||||
}
|
||||
|
||||
// Create xAI session for this user
|
||||
go bot.createSession(e.ClientID, e.Nickname)
|
||||
go bot.createSession(e.ClientID, e.Nickname, bot.greeting)
|
||||
})
|
||||
|
||||
bot.ts3.On(ts3client.EventClientLeft, func(e *ts3client.ClientLeftEvent) {
|
||||
@@ -95,10 +101,9 @@ func main() {
|
||||
|
||||
bot.ts3.On(ts3client.EventAudio, func(e *ts3client.AudioEvent) {
|
||||
// Forward audio from TeamSpeak to all xAI sessions
|
||||
// In a real implementation, you'd want to track which user
|
||||
// is speaking and only send to their session
|
||||
// Forward audio ONLY to the sender's session
|
||||
bot.sessionsMu.RLock()
|
||||
for _, session := range bot.sessions {
|
||||
if session, ok := bot.sessions[e.SenderID]; ok {
|
||||
if session.XAI != nil && session.XAI.IsConnected() {
|
||||
session.XAI.SendAudio(e.PCM)
|
||||
}
|
||||
@@ -113,33 +118,64 @@ func main() {
|
||||
})
|
||||
|
||||
// Handle shutdown
|
||||
shutdownDone := make(chan struct{})
|
||||
go func() {
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
<-sigChan
|
||||
log.Println("Cerrando...")
|
||||
|
||||
// Close all xAI sessions
|
||||
// Close all xAI sessions and audio senders first
|
||||
bot.sessionsMu.Lock()
|
||||
for _, session := range bot.sessions {
|
||||
// Close audio sender first
|
||||
select {
|
||||
case <-session.done:
|
||||
// Already closed
|
||||
default:
|
||||
close(session.done)
|
||||
}
|
||||
// Then close xAI
|
||||
if session.XAI != nil {
|
||||
session.XAI.Close()
|
||||
}
|
||||
}
|
||||
bot.sessionsMu.Unlock()
|
||||
|
||||
// Wait for audio senders to stop
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
// Now disconnect from TeamSpeak
|
||||
bot.ts3.Disconnect()
|
||||
// os.Exit(0)
|
||||
|
||||
// Signal main that we are done
|
||||
close(shutdownDone)
|
||||
}()
|
||||
|
||||
// Start global audio mixer
|
||||
go bot.runAudioMixer(shutdownDone)
|
||||
|
||||
// Connect to TeamSpeak
|
||||
if err := bot.ts3.Connect(); err != nil {
|
||||
log.Fatalf("Error de conexión: %v", err)
|
||||
// If connect returns error, check if it's because we're shutting down
|
||||
select {
|
||||
case <-shutdownDone:
|
||||
// Normal shutdown
|
||||
log.Println("Conexión cerrada por shutdown")
|
||||
default:
|
||||
log.Fatalf("Error de conexión: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for shutdown to complete if we returned from Connect cleanly
|
||||
log.Println("Esperando confirmación final de shutdown...")
|
||||
<-shutdownDone
|
||||
log.Println("Shutdown completado. Saliendo.")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// createSession creates a new xAI voice session for a user
|
||||
func (b *Bot) createSession(clientID uint16, nickname string) {
|
||||
func (b *Bot) createSession(clientID uint16, nickname string, greeting string) {
|
||||
log.Printf("[Session] Creando sesión xAI para %s...", nickname)
|
||||
|
||||
// Create session with audio queue
|
||||
@@ -152,7 +188,8 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
|
||||
}
|
||||
|
||||
// Start audio sender goroutine with proper 20ms timing
|
||||
go b.audioSender(session)
|
||||
// Global audio mixer handles sending
|
||||
// go b.audioSender(session)
|
||||
|
||||
xaiClient := xai.New(b.apiKey)
|
||||
|
||||
@@ -184,15 +221,18 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
|
||||
|
||||
// Clear audio queue when user starts speaking (interruption)
|
||||
xaiClient.OnSpeechStarted(func() {
|
||||
b.sessionsMu.Lock()
|
||||
// Clear the buffer
|
||||
session.AudioBuffer = session.AudioBuffer[:0]
|
||||
// Drain the queue
|
||||
for len(session.AudioQueue) > 0 {
|
||||
<-session.AudioQueue
|
||||
}
|
||||
b.sessionsMu.Unlock()
|
||||
log.Printf("[Session] Audio queue cleared (user interruption)")
|
||||
// Disable queue clearing for now to prevent cutting off greetings due to sensitive VAD
|
||||
/*
|
||||
b.sessionsMu.Lock()
|
||||
// Clear the buffer
|
||||
session.AudioBuffer = session.AudioBuffer[:0]
|
||||
// Drain the queue
|
||||
for len(session.AudioQueue) > 0 {
|
||||
<-session.AudioQueue
|
||||
}
|
||||
b.sessionsMu.Unlock()
|
||||
*/
|
||||
log.Printf("[Session] Speech started by %s (VAD) - Ignoring interruption to ensure playback", nickname)
|
||||
})
|
||||
|
||||
// Connect to xAI
|
||||
@@ -218,26 +258,74 @@ func (b *Bot) createSession(clientID uint16, nickname string) {
|
||||
b.sessionsMu.Unlock()
|
||||
|
||||
log.Printf("[Session] ✓ Sesión xAI activa para %s", nickname)
|
||||
|
||||
// Send greeting to start conversation (if configured)
|
||||
// Send greeting to start conversation (if configured)
|
||||
if strings.TrimSpace(greeting) != "" {
|
||||
// Only greet if we are past the startup grace period (3 seconds)
|
||||
// This prevents "Greeting Storm" when joining a channel with existing users
|
||||
if time.Since(b.startTime) > 3*time.Second {
|
||||
go func() {
|
||||
time.Sleep(500 * time.Millisecond) // Small delay for session to stabilize
|
||||
if err := xaiClient.SendText(greeting); err != nil {
|
||||
log.Printf("[Session] Error enviando saludo: %v", err)
|
||||
}
|
||||
}()
|
||||
} else {
|
||||
log.Printf("[Session] Omitiendo saludo inicial para %s (sesión existente detectada en arranque)", nickname)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// audioSender sends audio frames to TeamSpeak with proper 20ms timing
|
||||
func (b *Bot) audioSender(session *VoiceSession) {
|
||||
// runAudioMixer mixes audio from all active sessions and sends it to TeamSpeak
|
||||
func (b *Bot) runAudioMixer(stop <-chan struct{}) {
|
||||
ticker := time.NewTicker(20 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
mixedFrame := make([]int16, 960)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-session.done:
|
||||
case <-stop:
|
||||
return
|
||||
case <-ticker.C:
|
||||
// Try to get a frame from the queue
|
||||
select {
|
||||
case frame := <-session.AudioQueue:
|
||||
if err := b.ts3.SendAudio(frame); err != nil {
|
||||
log.Printf("[Session] Error enviando audio: %v", err)
|
||||
hasAudio := false
|
||||
|
||||
// Zero output buffer
|
||||
for i := range mixedFrame {
|
||||
mixedFrame[i] = 0
|
||||
}
|
||||
|
||||
b.sessionsMu.RLock()
|
||||
for _, session := range b.sessions {
|
||||
// Try to get a frame from the queue
|
||||
select {
|
||||
case frame := <-session.AudioQueue:
|
||||
hasAudio = true
|
||||
// Mix (Sum and Clamp)
|
||||
for i := 0; i < 960; i++ {
|
||||
if i >= len(frame) {
|
||||
break
|
||||
}
|
||||
val := int32(mixedFrame[i]) + int32(frame[i])
|
||||
if val > 32767 {
|
||||
val = 32767
|
||||
}
|
||||
if val < -32768 {
|
||||
val = -32768
|
||||
}
|
||||
mixedFrame[i] = int16(val)
|
||||
}
|
||||
default:
|
||||
// No audio from this session
|
||||
}
|
||||
}
|
||||
b.sessionsMu.RUnlock()
|
||||
|
||||
if hasAudio {
|
||||
if err := b.ts3.SendAudio(mixedFrame); err != nil {
|
||||
log.Printf("[Mixer] Error sending audio: %v", err)
|
||||
}
|
||||
default:
|
||||
// No frame available, that's ok
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user