// Command main runs an xAI voice bot for TeamSpeak that shares a single xAI
// session across every user it hears.
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/signal"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"go-ts/pkg/ts3client"
|
|
"go-ts/pkg/xai"
|
|
)
|
|
|
|
// UserInfo is the roster entry kept for each connected TeamSpeak user.
// Users do not get an xAI session of their own; this is bookkeeping only.
type UserInfo struct {
	// ClientID is the TeamSpeak client ID the user was announced with.
	ClientID uint16
	// Nickname is the display name reported when the user entered.
	Nickname string
}
|
|
|
|
// Bot manages the TeamSpeak connection and single global xAI session
|
|
type Bot struct {
|
|
ts3 *ts3client.Client
|
|
apiKey string
|
|
voice string
|
|
prompt string
|
|
greeting string
|
|
|
|
selfID uint16
|
|
users map[uint16]*UserInfo
|
|
usersMu sync.RWMutex
|
|
startTime time.Time
|
|
|
|
// Global xAI Session (one for all users)
|
|
globalXAI *xai.Client
|
|
globalMu sync.Mutex
|
|
|
|
// Input audio mixing (multiple users → one stream)
|
|
inputBuffer []int16
|
|
inputMu sync.Mutex
|
|
lastInputTime time.Time
|
|
|
|
// Output audio (xAI response → TeamSpeak)
|
|
outputQueue chan []int16
|
|
outputBuffer []int16
|
|
outputMu sync.Mutex
|
|
|
|
done chan struct{}
|
|
}
|
|
|
|
func main() {
|
|
serverAddr := flag.String("server", "127.0.0.1:9987", "TeamSpeak 3 Server Address")
|
|
nickname := flag.String("nickname", "GrokBot", "Bot nickname")
|
|
voice := flag.String("voice", xai.VoiceAra, "xAI voice (Ara, Rex, Sal, Eve, Leo)")
|
|
greeting := flag.String("greeting", "", "Greeting message when users join (empty to disable)")
|
|
room := flag.String("room", "", "Channel name to join after connecting (empty = stay in default)")
|
|
flag.Parse()
|
|
|
|
apiKey := os.Getenv("XAI_API_KEY")
|
|
if apiKey == "" {
|
|
log.Fatal("XAI_API_KEY environment variable not set")
|
|
}
|
|
|
|
log.Println("=== xAI Voice Bot for TeamSpeak (Unified Session) ===")
|
|
log.Printf("Server: %s", *serverAddr)
|
|
log.Printf("Nickname: %s", *nickname)
|
|
log.Printf("Voice: %s", *voice)
|
|
|
|
bot := &Bot{
|
|
apiKey: apiKey,
|
|
voice: *voice,
|
|
prompt: "Eres Grok, un asistente de voz amigable y útil en un canal de TeamSpeak. Puedes escuchar a múltiples personas hablando. Responde de forma concisa y natural. Si varias personas hablan, trata de entender el contexto de la conversación grupal.",
|
|
greeting: *greeting,
|
|
users: make(map[uint16]*UserInfo),
|
|
startTime: time.Now(),
|
|
inputBuffer: make([]int16, 0, 960*50), // ~1 second buffer
|
|
outputQueue: make(chan []int16, 500), // ~10 seconds of audio
|
|
done: make(chan struct{}),
|
|
}
|
|
|
|
// Create TeamSpeak client
|
|
bot.ts3 = ts3client.New(*serverAddr, ts3client.Config{
|
|
Nickname: *nickname,
|
|
})
|
|
|
|
// Register event handlers
|
|
bot.ts3.On(ts3client.EventConnected, func(e *ts3client.ConnectedEvent) {
|
|
bot.selfID = e.ClientID
|
|
log.Printf("✓ Conectado a TeamSpeak! ClientID=%d, Server=%s", e.ClientID, e.ServerName)
|
|
|
|
// Initialize global xAI session after connecting
|
|
go func() {
|
|
if err := bot.initGlobalSession(); err != nil {
|
|
log.Printf("[Global] Error iniciando sesión xAI: %v", err)
|
|
}
|
|
}()
|
|
})
|
|
|
|
bot.ts3.On(ts3client.EventChannelList, func(e *ts3client.ChannelListEvent) {
|
|
log.Printf("✓ %d canales disponibles", len(e.Channels))
|
|
|
|
// Join specified room if provided
|
|
if *room != "" {
|
|
go func() {
|
|
// Small delay to ensure connection is fully established
|
|
time.Sleep(500 * time.Millisecond)
|
|
ch := bot.ts3.GetChannelByName(*room)
|
|
if ch != nil {
|
|
log.Printf("[Room] Uniéndose al canal: %s (ID=%d)", ch.Name, ch.ID)
|
|
if err := bot.ts3.JoinChannel(ch.ID); err != nil {
|
|
log.Printf("[Room] Error al unirse al canal: %v", err)
|
|
} else {
|
|
log.Printf("[Room] ✓ Unido al canal: %s", ch.Name)
|
|
}
|
|
} else {
|
|
log.Printf("[Room] ⚠ Canal no encontrado: %s", *room)
|
|
}
|
|
}()
|
|
}
|
|
})
|
|
|
|
bot.ts3.On(ts3client.EventClientEnter, func(e *ts3client.ClientEnterEvent) {
|
|
// Don't track ourselves
|
|
if e.ClientID == bot.selfID {
|
|
log.Printf(" (Soy yo, ignorando)")
|
|
return
|
|
}
|
|
|
|
log.Printf("→ Usuario entró: %s (ID=%d)", e.Nickname, e.ClientID)
|
|
|
|
bot.usersMu.Lock()
|
|
bot.users[e.ClientID] = &UserInfo{
|
|
ClientID: e.ClientID,
|
|
Nickname: e.Nickname,
|
|
}
|
|
bot.usersMu.Unlock()
|
|
|
|
// Notify xAI about new user (if past startup grace period)
|
|
if bot.greeting != "" && time.Since(bot.startTime) > 3*time.Second {
|
|
bot.globalMu.Lock()
|
|
if bot.globalXAI != nil && bot.globalXAI.IsConnected() {
|
|
msg := fmt.Sprintf("%s. El usuario %s acaba de unirse al canal.", bot.greeting, e.Nickname)
|
|
if err := bot.globalXAI.SendText(msg); err != nil {
|
|
log.Printf("[Global] Error enviando notificación: %v", err)
|
|
}
|
|
}
|
|
bot.globalMu.Unlock()
|
|
}
|
|
})
|
|
|
|
bot.ts3.On(ts3client.EventClientLeft, func(e *ts3client.ClientLeftEvent) {
|
|
bot.usersMu.Lock()
|
|
if user, ok := bot.users[e.ClientID]; ok {
|
|
log.Printf("← Usuario salió: %s (ID=%d, %s)", user.Nickname, e.ClientID, e.Reason)
|
|
delete(bot.users, e.ClientID)
|
|
}
|
|
bot.usersMu.Unlock()
|
|
})
|
|
|
|
// Audio handler: Mix ALL incoming audio into unified buffer
|
|
bot.ts3.On(ts3client.EventAudio, func(e *ts3client.AudioEvent) {
|
|
bot.handleInputAudio(e.SenderID, e.PCM)
|
|
})
|
|
|
|
bot.ts3.On(ts3client.EventError, func(e *ts3client.ErrorEvent) {
|
|
if e.ID != "0" {
|
|
log.Printf("! Error del servidor: [%s] %s", e.ID, e.Message)
|
|
}
|
|
})
|
|
|
|
// Handle shutdown
|
|
shutdownDone := make(chan struct{})
|
|
go func() {
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
|
<-sigChan
|
|
log.Println("Cerrando...")
|
|
|
|
// Signal all goroutines to stop
|
|
close(bot.done)
|
|
|
|
// Close global xAI session
|
|
bot.globalMu.Lock()
|
|
if bot.globalXAI != nil {
|
|
bot.globalXAI.Close()
|
|
}
|
|
bot.globalMu.Unlock()
|
|
|
|
// Wait for goroutines
|
|
time.Sleep(200 * time.Millisecond)
|
|
|
|
// Disconnect from TeamSpeak
|
|
bot.ts3.Disconnect()
|
|
|
|
close(shutdownDone)
|
|
}()
|
|
|
|
// Start input sender (sends mixed audio to xAI)
|
|
go bot.runInputSender()
|
|
|
|
// Start output mixer (sends xAI audio to TeamSpeak)
|
|
go bot.runOutputSender()
|
|
|
|
// Connect to TeamSpeak
|
|
if err := bot.ts3.Connect(); err != nil {
|
|
select {
|
|
case <-shutdownDone:
|
|
log.Println("Conexión cerrada por shutdown")
|
|
default:
|
|
log.Fatalf("Error de conexión: %v", err)
|
|
}
|
|
}
|
|
|
|
log.Println("Esperando confirmación final de shutdown...")
|
|
<-shutdownDone
|
|
log.Println("Shutdown completado. Saliendo.")
|
|
os.Exit(0)
|
|
}
|
|
|
|
// initGlobalSession creates the single xAI session for all users
|
|
func (b *Bot) initGlobalSession() error {
|
|
log.Println("[Global] Iniciando sesión xAI global...")
|
|
|
|
xaiClient := xai.New(b.apiKey)
|
|
|
|
// Handle output audio from xAI → buffer for TeamSpeak
|
|
xaiClient.OnAudio(func(pcm []int16) {
|
|
b.outputMu.Lock()
|
|
b.outputBuffer = append(b.outputBuffer, pcm...)
|
|
|
|
// Queue complete 960-sample frames
|
|
for len(b.outputBuffer) >= 960 {
|
|
frame := make([]int16, 960)
|
|
copy(frame, b.outputBuffer[:960])
|
|
b.outputBuffer = b.outputBuffer[960:]
|
|
|
|
select {
|
|
case b.outputQueue <- frame:
|
|
default:
|
|
// Queue full, drop oldest
|
|
}
|
|
}
|
|
b.outputMu.Unlock()
|
|
})
|
|
|
|
// Log transcripts
|
|
xaiClient.OnTranscript(func(text string) {
|
|
log.Printf("[Grok] %s", text)
|
|
})
|
|
|
|
// Connect to xAI
|
|
if err := xaiClient.Connect(); err != nil {
|
|
return fmt.Errorf("connect: %w", err)
|
|
}
|
|
|
|
// Configure the session
|
|
if err := xaiClient.ConfigureSession(b.voice, b.prompt); err != nil {
|
|
xaiClient.Close()
|
|
return fmt.Errorf("configure: %w", err)
|
|
}
|
|
|
|
b.globalMu.Lock()
|
|
b.globalXAI = xaiClient
|
|
b.globalMu.Unlock()
|
|
|
|
log.Println("[Global] ✓ Sesión xAI global activa")
|
|
return nil
|
|
}
|
|
|
|
// handleInputAudio mixes incoming audio from any user into the unified buffer
|
|
func (b *Bot) handleInputAudio(senderID uint16, pcm []int16) {
|
|
b.inputMu.Lock()
|
|
defer b.inputMu.Unlock()
|
|
|
|
// Extend buffer if needed
|
|
neededLen := len(pcm)
|
|
currentLen := len(b.inputBuffer)
|
|
|
|
if currentLen < neededLen {
|
|
// Extend with zeros
|
|
b.inputBuffer = append(b.inputBuffer, make([]int16, neededLen-currentLen)...)
|
|
}
|
|
|
|
// Mix (add with clipping protection)
|
|
for i, sample := range pcm {
|
|
val := int32(b.inputBuffer[i]) + int32(sample)
|
|
if val > 32767 {
|
|
val = 32767
|
|
}
|
|
if val < -32768 {
|
|
val = -32768
|
|
}
|
|
b.inputBuffer[i] = int16(val)
|
|
}
|
|
|
|
b.lastInputTime = time.Now()
|
|
}
|
|
|
|
// runInputSender sends buffered audio to xAI every 20ms
|
|
func (b *Bot) runInputSender() {
|
|
ticker := time.NewTicker(20 * time.Millisecond)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-b.done:
|
|
return
|
|
case <-ticker.C:
|
|
b.inputMu.Lock()
|
|
if len(b.inputBuffer) >= 960 {
|
|
// Extract one frame
|
|
frame := make([]int16, 960)
|
|
copy(frame, b.inputBuffer[:960])
|
|
// Shift buffer (remove consumed samples)
|
|
b.inputBuffer = b.inputBuffer[960:]
|
|
b.inputMu.Unlock()
|
|
|
|
// Send to global xAI session
|
|
b.globalMu.Lock()
|
|
if b.globalXAI != nil && b.globalXAI.IsConnected() {
|
|
b.globalXAI.SendAudio(frame)
|
|
}
|
|
b.globalMu.Unlock()
|
|
} else {
|
|
b.inputMu.Unlock()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// runOutputSender sends xAI audio responses to TeamSpeak with proper timing
|
|
func (b *Bot) runOutputSender() {
|
|
ticker := time.NewTicker(20 * time.Millisecond)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-b.done:
|
|
return
|
|
case <-ticker.C:
|
|
select {
|
|
case frame := <-b.outputQueue:
|
|
if err := b.ts3.SendAudio(frame); err != nil {
|
|
log.Printf("[Output] Error sending audio: %v", err)
|
|
}
|
|
default:
|
|
// No audio to send
|
|
}
|
|
}
|
|
}
|
|
}
|