go-ts/pkg/audio/capture.go

//go:build windows

package audio

import (
	"encoding/binary"
	"fmt"
	"sync"
	"time"
	"unsafe"

	"github.com/go-ole/go-ole"
	"github.com/moutend/go-wca/pkg/wca"
)

// Capturer handles WASAPI audio capture from microphone
type Capturer struct {
	client        *wca.IAudioClient
	captureClient *wca.IAudioCaptureClient
	waveFormat    *wca.WAVEFORMATEX
	bufferSize    uint32
	running       bool
	mu            sync.Mutex
	stopChan      chan struct{}
	wg            sync.WaitGroup

	// Callback for captured audio (called with 960-sample frames)
	onAudio func(samples []int16)

	// Sample accumulation buffer
	sampleBuffer []int16
	bufferMu     sync.Mutex

	// Current audio level (0-100)
	currentLevel int
	levelMu      sync.RWMutex
}
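
// Lock roles: mu guards running and onAudio, bufferMu guards sampleBuffer,
// and levelMu guards currentLevel.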

// NewCapturer creates a new WASAPI audio capturer
func NewCapturer() (*Capturer, error) {
	// Initialize COM (may already be initialized)
	ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED)

	// Get default capture endpoint (microphone)
	var deviceEnumerator *wca.IMMDeviceEnumerator
	if err := wca.CoCreateInstance(
		wca.CLSID_MMDeviceEnumerator,
		0,
		wca.CLSCTX_ALL,
		wca.IID_IMMDeviceEnumerator,
		&deviceEnumerator,
	); err != nil {
		return nil, fmt.Errorf("failed to create device enumerator: %w", err)
	}
	defer deviceEnumerator.Release()

	var device *wca.IMMDevice
	if err := deviceEnumerator.GetDefaultAudioEndpoint(wca.ECapture, wca.EConsole, &device); err != nil {
		return nil, fmt.Errorf("failed to get default capture device: %w", err)
	}
	defer device.Release()

	// Activate audio client
	var audioClient *wca.IAudioClient
	if err := device.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &audioClient); err != nil {
		return nil, fmt.Errorf("failed to activate audio client: %w", err)
	}

	// Set up format for 48kHz mono 16-bit (TeamSpeak format)
	waveFormat := &wca.WAVEFORMATEX{
		WFormatTag:      wca.WAVE_FORMAT_PCM,
		NChannels:       1,
		NSamplesPerSec:  48000,
		WBitsPerSample:  16,
		NBlockAlign:     2,
		NAvgBytesPerSec: 96000,
		CbSize:          0,
	}
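	// NBlockAlign = NChannels * WBitsPerSample/8 = 2 bytes per frame, and
	// NAvgBytesPerSec = NSamplesPerSec * NBlockAlign = 96000 bytes/s.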

	// Initialize in shared mode - 100ms buffer
	duration := wca.REFERENCE_TIME(100 * 10000) // 100ms in 100-nanosecond units
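	// AUTOCONVERTPCM + SRC_DEFAULT_QUALITY let the shared-mode audio engine
	// convert the device's mix format to the 48 kHz mono PCM requested above,
	// so Initialize does not fail when the device uses a different format.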
	if err := audioClient.Initialize(
		wca.AUDCLNT_SHAREMODE_SHARED,
		wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM|wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY,
		duration,
		0,
		waveFormat,
		nil,
	); err != nil {
		audioClient.Release()
		return nil, fmt.Errorf("failed to initialize audio client: %w", err)
	}

	// Get buffer size
	var bufferSize uint32
	if err := audioClient.GetBufferSize(&bufferSize); err != nil {
		audioClient.Release()
		return nil, fmt.Errorf("failed to get buffer size: %w", err)
	}

	// Get capture client
	var captureClient *wca.IAudioCaptureClient
	if err := audioClient.GetService(wca.IID_IAudioCaptureClient, &captureClient); err != nil {
		audioClient.Release()
		return nil, fmt.Errorf("failed to get capture client: %w", err)
	}

	return &Capturer{
		client:        audioClient,
		captureClient: captureClient,
		waveFormat:    waveFormat,
		bufferSize:    bufferSize,
		stopChan:      make(chan struct{}),
		sampleBuffer:  make([]int16, 0, captureFrameSamples*50), // ~1 second buffer
	}, nil
}
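
// Typical usage (illustrative sketch only; error handling elided):
//
//	c, err := NewCapturer()
//	if err != nil {
//		return err
//	}
//	defer c.Close()
//	c.SetCallback(func(frame []int16) {
//		// frame holds one 20 ms chunk (960 samples at 48 kHz mono)
//	})
//	if err := c.Start(); err != nil {
//		return err
//	}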

// SetCallback sets the callback for captured audio (receives 960-sample frames)
func (c *Capturer) SetCallback(fn func(samples []int16)) {
	c.mu.Lock()
	c.onAudio = fn
	c.mu.Unlock()
}
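
// Note: the callback runs on the capturer's internal goroutine while the sample
// buffer lock is held, so it should return quickly and must not call Stop or
// Close (doing so would deadlock on the goroutine's own WaitGroup).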

// Start begins audio capture
func (c *Capturer) Start() error {
	c.mu.Lock()
	if c.running {
		c.mu.Unlock()
		return nil
	}
	c.running = true
	c.stopChan = make(chan struct{}) // Recreate channel for each start
	c.mu.Unlock()

	if err := c.client.Start(); err != nil {
		// Roll back the running flag so a later Start attempt is not silently ignored
		c.mu.Lock()
		c.running = false
		c.mu.Unlock()
		return fmt.Errorf("failed to start audio client: %w", err)
	}

	c.wg.Add(1)
	go c.captureLoop()
	return nil
}

// Stop stops audio capture
func (c *Capturer) Stop() {
	c.mu.Lock()
	if !c.running {
		c.mu.Unlock()
		return
	}
	c.running = false
	c.mu.Unlock()

	close(c.stopChan)
	c.wg.Wait() // Wait for capture loop to finish before proceeding
	c.client.Stop()
}

// Close releases all resources
func (c *Capturer) Close() {
	c.Stop()
	if c.captureClient != nil {
		c.captureClient.Release()
	}
	if c.client != nil {
		c.client.Release()
	}
}

// GetLevel returns the current audio input level (0-100)
func (c *Capturer) GetLevel() int {
	c.levelMu.RLock()
	defer c.levelMu.RUnlock()
	return c.currentLevel
}

// IsRunning returns whether capture is active
func (c *Capturer) IsRunning() bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.running
}

// captureLoop polls WASAPI for new audio until stopChan is closed.
func (c *Capturer) captureLoop() {
	defer c.wg.Done()

	ticker := time.NewTicker(10 * time.Millisecond) // Check more often than 20ms
	defer ticker.Stop()

	for {
		select {
		case <-c.stopChan:
			return
		case <-ticker.C:
			c.readFromBuffer()
		}
	}
}
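
// readFromBuffer drains every packet WASAPI currently has available, converts the
// raw PCM bytes to int16 samples, updates the input level, and forwards complete
// 960-sample (20 ms) frames to the registered callback.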
func (c *Capturer) readFromBuffer() {
	// Read all available packets
	for {
		var packetLength uint32
		if err := c.captureClient.GetNextPacketSize(&packetLength); err != nil {
			return
		}
		if packetLength == 0 {
			break
		}

		var buffer *byte
		var numFrames uint32
		var flags uint32
		if err := c.captureClient.GetBuffer(&buffer, &numFrames, &flags, nil, nil); err != nil {
			return
		}
		if numFrames == 0 {
			c.captureClient.ReleaseBuffer(numFrames)
			continue
		}
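
		// Convert the raw little-endian 16-bit PCM bytes to int16 samples
		// (mono, so each frame is exactly 2 bytes).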
		samples := make([]int16, numFrames)
		bufSlice := unsafe.Slice(buffer, numFrames*2)
		for i := uint32(0); i < numFrames; i++ {
			samples[i] = int16(binary.LittleEndian.Uint16(bufSlice[i*2:]))
		}
		c.captureClient.ReleaseBuffer(numFrames)

		// Skip silent buffers
		if flags&wca.AUDCLNT_BUFFERFLAGS_SILENT != 0 {
			continue
		}

		// Add to sample buffer
		c.bufferMu.Lock()
		c.sampleBuffer = append(c.sampleBuffer, samples...)

		// Calculate level from latest samples
		level := CalculateRMSLevel(samples)
		c.levelMu.Lock()
		c.currentLevel = level
		c.levelMu.Unlock()

		// Send complete 960-sample frames
		for len(c.sampleBuffer) >= captureFrameSamples {
			frame := make([]int16, captureFrameSamples)
			copy(frame, c.sampleBuffer[:captureFrameSamples])
			c.sampleBuffer = c.sampleBuffer[captureFrameSamples:]

			// Call callback with the frame
			c.mu.Lock()
			callback := c.onAudio
			c.mu.Unlock()
			if callback != nil {
				callback(frame)
			}
		}
		c.bufferMu.Unlock()
	}
}
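
// captureFrameSamples and CalculateRMSLevel are referenced above but defined
// elsewhere in the audio package. For reference, a minimal sketch of what they
// plausibly look like given the 48 kHz mono 16-bit format (an assumption, not
// taken from this file):
//
//	// 20 ms of audio at 48 kHz mono.
//	const captureFrameSamples = 960
//
//	// CalculateRMSLevel maps the RMS amplitude of a frame to a 0-100 level.
//	func CalculateRMSLevel(samples []int16) int {
//		if len(samples) == 0 {
//			return 0
//		}
//		var sum float64
//		for _, s := range samples {
//			sum += float64(s) * float64(s)
//		}
//		rms := math.Sqrt(sum / float64(len(samples)))
//		level := int(rms / 32767.0 * 100)
//		if level > 100 {
//			level = 100
//		}
//		return level
//	}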