//go:build windows package audio import ( "encoding/binary" "fmt" "sync" "time" "unsafe" "github.com/go-ole/go-ole" "github.com/moutend/go-wca/pkg/wca" ) // Capturer handles WASAPI audio capture from microphone type Capturer struct { client *wca.IAudioClient captureClient *wca.IAudioCaptureClient waveFormat *wca.WAVEFORMATEX bufferSize uint32 running bool mu sync.Mutex stopChan chan struct{} // Callback for captured audio (called with 960-sample frames) onAudio func(samples []int16) // Sample accumulation buffer sampleBuffer []int16 bufferMu sync.Mutex // Current audio level (0-100) currentLevel int levelMu sync.RWMutex } // NewCapturer creates a new WASAPI audio capturer func NewCapturer() (*Capturer, error) { // Initialize COM (may already be initialized) ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED) // Get default capture endpoint (microphone) var deviceEnumerator *wca.IMMDeviceEnumerator if err := wca.CoCreateInstance( wca.CLSID_MMDeviceEnumerator, 0, wca.CLSCTX_ALL, wca.IID_IMMDeviceEnumerator, &deviceEnumerator, ); err != nil { return nil, fmt.Errorf("failed to create device enumerator: %w", err) } defer deviceEnumerator.Release() var device *wca.IMMDevice if err := deviceEnumerator.GetDefaultAudioEndpoint(wca.ECapture, wca.EConsole, &device); err != nil { return nil, fmt.Errorf("failed to get default capture device: %w", err) } defer device.Release() // Activate audio client var audioClient *wca.IAudioClient if err := device.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &audioClient); err != nil { return nil, fmt.Errorf("failed to activate audio client: %w", err) } // Set up format for 48kHz mono 16-bit (TeamSpeak format) waveFormat := &wca.WAVEFORMATEX{ WFormatTag: wca.WAVE_FORMAT_PCM, NChannels: 1, NSamplesPerSec: 48000, WBitsPerSample: 16, NBlockAlign: 2, NAvgBytesPerSec: 96000, CbSize: 0, } // Initialize in shared mode - 100ms buffer duration := wca.REFERENCE_TIME(100 * 10000) // 100ms in 100-nanosecond units if err := audioClient.Initialize( wca.AUDCLNT_SHAREMODE_SHARED, wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM|wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, duration, 0, waveFormat, nil, ); err != nil { audioClient.Release() return nil, fmt.Errorf("failed to initialize audio client: %w", err) } // Get buffer size var bufferSize uint32 if err := audioClient.GetBufferSize(&bufferSize); err != nil { audioClient.Release() return nil, fmt.Errorf("failed to get buffer size: %w", err) } // Get capture client var captureClient *wca.IAudioCaptureClient if err := audioClient.GetService(wca.IID_IAudioCaptureClient, &captureClient); err != nil { audioClient.Release() return nil, fmt.Errorf("failed to get capture client: %w", err) } return &Capturer{ client: audioClient, captureClient: captureClient, waveFormat: waveFormat, bufferSize: bufferSize, stopChan: make(chan struct{}), sampleBuffer: make([]int16, 0, captureFrameSamples*50), // ~1 second buffer }, nil } // SetCallback sets the callback for captured audio (receives 960-sample frames) func (c *Capturer) SetCallback(fn func(samples []int16)) { c.mu.Lock() c.onAudio = fn c.mu.Unlock() } // Start begins audio capture func (c *Capturer) Start() error { c.mu.Lock() if c.running { c.mu.Unlock() return nil } c.running = true c.stopChan = make(chan struct{}) // Recreate channel for each start c.mu.Unlock() if err := c.client.Start(); err != nil { return fmt.Errorf("failed to start audio client: %w", err) } go c.captureLoop() return nil } // Stop stops audio capture func (c *Capturer) Stop() { c.mu.Lock() if !c.running { c.mu.Unlock() return } c.running = false c.mu.Unlock() close(c.stopChan) c.client.Stop() } // Close releases all resources func (c *Capturer) Close() { c.Stop() if c.captureClient != nil { c.captureClient.Release() } if c.client != nil { c.client.Release() } } // GetLevel returns the current audio input level (0-100) func (c *Capturer) GetLevel() int { c.levelMu.RLock() defer c.levelMu.RUnlock() return c.currentLevel } // IsRunning returns whether capture is active func (c *Capturer) IsRunning() bool { c.mu.Lock() defer c.mu.Unlock() return c.running } func (c *Capturer) captureLoop() { ticker := time.NewTicker(10 * time.Millisecond) // Check more often than 20ms defer ticker.Stop() for { select { case <-c.stopChan: return case <-ticker.C: c.readFromBuffer() } } } func (c *Capturer) readFromBuffer() { // Read all available packets for { var packetLength uint32 if err := c.captureClient.GetNextPacketSize(&packetLength); err != nil { return } if packetLength == 0 { break } var buffer *byte var numFrames uint32 var flags uint32 if err := c.captureClient.GetBuffer(&buffer, &numFrames, &flags, nil, nil); err != nil { return } if numFrames == 0 { c.captureClient.ReleaseBuffer(numFrames) continue } samples := make([]int16, numFrames) bufSlice := unsafe.Slice(buffer, numFrames*2) for i := uint32(0); i < numFrames; i++ { samples[i] = int16(binary.LittleEndian.Uint16(bufSlice[i*2:])) } c.captureClient.ReleaseBuffer(numFrames) // Skip silent buffers if flags&wca.AUDCLNT_BUFFERFLAGS_SILENT != 0 { continue } // Add to sample buffer c.bufferMu.Lock() c.sampleBuffer = append(c.sampleBuffer, samples...) // Calculate level from latest samples level := CalculateRMSLevel(samples) c.levelMu.Lock() c.currentLevel = level c.levelMu.Unlock() // Send complete 960-sample frames for len(c.sampleBuffer) >= captureFrameSamples { frame := make([]int16, captureFrameSamples) copy(frame, c.sampleBuffer[:captureFrameSamples]) c.sampleBuffer = c.sampleBuffer[captureFrameSamples:] // Call callback with the frame c.mu.Lock() callback := c.onAudio c.mu.Unlock() if callback != nil { callback(frame) } } c.bufferMu.Unlock() } }