//go:build windows

package audio
import (
"encoding/binary"
"fmt"
"log"
"sync"
"time"
"unsafe"
"github.com/go-ole/go-ole"
"github.com/moutend/go-wca/pkg/wca"
)
// Player handles WASAPI audio playback with mixing support
type Player struct {
client *wca.IAudioClient
renderClient *wca.IAudioRenderClient
waveFormat *wca.WAVEFORMATEX
bufferSize uint32
volume float32
muted bool
mu sync.Mutex
running bool
stopChan chan struct{}
// Per-user sample queues for mixing,
// keyed by sender ID (interleaved int16 PCM)
userBuffers map[uint16][]int16
// User EQs (DSP Filters)
userEQs map[uint16]*EQChain
// User settings
userSettings map[uint16]*UserSettings
bufferMu sync.Mutex
}
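// UserSettings, EQChain, CalculateEQBands and the frameSamples constant are
// assumed to be defined elsewhere in this package: this file relies on the
// Volume, Muted, Gains and EQBands fields of UserSettings, on NewEQChain, and
// on the SetGain and Process methods of EQChain.
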
// NewPlayer creates a new WASAPI audio player
func NewPlayer() (*Player, error) {
// Initialize COM
ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED)
log.Printf("[Audio] Windows/WASAPI initializing...")
var deviceEnumerator *wca.IMMDeviceEnumerator
if err := wca.CoCreateInstance(
wca.CLSID_MMDeviceEnumerator,
0,
wca.CLSCTX_ALL,
wca.IID_IMMDeviceEnumerator,
&deviceEnumerator,
); err != nil {
return nil, fmt.Errorf("failed to create device enumerator: %w", err)
}
defer deviceEnumerator.Release()
var device *wca.IMMDevice
if err := deviceEnumerator.GetDefaultAudioEndpoint(wca.ERender, wca.EConsole, &device); err != nil {
return nil, fmt.Errorf("failed to get default render device: %w", err)
}
defer device.Release()
var audioClient *wca.IAudioClient
if err := device.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &audioClient); err != nil {
return nil, fmt.Errorf("failed to activate audio client: %w", err)
}
waveFormat := &wca.WAVEFORMATEX{
WFormatTag: wca.WAVE_FORMAT_PCM,
NChannels: 2, // STEREO
NSamplesPerSec: 48000,
WBitsPerSample: 16,
NBlockAlign: 4, // 16bit * 2 channels / 8 = 4 bytes
NAvgBytesPerSec: 192000, // 48000 * 4
CbSize: 0,
}
duration := wca.REFERENCE_TIME(100 * 10000) // 100ms buffer
if err := audioClient.Initialize(
wca.AUDCLNT_SHAREMODE_SHARED,
wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM|wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY,
duration,
0,
waveFormat,
nil,
); err != nil {
audioClient.Release()
return nil, fmt.Errorf("failed to initialize audio client: %w", err)
}
var bufferSize uint32
if err := audioClient.GetBufferSize(&bufferSize); err != nil {
audioClient.Release()
return nil, fmt.Errorf("failed to get buffer size: %w", err)
}
var renderClient *wca.IAudioRenderClient
if err := audioClient.GetService(wca.IID_IAudioRenderClient, &renderClient); err != nil {
audioClient.Release()
return nil, fmt.Errorf("failed to get render client: %w", err)
}
return &Player{
client: audioClient,
renderClient: renderClient,
waveFormat: waveFormat,
bufferSize: bufferSize,
volume: 1.0,
muted: false,
stopChan: make(chan struct{}),
userBuffers: make(map[uint16][]int16),
userEQs: make(map[uint16]*EQChain),
userSettings: make(map[uint16]*UserSettings),
}, nil
}
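// A minimal usage sketch (illustrative only; senderID and pcm stand in for
// values produced elsewhere, e.g. by the network receiver and audio decoder):
//
//	player, err := NewPlayer()
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer player.Close()
//	player.Start()
//	player.PlayPCM(senderID, pcm) // one decoded frame of int16 PCM per call
//	player.Stop()
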
// Start begins audio playback
func (p *Player) Start() error {
p.mu.Lock()
if p.running {
p.mu.Unlock()
return nil
}
p.running = true
p.stopChan = make(chan struct{})
p.mu.Unlock()
if err := p.client.Start(); err != nil {
return fmt.Errorf("failed to start audio client: %w", err)
}
go p.playbackLoop()
return nil
}
// Stop stops audio playback
func (p *Player) Stop() {
p.mu.Lock()
if !p.running {
p.mu.Unlock()
return
}
p.running = false
p.mu.Unlock()
close(p.stopChan)
p.client.Stop()
}
// Close releases all resources
func (p *Player) Close() {
p.Stop()
if p.renderClient != nil {
p.renderClient.Release()
}
if p.client != nil {
p.client.Release()
}
ole.CoUninitialize()
}
// PlayPCM adds audio samples to a specific user's buffer
func (p *Player) PlayPCM(senderID uint16, samples []int16) {
// Read the global mute flag under the mutex that guards it (it is written in SetMuted)
p.mu.Lock()
muted := p.muted
p.mu.Unlock()
if muted {
return
}
// ---------------------------------------------------------
// PHASE 1: Read Configuration (Safe Copy)
// ---------------------------------------------------------
p.bufferMu.Lock()
// Check per-user mute
settings, hasSettings := p.userSettings[senderID]
if hasSettings && settings.Muted {
p.bufferMu.Unlock()
return
}
// Get EQ Instance (Create if needed)
if _, ok := p.userEQs[senderID]; !ok {
p.userEQs[senderID] = NewEQChain(48000)
}
userEQ := p.userEQs[senderID]
// Check/Copy Gains
var gains []float64
hasActiveEQ := false
if hasSettings && len(settings.Gains) == 5 {
// Copy gains to avoid race if UI changes them while we process
gains = make([]float64, 5)
copy(gains, settings.Gains)
for _, g := range gains {
if g != 0 {
hasActiveEQ = true
break
}
}
}
p.bufferMu.Unlock()
// ---------------------------------------------------------
// END PHASE 1 (Lock Released)
// ---------------------------------------------------------
// ---------------------------------------------------------
// PHASE 2: Heavy Processing (Concurrent)
// ---------------------------------------------------------
// Normalize to interleaved stereo.
// A mono input frame (960 samples, 20ms at 48kHz) is duplicated into both channels (1920 samples).
// Input that is already stereo is copied as-is.
var stereoSamples []int16
if len(samples) < 1500 { // Heuristic: mono frames (960 samples) fall well below this threshold
stereoSamples = make([]int16, len(samples)*2)
for i, s := range samples {
stereoSamples[i*2] = s
stereoSamples[i*2+1] = s
}
} else {
// Already stereo (assumed)
stereoSamples = make([]int16, len(samples))
copy(stereoSamples, samples)
}
// Apply EQ Filters if needed
if hasActiveEQ {
// Update gains on the private EQ instance (Thread-safe per user)
for i, g := range gains {
userEQ.SetGain(i, g)
}
// Process Stereo
stereoSamples = userEQ.Process(stereoSamples)
}
// Calculate EQ bands for visualization
// Downmix to Mono for FFT visualization to save CPU and complexity
vizSamples := make([]int16, len(stereoSamples)/2)
for i := 0; i < len(vizSamples); i++ {
// Average L+R
val := (int32(stereoSamples[i*2]) + int32(stereoSamples[i*2+1])) / 2
vizSamples[i] = int16(val)
}
bands := CalculateEQBands(vizSamples, 48000)
// ---------------------------------------------------------
// PHASE 3: Write Output (Lock Acquired)
// ---------------------------------------------------------
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
// The user's settings may have been removed while the lock was released,
// so recreate them if needed before storing the freshly computed bands.
if _, ok := p.userSettings[senderID]; !ok {
p.userSettings[senderID] = &UserSettings{Volume: 1.0, Muted: false}
}
p.userSettings[senderID].EQBands = bands
// Append to user's specific buffer
p.userBuffers[senderID] = append(p.userBuffers[senderID], stereoSamples...)
// Limit buffer size per user (Stereo 2sec = 48000*2*2 = 192000 items)
// frameSamples is 960 (20ms). 2sec = 100 frames * 960 * 2 = 192000
const maxBufferSize = 48000 * 2 * 2 // 2 seconds stereo
if len(p.userBuffers[senderID]) > maxBufferSize {
// Drop oldest
drop := len(p.userBuffers[senderID]) - maxBufferSize
// Ensure we drop aligned to stereo frame (even number)
if drop%2 != 0 {
drop++
}
p.userBuffers[senderID] = p.userBuffers[senderID][drop:]
}
}
// SetVolume sets playback volume (0.0 to 1.0)
func (p *Player) SetVolume(vol float32) {
if vol < 0 {
vol = 0
}
if vol > 1.0 {
vol = 1.0
}
p.mu.Lock()
p.volume = vol
p.mu.Unlock()
}
// GetVolume returns current volume (0.0 to 1.0)
func (p *Player) GetVolume() float32 {
p.mu.Lock()
defer p.mu.Unlock()
return p.volume
}
// SetMuted sets mute state
func (p *Player) SetMuted(muted bool) {
p.mu.Lock()
p.muted = muted
p.mu.Unlock()
}
func (p *Player) IsMuted() bool {
p.mu.Lock()
defer p.mu.Unlock()
return p.muted
}
// SetUserVolume sets volume for a specific user (1.0 is default)
func (p *Player) SetUserVolume(clientID uint16, vol float32) {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
if _, ok := p.userSettings[clientID]; !ok {
p.userSettings[clientID] = &UserSettings{Volume: 1.0, Muted: false}
}
p.userSettings[clientID].Volume = vol
}
// SetUserMuted sets mute state for a specific user
func (p *Player) SetUserMuted(clientID uint16, muted bool) {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
if _, ok := p.userSettings[clientID]; !ok {
p.userSettings[clientID] = &UserSettings{Volume: 1.0, Muted: false}
}
p.userSettings[clientID].Muted = muted
}
// GetUserSettings returns the current volume and mute state for a user
func (p *Player) GetUserSettings(clientID uint16) (float32, bool) {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
if settings, ok := p.userSettings[clientID]; ok {
return settings.Volume, settings.Muted
}
return 1.0, false
}
// GetEQBands returns the current 5-band EQ values for a user (0.0-1.0)
func (p *Player) GetEQBands(clientID uint16) []float64 {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
if settings, ok := p.userSettings[clientID]; ok {
return settings.EQBands
}
return nil
}
// SetUserGain sets the EQ gain for a specific band (0-4) and user.
// Gain is in dB (e.g. -12.0 to +12.0)
func (p *Player) SetUserGain(clientID uint16, bandIdx int, gainDb float64) {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
p.ensureUserSettings(clientID)
// Ensure Gains slice exists
if len(p.userSettings[clientID].Gains) != 5 {
p.userSettings[clientID].Gains = make([]float64, 5)
}
if bandIdx >= 0 && bandIdx < 5 {
p.userSettings[clientID].Gains[bandIdx] = gainDb
}
}
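// For example, a UI layer could wire per-user controls to these setters
// (id is whatever sender ID the transport assigns; the values are illustrative):
//
//	player.SetUserVolume(id, 0.8)
//	player.SetUserMuted(id, false)
//	player.SetUserGain(id, 0, 3.0) // +3 dB on the lowest band
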
// GetUserGain returns the gain for a band
func (p *Player) GetUserGain(clientID uint16, bandIdx int) float64 {
p.bufferMu.Lock()
defer p.bufferMu.Unlock()
if settings, ok := p.userSettings[clientID]; ok {
if bandIdx >= 0 && bandIdx < len(settings.Gains) {
return settings.Gains[bandIdx]
}
}
return 0.0
}
func (p *Player) ensureUserSettings(clientID uint16) {
if _, ok := p.userSettings[clientID]; !ok {
p.userSettings[clientID] = &UserSettings{
Volume: 1.0,
Muted: false,
Gains: make([]float64, 5),
}
}
}
func (p *Player) ensureEQ(clientID uint16) {
if _, ok := p.userEQs[clientID]; !ok {
// Create a new EQ chain at an assumed 48000 Hz sample rate;
// ideally the actual stream rate would be passed in.
p.userEQs[clientID] = NewEQChain(48000)
}
}
func (p *Player) playbackLoop() {
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-p.stopChan:
return
case <-ticker.C:
p.writeFrame()
}
}
}
func (p *Player) writeFrame() {
for {
var padding uint32
if err := p.client.GetCurrentPadding(&padding); err != nil {
return
}
available := p.bufferSize - padding
if available < frameSamples {
return
}
p.bufferMu.Lock()
// Mix audio from all active user buffers
// Stereo mixing: buffer size is frameSamples * 2
mixed := make([]int32, frameSamples*2)
activeUsers := 0
hasAnyAudio := false
for id, buf := range p.userBuffers {
if len(buf) > 0 {
hasAnyAudio = true
activeUsers++
// Take up to frameSamples*2 (Stereo) from this user
toTake := frameSamples * 2
if len(buf) < int(frameSamples)*2 {
toTake = len(buf)
}
// Ensure we take pairs (alignment)
toTake = toTake &^ 1 // clear lowest bit
for i := 0; i < toTake; i++ {
sample := int32(buf[i])
// Apply user volume if set
if settings, ok := p.userSettings[id]; ok {
sample = int32(float32(sample) * settings.Volume)
}
mixed[i] += sample
}
// Advance buffer
if len(buf) <= toTake {
delete(p.userBuffers, id) // Finished this buffer
} else {
p.userBuffers[id] = buf[toTake:]
}
}
}
p.bufferMu.Unlock()
// If no audio is playing, don't write anything (keep buffer empty for lower latency when audio starts)
if !hasAnyAudio {
return
}
// Get WASAPI buffer
var buffer *byte
if err := p.renderClient.GetBuffer(uint32(frameSamples), &buffer); err != nil {
return
}
p.mu.Lock()
vol := p.volume
p.mu.Unlock()
// Write the mixed samples into the WASAPI buffer with master volume and hard clipping.
// GetBuffer returns a byte pointer, so the slice length is in bytes:
// frameSamples frames * 2 channels * 2 bytes per 16-bit sample.
bufSlice := unsafe.Slice(buffer, int(frameSamples)*2*2)
// mixed holds frameSamples*2 interleaved samples (L, R, L, R, ...); write them all.
for i := 0; i < int(frameSamples)*2; i++ {
val := mixed[i]
// Apply master volume
val = int32(float32(val) * vol)
// Hard clipping
if val > 32767 {
val = 32767
} else if val < -32768 {
val = -32768
}
// Each 16-bit sample occupies 2 bytes in the output buffer, at byte offset i*2.
binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val))
}
p.renderClient.ReleaseBuffer(uint32(frameSamples), 0)
}
}
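// mixClipped shows, in isolation, the accumulate-then-clip strategy that
// writeFrame applies inline above. It is a sketch only (nothing in this file
// calls it): any number of equal-rate int16 streams are summed into an int32
// accumulator so concurrent talkers cannot wrap around, then hard-clipped
// back into the int16 range.
func mixClipped(streams ...[]int16) []int16 {
length := 0
for _, s := range streams {
if len(s) > length {
length = len(s)
}
}
out := make([]int16, length)
for i := range out {
var sum int32
for _, s := range streams {
if i < len(s) {
sum += int32(s[i])
}
}
// Hard clipping to the int16 range, as in writeFrame
if sum > 32767 {
sum = 32767
} else if sum < -32768 {
sum = -32768
}
out[i] = int16(sum)
}
return out
}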