feat: full WSL2 audio support and Windows audio stability fix

This commit is contained in:
Jose Luis Montañes Ojados
2026-01-17 01:38:13 +01:00
parent c1bb24473e
commit c81f64d9ca
12 changed files with 492 additions and 75 deletions

View File

@@ -1,3 +1,5 @@
//go:build windows
package audio
import (
@@ -11,8 +13,6 @@ import (
"github.com/moutend/go-wca/pkg/wca"
)
const captureFrameSamples = 960 // 20ms at 48kHz
// Capturer handles WASAPI audio capture from microphone
type Capturer struct {
client *wca.IAudioClient

112
pkg/audio/capture_linux.go Normal file
View File

@@ -0,0 +1,112 @@
//go:build linux
package audio
import (
"fmt"
"sync"
"github.com/gordonklaus/portaudio"
)
// Capturer handles audio capture using PortAudio.
//
// Frames arrive in processCapture, which Start registers as the PortAudio
// stream callback; they are forwarded to the function installed with
// SetCallback.
type Capturer struct {
	// stream is the PortAudio input stream opened by Start; nil until then.
	stream *portaudio.Stream
	// running is set by a successful Start and cleared by Stop.
	running bool
	// mu is held by the methods that touch stream, running and onAudio.
	mu sync.Mutex
	// onAudio receives a private copy of each captured frame while running.
	onAudio func(samples []int16)
	// currentLevel is the most recent CalculateRMSLevel result; see GetLevel.
	currentLevel int
	// levelMu guards currentLevel.
	levelMu sync.RWMutex
}
// NewCapturer takes a reference on the shared PortAudio library and returns
// an idle Capturer. No stream is opened until Start is called. The error from
// initPortAudio is returned unchanged.
func NewCapturer() (*Capturer, error) {
	err := initPortAudio()
	if err != nil {
		return nil, err
	}

	capturer := new(Capturer)
	return capturer, nil
}
// SetCallback installs fn as the receiver of captured frames. Passing nil
// makes processCapture drop frames.
func (c *Capturer) SetCallback(fn func(samples []int16)) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.onAudio = fn
}
// Start opens the default input device (mono, 48 kHz, frameSamples per
// buffer) and begins capture. It is a no-op when capture is already running.
//
// A stream left behind by an earlier Stop is closed before a new one is
// opened, so Start/Stop cycles do not leak PortAudio streams. c.stream is
// only assigned once the new stream has started, so a failed Start never
// leaves a closed stream in the struct.
func (c *Capturer) Start() error {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.running {
		return nil
	}

	// Stop only aborts the stream; release it here before replacing it.
	// Best effort: a close failure on the old stream must not block a restart.
	if c.stream != nil {
		c.stream.Close()
		c.stream = nil
	}

	stream, err := portaudio.OpenDefaultStream(1, 0, 48000, frameSamples, c.processCapture)
	if err != nil {
		return fmt.Errorf("failed to open portaudio capture stream: %w", err)
	}

	if err := stream.Start(); err != nil {
		stream.Close()
		return fmt.Errorf("failed to start portaudio capture stream: %w", err)
	}

	c.stream = stream
	c.running = true
	return nil
}
// Stop halts capture. It is a no-op when capture is not running. The stream
// is aborted but not closed.
//
// c.mu is released before Abort is called: processCapture acquires c.mu on
// the audio thread, so holding the lock while PortAudio tears that thread
// down could leave both sides waiting on each other.
func (c *Capturer) Stop() {
	c.mu.Lock()
	if !c.running {
		c.mu.Unlock()
		return
	}
	c.running = false
	stream := c.stream
	c.mu.Unlock()

	if stream != nil {
		// Best effort: there is nothing useful to do if the abort fails.
		stream.Abort()
	}
}
// Close stops capture, closes the stream if one was opened, and drops this
// Capturer's reference on the shared PortAudio library.
func (c *Capturer) Close() {
	c.Stop()

	func() {
		c.mu.Lock()
		defer c.mu.Unlock()

		if c.stream == nil {
			return
		}
		c.stream.Close()
	}()

	terminatePortAudio()
}
// processCapture is the PortAudio input callback. While the capturer is
// running and a callback is installed, it records the RMS level of the frame
// and passes a copy of the samples to the callback. Otherwise the frame is
// dropped and the level is left untouched.
func (c *Capturer) processCapture(in []int16) {
	// Snapshot shared state so the user callback runs without c.mu held.
	c.mu.Lock()
	active, deliver := c.running, c.onAudio
	c.mu.Unlock()

	if deliver == nil || !active {
		return
	}

	// Publish the input level for GetLevel.
	rms := CalculateRMSLevel(in)
	c.levelMu.Lock()
	c.currentLevel = rms
	c.levelMu.Unlock()

	// Hand out a copy rather than the slice PortAudio passed in.
	frame := make([]int16, len(in))
	copy(frame, in)
	deliver(frame)
}
// GetLevel returns the input level recorded for the most recently processed
// frame (zero until a frame has been processed).
func (c *Capturer) GetLevel() int {
	c.levelMu.RLock()
	level := c.currentLevel
	c.levelMu.RUnlock()

	return level
}
// IsRunning reports whether capture has been started and not yet stopped.
func (c *Capturer) IsRunning() bool {
	c.mu.Lock()
	active := c.running
	c.mu.Unlock()

	return active
}

13
pkg/audio/common.go Normal file
View File

@@ -0,0 +1,13 @@
package audio
// Shared constants
const (
	// frameSamples is the per-buffer frame size used when opening the
	// PortAudio streams and when mixing playback frames.
	frameSamples = 960 // 20ms at 48kHz
	// captureFrameSamples is the capture-side frame size.
	// NOTE(review): presumably consumed by the Windows capturer, which
	// previously declared it itself — confirm.
	captureFrameSamples = 960 // 20ms at 48kHz
)
// UserSettings represents per-user audio configuration.
type UserSettings struct {
	// Volume is the gain multiplied into this user's samples during mixing.
	Volume float32 // 0.0 - 1.0 (or higher for boost)
	// Muted, when true, makes the player discard audio received from this user.
	Muted bool
}

45
pkg/audio/global_linux.go Normal file
View File

@@ -0,0 +1,45 @@
//go:build linux
package audio
import (
"fmt"
"os"
"sync"
"github.com/gordonklaus/portaudio"
)
// Process-wide PortAudio state shared by every Capturer and Player.
var (
	// paMu guards paRefCount and serializes the Initialize/Terminate calls.
	paMu sync.Mutex
	// paRefCount is the number of initPortAudio calls not yet matched by
	// terminatePortAudio.
	paRefCount int
)
// initPortAudio takes a reference on the process-wide PortAudio library,
// initializing it when the reference count is zero. Every successful call
// must be balanced by one terminatePortAudio call.
//
// It returns the error from portaudio.Initialize unchanged; in that case no
// reference is taken.
func initPortAudio() error {
	paMu.Lock()
	defer paMu.Unlock()

	if paRefCount == 0 {
		if err := portaudio.Initialize(); err != nil {
			return err
		}

		// Device enumeration is diagnostic only, so a failure here is
		// reported rather than returned. It is still logged because it is
		// the first hint that no audio backend is reachable.
		devices, err := portaudio.Devices()
		if err != nil {
			fmt.Fprintf(os.Stderr, "[Audio] Linux/PortAudio initialized globally, but device enumeration failed: %v\n", err)
		} else {
			fmt.Fprintf(os.Stderr, "[Audio] Linux/PortAudio initialized globally. Devices found: %d\n", len(devices))
		}
	}

	paRefCount++
	return nil
}
// terminatePortAudio drops one reference on the process-wide PortAudio
// library and terminates it when the last reference is released.
//
// A call with no outstanding reference is ignored. Without that guard the
// counter would go negative and the next initPortAudio would skip
// portaudio.Initialize.
func terminatePortAudio() {
	paMu.Lock()
	defer paMu.Unlock()

	if paRefCount <= 0 {
		return
	}

	paRefCount--
	if paRefCount > 0 {
		return
	}

	fmt.Fprintf(os.Stderr, "[Audio] Linux/PortAudio terminating globally...\n")
	if err := portaudio.Terminate(); err != nil {
		// Nothing can be done about it at this point; make it visible.
		fmt.Fprintf(os.Stderr, "[Audio] Linux/PortAudio terminate failed: %v\n", err)
	}
}

View File

@@ -1,8 +1,11 @@
//go:build windows
package audio
import (
"encoding/binary"
"fmt"
"log"
"sync"
"time"
"unsafe"
@@ -32,19 +35,11 @@ type Player struct {
bufferMu sync.Mutex
}
type UserSettings struct {
Volume float32 // 0.0 - 1.0 (or higher for boost)
Muted bool
}
const (
frameSamples = 960 // 20ms at 48kHz
)
// NewPlayer creates a new WASAPI audio player
func NewPlayer() (*Player, error) {
// Initialize COM
ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED)
log.Printf("[Audio] Windows/WASAPI initializing...")
var deviceEnumerator *wca.IMMDeviceEnumerator
if err := wca.CoCreateInstance(
@@ -255,7 +250,7 @@ func (p *Player) GetUserSettings(clientID uint16) (float32, bool) {
}
func (p *Player) playbackLoop() {
ticker := time.NewTicker(20 * time.Millisecond)
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
for {
@@ -269,79 +264,88 @@ func (p *Player) playbackLoop() {
}
func (p *Player) writeFrame() {
var padding uint32
if err := p.client.GetCurrentPadding(&padding); err != nil {
return
}
for {
var padding uint32
if err := p.client.GetCurrentPadding(&padding); err != nil {
return
}
available := p.bufferSize - padding
if available < frameSamples {
return
}
available := p.bufferSize - padding
if available < frameSamples {
return
}
p.bufferMu.Lock()
p.bufferMu.Lock()
// Mix audio from all active user buffers
mixed := make([]int32, frameSamples)
activeUsers := 0
// Mix audio from all active user buffers
mixed := make([]int32, frameSamples)
activeUsers := 0
hasAnyAudio := false
for id, buf := range p.userBuffers {
if len(buf) > 0 {
activeUsers++
// Take up to frameSamples from this user
toTake := frameSamples
if len(buf) < frameSamples {
toTake = len(buf)
}
for i := 0; i < toTake; i++ {
sample := int32(buf[i])
// Apply user volume if set
if settings, ok := p.userSettings[id]; ok {
sample = int32(float32(sample) * settings.Volume)
for id, buf := range p.userBuffers {
if len(buf) > 0 {
hasAnyAudio = true
activeUsers++
// Take up to frameSamples from this user
toTake := frameSamples
if len(buf) < frameSamples {
toTake = len(buf)
}
mixed[i] += sample
}
for i := 0; i < toTake; i++ {
sample := int32(buf[i])
// Advance buffer
if len(buf) <= frameSamples {
delete(p.userBuffers, id)
} else {
p.userBuffers[id] = buf[frameSamples:]
// Apply user volume if set
if settings, ok := p.userSettings[id]; ok {
sample = int32(float32(sample) * settings.Volume)
}
mixed[i] += sample
}
// Advance buffer
if len(buf) <= frameSamples {
delete(p.userBuffers, id)
} else {
p.userBuffers[id] = buf[frameSamples:]
}
}
}
}
p.bufferMu.Unlock()
p.bufferMu.Unlock()
// Get WASAPI buffer
var buffer *byte
if err := p.renderClient.GetBuffer(uint32(frameSamples), &buffer); err != nil {
return
}
p.mu.Lock()
vol := p.volume
p.mu.Unlock()
// Write mixed samples with clipping protection and volume application
bufSlice := unsafe.Slice(buffer, int(frameSamples)*2)
for i := 0; i < int(frameSamples); i++ {
val := mixed[i]
// Apply volume
val = int32(float32(val) * vol)
// Hard clipping
if val > 32767 {
val = 32767
} else if val < -32768 {
val = -32768
// If no audio is playing, don't write anything (keep buffer empty for lower latency when audio starts)
if !hasAnyAudio {
return
}
binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val))
}
p.renderClient.ReleaseBuffer(uint32(frameSamples), 0)
// Get WASAPI buffer
var buffer *byte
if err := p.renderClient.GetBuffer(uint32(frameSamples), &buffer); err != nil {
return
}
p.mu.Lock()
vol := p.volume
p.mu.Unlock()
// Write mixed samples with clipping protection and volume application
bufSlice := unsafe.Slice(buffer, int(frameSamples)*2)
for i := 0; i < int(frameSamples); i++ {
val := mixed[i]
// Apply master volume
val = int32(float32(val) * vol)
// Hard clipping
if val > 32767 {
val = 32767
} else if val < -32768 {
val = -32768
}
binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val))
}
p.renderClient.ReleaseBuffer(uint32(frameSamples), 0)
}
}

201
pkg/audio/playback_linux.go Normal file
View File

@@ -0,0 +1,201 @@
//go:build linux
package audio
import (
"fmt"
"sync"
"github.com/gordonklaus/portaudio"
)
// Player handles audio playback using PortAudio.
//
// PCM is queued per sender with PlayPCM and mixed into the output stream by
// processAudio, which Start registers as the PortAudio stream callback.
type Player struct {
	// stream is the PortAudio output stream opened by Start; nil until then.
	stream *portaudio.Stream
	// volume is the master gain applied to the mixed signal (NewPlayer sets 1.0).
	volume float32
	// muted, when true, makes processAudio emit silence.
	muted bool
	// mu is held by Start, Stop, Close, SetVolume and SetMuted while they
	// touch stream, running, volume and muted.
	mu sync.Mutex
	// running is set by a successful Start and cleared by Stop.
	running bool
	// stopChan is allocated by NewPlayer.
	// NOTE(review): nothing visible here reads or closes it — confirm it is still needed.
	stopChan chan struct{}
	// User buffers for mixing: pending samples keyed by sender ID.
	userBuffers map[uint16][]int16
	// User settings: per-user volume and mute flag keyed by client ID.
	userSettings map[uint16]*UserSettings
	// bufferMu guards userBuffers and userSettings.
	bufferMu sync.Mutex
}
// NewPlayer takes a reference on the shared PortAudio library and returns an
// idle Player at full volume, unmuted, with empty per-user state. No stream
// is opened until Start is called. The error from initPortAudio is returned
// unchanged.
func NewPlayer() (*Player, error) {
	err := initPortAudio()
	if err != nil {
		return nil, err
	}

	player := new(Player)
	player.volume = 1.0
	player.muted = false
	player.stopChan = make(chan struct{})
	player.userBuffers = make(map[uint16][]int16)
	player.userSettings = make(map[uint16]*UserSettings)
	return player, nil
}
// Start opens the default output device (mono, 48 kHz, frameSamples per
// buffer) with processAudio as the stream callback and begins playback. It is
// a no-op when playback is already running.
//
// A stream left behind by an earlier Stop is closed before a new one is
// opened, so Start/Stop cycles do not leak PortAudio streams. p.stream is
// only assigned once the new stream has started, so a failed Start never
// leaves a closed stream in the struct.
func (p *Player) Start() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.running {
		return nil
	}

	// Stop only aborts the stream; release it here before replacing it.
	// Best effort: a close failure on the old stream must not block a restart.
	if p.stream != nil {
		p.stream.Close()
		p.stream = nil
	}

	// Create stream (Mono, 48kHz, 16-bit).
	// A callback-based stream is used for lower latency.
	stream, err := portaudio.OpenDefaultStream(0, 1, 48000, frameSamples, p.processAudio)
	if err != nil {
		return fmt.Errorf("failed to open portaudio stream: %w", err)
	}

	if err := stream.Start(); err != nil {
		stream.Close()
		return fmt.Errorf("failed to start portaudio stream: %w", err)
	}

	p.stream = stream
	p.running = true
	return nil
}
// Stop halts playback. It is a no-op when playback is not running. The
// stream is aborted but not closed.
//
// p.mu is released before Abort is called: processAudio acquires p.mu on the
// audio thread, so holding the lock while PortAudio tears that thread down
// could leave both sides waiting on each other.
func (p *Player) Stop() {
	p.mu.Lock()
	if !p.running {
		p.mu.Unlock()
		return
	}
	p.running = false
	stream := p.stream
	p.mu.Unlock()

	if stream != nil {
		// Best effort: there is nothing useful to do if the abort fails.
		stream.Abort()
	}
}
// Close stops playback, closes the stream if one was opened, and drops this
// Player's reference on the shared PortAudio library.
func (p *Player) Close() {
	p.Stop()

	func() {
		p.mu.Lock()
		defer p.mu.Unlock()

		if p.stream == nil {
			return
		}
		p.stream.Close()
	}()

	terminatePortAudio()
}
// processAudio is the PortAudio callback. It fills out with the mix of all
// queued user audio: each user's samples are scaled by that user's volume
// (when settings exist), summed, scaled by the master volume and hard-clipped
// to the int16 range. Consumed samples are removed from the queues.
//
// While the player is muted, out is silence and all queued audio is
// discarded. Otherwise the backlog would sit in the queues for the whole mute
// period and delay playback after unmute.
func (p *Player) processAudio(out []int16) {
	p.bufferMu.Lock()
	defer p.bufferMu.Unlock()

	// Start from silence.
	for i := range out {
		out[i] = 0
	}

	// muted and volume are written under p.mu by SetMuted/SetVolume, so both
	// are read under the same lock.
	p.mu.Lock()
	muted, vol := p.muted, p.volume
	p.mu.Unlock()

	if muted {
		for id := range p.userBuffers {
			delete(p.userBuffers, id)
		}
		return
	}

	mixed := make([]int32, len(out))

	for id, buf := range p.userBuffers {
		if len(buf) == 0 {
			continue
		}

		take := len(out)
		if len(buf) < take {
			take = len(buf)
		}

		// Look the user's gain up once per frame rather than once per sample.
		if settings, ok := p.userSettings[id]; ok {
			gain := settings.Volume
			for i, s := range buf[:take] {
				mixed[i] += int32(float32(s) * gain)
			}
		} else {
			for i, s := range buf[:take] {
				mixed[i] += int32(s)
			}
		}

		// Advance buffer; drop the entry once it is fully consumed.
		if len(buf) <= len(out) {
			delete(p.userBuffers, id)
		} else {
			p.userBuffers[id] = buf[len(out):]
		}
	}

	// Apply master volume and clip.
	for i := range out {
		val := int32(float32(mixed[i]) * vol)
		if val > 32767 {
			val = 32767
		} else if val < -32768 {
			val = -32768
		}
		out[i] = int16(val)
	}
}
// PlayPCM queues samples from senderID for mixing. Audio from a user whose
// settings mark them as muted is dropped. When a user's queue grows beyond
// 96000 samples (two seconds at 48 kHz) it is trimmed to the newest 48000.
func (p *Player) PlayPCM(senderID uint16, samples []int16) {
	p.bufferMu.Lock()
	defer p.bufferMu.Unlock()

	settings, known := p.userSettings[senderID]
	if known && settings.Muted {
		return
	}

	const (
		maxQueued     = 48000 * 2
		keepAfterTrim = 48000
	)

	queue := append(p.userBuffers[senderID], samples...)
	if len(queue) > maxQueued {
		// Keep only the most recent samples.
		queue = queue[len(queue)-keepAfterTrim:]
	}
	p.userBuffers[senderID] = queue
}
// SetVolume sets the master gain that processAudio applies to the mixed
// signal.
func (p *Player) SetVolume(vol float32) {
	p.mu.Lock()
	p.volume = vol
	p.mu.Unlock()
}
// SetMuted sets the master mute flag; while it is true processAudio outputs
// silence.
func (p *Player) SetMuted(muted bool) {
	p.mu.Lock()
	p.muted = muted
	p.mu.Unlock()
}
// SetUserVolume sets the mixing gain for clientID. Settings are created with
// the defaults (volume 1.0, not muted) if the user has none yet.
func (p *Player) SetUserVolume(clientID uint16, vol float32) {
	p.bufferMu.Lock()
	defer p.bufferMu.Unlock()

	entry, exists := p.userSettings[clientID]
	if !exists {
		entry = &UserSettings{Volume: 1.0, Muted: false}
		p.userSettings[clientID] = entry
	}
	entry.Volume = vol
}
// SetUserMuted sets the mute flag for clientID. Settings are created with
// the defaults (volume 1.0, not muted) if the user has none yet.
func (p *Player) SetUserMuted(clientID uint16, muted bool) {
	p.bufferMu.Lock()
	defer p.bufferMu.Unlock()

	entry, exists := p.userSettings[clientID]
	if !exists {
		entry = &UserSettings{Volume: 1.0, Muted: false}
		p.userSettings[clientID] = entry
	}
	entry.Muted = muted
}
// GetUserSettings returns the volume and mute flag stored for clientID, or
// the defaults (1.0, false) when the user has no settings.
func (p *Player) GetUserSettings(clientID uint16) (float32, bool) {
	p.bufferMu.Lock()
	defer p.bufferMu.Unlock()

	volume, muted := float32(1.0), false
	if entry, found := p.userSettings[clientID]; found {
		volume, muted = entry.Volume, entry.Muted
	}
	return volume, muted
}