From be929ce55aff1738c0715865e4ab13de8e59f5c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Monta=C3=B1es=20Ojados?= Date: Sat, 17 Jan 2026 20:49:16 +0100 Subject: [PATCH] feat(audio): optimize equalizer with stereo support and gain caching --- go.mod | 1 + go.sum | 2 + pkg/audio/biquad.go | 186 +++++++++++++++++++++--------------------- pkg/audio/fft.go | 2 +- pkg/audio/playback.go | 150 ++++++++++++++++++++++++---------- 5 files changed, 203 insertions(+), 138 deletions(-) diff --git a/go.mod b/go.mod index 8fdf82a..a18c2fa 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-localereader v0.0.1 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/moutend/go-equalizer v0.1.0 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect diff --git a/go.sum b/go.sum index e90ab76..30e1f95 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,8 @@ github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2J github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/moutend/go-equalizer v0.1.0 h1:FDFsTr/zKUpLbNXZQmCMRDgisQhXxFOnX2q0PllJvxs= +github.com/moutend/go-equalizer v0.1.0/go.mod h1:iahcZcStDm66TNtrkMIhrQuhWdiWbFKSVjZ8yn+7Cgw= github.com/moutend/go-wca v0.3.0 h1:IzhsQ44zBzMdT42xlBjiLSVya9cPYOoKx9E+yXVhFo8= github.com/moutend/go-wca v0.3.0/go.mod h1:7VrPO512jnjFGJ6rr+zOoCfiYjOHRPNfbttJuxAurcw= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= diff --git a/pkg/audio/biquad.go b/pkg/audio/biquad.go index 4144ade..3b06040 100644 --- a/pkg/audio/biquad.go 
+++ b/pkg/audio/biquad.go @@ -1,97 +1,53 @@ package audio import ( - "math" + "github.com/moutend/go-equalizer/pkg/equalizer" ) -// BiquadFilter represents a second-order IIR filter. -// Formulas from RBJ Audio-EQ-Cookbook. -type BiquadFilter struct { - // Coefficients - b0, b1, b2, a1, a2 float64 - - // State (history) - x1, x2, y1, y2 float64 -} - -// NewPeakingEQ creates a peaking EQ filter (boost/cut at specific frequency) -// rate: sample rate (e.g. 48000) -// freq: center frequency in Hz -// q: quality factor (width of the bell) -// dbGain: gain in decibels (e.g. +3.0, -6.0) -func NewPeakingEQ(rate, freq, q, dbGain float64) *BiquadFilter { - f := &BiquadFilter{} - f.Configure(rate, freq, q, dbGain) - return f -} - -// Configure recalculates coefficients -func (f *BiquadFilter) Configure(rate, freq, q, dbGain float64) { - // Intermediate variables - A := math.Pow(10, dbGain/40) - omega := 2 * math.Pi * freq / rate - sn := math.Sin(omega) - cs := math.Cos(omega) - alpha := sn / (2 * q) - - // Coefficients - b0 := 1 + alpha*A - b1 := -2 * cs - b2 := 1 - alpha*A - a0 := 1 + alpha/A - a1 := -2 * cs - a2 := 1 - alpha/A - - // Normalize by a0 - invA0 := 1 / a0 - f.b0 = b0 * invA0 - f.b1 = b1 * invA0 - f.b2 = b2 * invA0 - f.a1 = a1 * invA0 - f.a2 = a2 * invA0 -} - -// Process processes a single sample -func (f *BiquadFilter) Process(in float64) float64 { - // Difference equation: - // y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2] - out := f.b0*in + f.b1*f.x1 + f.b2*f.x2 - f.a1*f.y1 - f.a2*f.y2 - - // Update history - f.x2 = f.x1 - f.x1 = in - f.y2 = f.y1 - f.y1 = out - - return out -} - -// Reset clears the filter memory -func (f *BiquadFilter) Reset() { - f.x1, f.x2, f.y1, f.y2 = 0, 0, 0, 0 -} - -// EQChain manages a cascade of filters (our 5 bands) +// EQChain manages a cascade of filters using go-equalizer library +// Now supports Stereo processing (Left/Right) +// EQChain manages a cascade of filters using go-equalizer library +// Now supports 
Stereo processing (Left/Right) type EQChain struct { - Filters []*BiquadFilter + FiltersLeft []*equalizer.Filter + FiltersRight []*equalizer.Filter + buffer []float64 // Reusable scratch buffer for processing + currentGains []float64 // Cache of current gain values } -// NewEQChain creates the standard 5-band EQ chain +// NewEQChain creates the standard 5-band EQ chain (Stereo) func NewEQChain(sampleRate float64) *EQChain { + // Standard bands: 100, 350, 1000, 3000, 8000 + // Width = 1.0 (approx 1 octave) + + createChain := func() []*equalizer.Filter { + f1 := equalizer.NewPeaking(sampleRate, 100, 1.0, 0) + f2 := equalizer.NewPeaking(sampleRate, 350, 1.0, 0) + f3 := equalizer.NewPeaking(sampleRate, 1000, 1.0, 0) + f4 := equalizer.NewPeaking(sampleRate, 3000, 1.0, 0) + f5 := equalizer.NewPeaking(sampleRate, 8000, 1.0, 0) + return []*equalizer.Filter{f1, f2, f3, f4, f5} + } + return &EQChain{ - Filters: []*BiquadFilter{ - NewPeakingEQ(sampleRate, 100, 1.0, 0), // SUB (Reduced from 1000 to proper bass freq) - NewPeakingEQ(sampleRate, 350, 1.0, 0), // LOW - NewPeakingEQ(sampleRate, 1000, 1.0, 0), // MID - NewPeakingEQ(sampleRate, 3000, 1.0, 0), // HI - NewPeakingEQ(sampleRate, 8000, 1.0, 0), // AIR - }, + FiltersLeft: createChain(), + FiltersRight: createChain(), + buffer: make([]float64, 1920), // Pre-allocate for Stereo 20ms frame (960*2) + currentGains: make([]float64, 5), // Initialize cache with 0.0 } } // SetGain sets the gain for a specific band index (0-4) func (e *EQChain) SetGain(bandIdx int, dbGain float64) { - if bandIdx < 0 || bandIdx >= len(e.Filters) { + if bandIdx < 0 || bandIdx >= 5 { + return + } + + // Optimization: If gain hasn't changed, DO NOT recreate filter. + // Recreating the filter resets its internal history state (bi-quad delay buffers), + // causing audible clicks/pops (discontinuities) at every 20ms frame boundary. 
+ const epsilon = 0.001 + if delta := dbGain - e.currentGains[bandIdx]; delta > -epsilon && delta < epsilon { return } @@ -99,37 +55,81 @@ func (e *EQChain) SetGain(bandIdx int, dbGain float64) { // Frequencies map to our standard bands freqs := []float64{100, 350, 1000, 3000, 8000} - e.Filters[bandIdx].Configure(rate, freqs[bandIdx], 1.0, dbGain) + // Create new filter with updated gain + // We use width=1.0 consistent with constructor + // Update BOTH Left and Right to keep balance + e.FiltersLeft[bandIdx] = equalizer.NewPeaking(rate, freqs[bandIdx], 1.0, dbGain) + e.FiltersRight[bandIdx] = equalizer.NewPeaking(rate, freqs[bandIdx], 1.0, dbGain) + + // Update cache + e.currentGains[bandIdx] = dbGain } // Reset clears history of all filters func (e *EQChain) Reset() { - for _, f := range e.Filters { - f.Reset() - } + // No-op: the library does not expose a Reset method, so filter history is kept. } -// ProcessBlock processes a slice of samples in-place (or returns new slice) -// We'll return a new float buffer for FFT analysis anyway +// Process filters interleaved stereo samples in place and returns the same slice. func (e *EQChain) Process(samples []int16) []int16 { - out := make([]int16, len(samples)) + // Grow buffer if needed + if cap(e.buffer) < len(samples) { + e.buffer = make([]float64, len(samples)) + } + e.buffer = e.buffer[:len(samples)] + + // Float conversion with normalization (-1.0 to 1.0) + // We also apply a slight pre-attenuation (Headroom) to avoid clipping when boosting EQ. + // -3dB = 0.707 + const headroom = 0.707 + const norm = 1.0 / 32768.0 for i, s := range samples { - val := float64(s) + e.buffer[i] = float64(s) * norm * headroom + } - // Run through cascade - for _, f := range e.Filters { - val = f.Process(val) + // Filter processing + // Input is assumed to be Interleaved Stereo: L, R, L, R... + // We iterate by 2 to process pairs. 
+ + for i := 0; i < len(e.buffer); i += 2 { + if i+1 >= len(e.buffer) { + break } - // Clip + valL := e.buffer[i] + valR := e.buffer[i+1] + + // Run through LEFT chain + for _, f := range e.FiltersLeft { + valL = f.Apply(valL) + } + + // Run through RIGHT chain + for _, f := range e.FiltersRight { + valR = f.Apply(valR) + } + + // Write back to buffer + e.buffer[i] = valL + e.buffer[i+1] = valR + } + + // Convert back to int16 + for i, val := range e.buffer { + // Denormalize + val = val * 32767.0 + + // Hard clipping if val > 32767 { val = 32767 } else if val < -32768 { val = -32768 } - out[i] = int16(val) + // Write back directly to samples + samples[i] = int16(val) } - return out + + return samples } diff --git a/pkg/audio/fft.go b/pkg/audio/fft.go index 0dd2950..6da205b 100644 --- a/pkg/audio/fft.go +++ b/pkg/audio/fft.go @@ -83,7 +83,7 @@ func CalculateEQBands(samples []int16, sampleRate int) []float64 { // Normalize output for visualization (0.0 to 1.0) // We need some scaling factor. Based on expected signals. 
- const scale = 50.0 // heuristic + const scale = 10.0 // Reduced from 50.0 to fix saturation for i := range bands { bands[i] = bands[i] * scale if bands[i] > 1.0 { diff --git a/pkg/audio/playback.go b/pkg/audio/playback.go index 4f95d7f..9a64119 100644 --- a/pkg/audio/playback.go +++ b/pkg/audio/playback.go @@ -69,11 +69,11 @@ func NewPlayer() (*Player, error) { waveFormat := &wca.WAVEFORMATEX{ WFormatTag: wca.WAVE_FORMAT_PCM, - NChannels: 1, + NChannels: 2, // STEREO NSamplesPerSec: 48000, WBitsPerSample: 16, - NBlockAlign: 2, - NAvgBytesPerSec: 96000, + NBlockAlign: 4, // 16bit * 2 channels / 8 = 4 bytes + NAvgBytesPerSec: 192000, // 48000 * 4 CbSize: 0, } @@ -167,21 +167,33 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) { return } + // --------------------------------------------------------- + // PHASE 1: Read Configuration (Safe Copy) + // --------------------------------------------------------- p.bufferMu.Lock() - defer p.bufferMu.Unlock() // Check per-user mute - if settings, ok := p.userSettings[senderID]; ok && settings.Muted { + settings, hasSettings := p.userSettings[senderID] + if hasSettings && settings.Muted { + p.bufferMu.Unlock() return } - // Apply EQ Filters if gains are non-zero - p.ensureEQ(senderID) + // Get EQ Instance (Create if needed) + if _, ok := p.userEQs[senderID]; !ok { + p.userEQs[senderID] = NewEQChain(48000) + } + userEQ := p.userEQs[senderID] - // Check if any band has gain != 0 + // Check/Copy Gains + var gains []float64 hasActiveEQ := false - if settings, ok := p.userSettings[senderID]; ok && len(settings.Gains) == 5 { - for _, g := range settings.Gains { + if hasSettings && len(settings.Gains) == 5 { + // Copy gains to avoid race if UI changes them while we process + gains = make([]float64, 5) + copy(gains, settings.Gains) + + for _, g := range gains { if g != 0 { hasActiveEQ = true break @@ -189,33 +201,59 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) { } } - // Apply filters if needed - // 
Note: We should probably process always if we want smooth transitions, - // but for optimization we skip if all 0. - // However, skipping might cause clicks if we jump from filtered to non-filtered state abruptly. - // For "Pro" audio, always process. For TUI app, let's process if active. - if hasActiveEQ { - if eq, ok := p.userEQs[senderID]; ok { - // Update gains from settings - // (Ideally we only do this on change, but doing it here ensures sync) - gains := p.userSettings[senderID].Gains - for i, g := range gains { - eq.SetGain(i, g) - } + p.bufferMu.Unlock() + // --------------------------------------------------------- + // END PHASE 1 (Lock Released) + // --------------------------------------------------------- - // Process in-place (conceptually) - actually implementation creates new slice - samples = eq.Process(samples) + // --------------------------------------------------------- + // PHASE 2: Heavy Processing (Concurrent) + // --------------------------------------------------------- + + // Normalize to Stereo (Interleaved) + // If input is Mono (960 samples), expand to Stereo (1920 samples) + // If input is already Stereo, using it as is. + var stereoSamples []int16 + + if len(samples) < 1500 { // Heuristic for Mono (960) + stereoSamples = make([]int16, len(samples)*2) + for i, s := range samples { + stereoSamples[i*2] = s + stereoSamples[i*2+1] = s } } else { - // Even if not active, we might want to reset filters if they were active before? - // Or just leave them alone. 
+ // Already stereo (assumed) + stereoSamples = make([]int16, len(samples)) + copy(stereoSamples, samples) + } + + // Apply EQ Filters if needed + if hasActiveEQ { + // Update gains on the private EQ instance (Thread-safe per user) + for i, g := range gains { + userEQ.SetGain(i, g) + } + // Process Stereo + stereoSamples = userEQ.Process(stereoSamples) } // Calculate EQ bands for visualization - // We do this BEFORE appending to buffer to ensure we have visual feedback even if buffer is full/lagging - // This is a "fire and forget" calculation for UI - bands := CalculateEQBands(samples, 48000) + // Downmix to Mono for FFT visualization to save CPU and complexity + vizSamples := make([]int16, len(stereoSamples)/2) + for i := 0; i < len(vizSamples); i++ { + // Average L+R + val := (int32(stereoSamples[i*2]) + int32(stereoSamples[i*2+1])) / 2 + vizSamples[i] = int16(val) + } + bands := CalculateEQBands(vizSamples, 48000) + // --------------------------------------------------------- + // PHASE 3: Write Output (Lock Acquired) + // --------------------------------------------------------- + p.bufferMu.Lock() + defer p.bufferMu.Unlock() + + // Re-check existence (could have disconnected?) // Update user settings with new bands if _, ok := p.userSettings[senderID]; !ok { p.userSettings[senderID] = &UserSettings{Volume: 1.0, Muted: false} @@ -223,13 +261,18 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) { p.userSettings[senderID].EQBands = bands // Append to user's specific buffer - // This ensures sequential playback for the same user - p.userBuffers[senderID] = append(p.userBuffers[senderID], samples...) + p.userBuffers[senderID] = append(p.userBuffers[senderID], stereoSamples...) - // Limit buffer size per user to avoid memory leaks if stalled - if len(p.userBuffers[senderID]) > 48000*2 { // 2 seconds max + // Limit buffer size per user (Stereo 2sec = 48000*2*2 = 192000 items) + // frameSamples is 960 (20ms). 
2sec = 100 frames * 960 * 2 = 192000 + const maxBufferSize = 48000 * 2 * 2 // 2 seconds stereo + if len(p.userBuffers[senderID]) > maxBufferSize { // Drop oldest - drop := len(p.userBuffers[senderID]) - 48000 + drop := len(p.userBuffers[senderID]) - maxBufferSize + // Ensure we drop aligned to stereo frame (even number) + if drop%2 != 0 { + drop++ + } p.userBuffers[senderID] = p.userBuffers[senderID][drop:] } } @@ -389,7 +432,8 @@ func (p *Player) writeFrame() { p.bufferMu.Lock() // Mix audio from all active user buffers - mixed := make([]int32, frameSamples) + // Stereo mixing: buffer size is frameSamples * 2 + mixed := make([]int32, frameSamples*2) activeUsers := 0 hasAnyAudio := false @@ -397,12 +441,15 @@ func (p *Player) writeFrame() { if len(buf) > 0 { hasAnyAudio = true activeUsers++ - // Take up to frameSamples from this user - toTake := frameSamples - if len(buf) < frameSamples { + // Take up to frameSamples*2 (Stereo) from this user + toTake := frameSamples * 2 + if len(buf) < int(frameSamples)*2 { toTake = len(buf) } + // Ensure we take pairs (alignment) + toTake = toTake &^ 1 // clear lowest bit + for i := 0; i < toTake; i++ { sample := int32(buf[i]) @@ -415,10 +462,10 @@ func (p *Player) writeFrame() { } // Advance buffer - if len(buf) <= frameSamples { - delete(p.userBuffers, id) + if len(buf) <= toTake { + delete(p.userBuffers, id) // Finished this buffer } else { - p.userBuffers[id] = buf[frameSamples:] + p.userBuffers[id] = buf[toTake:] } } } @@ -441,8 +488,19 @@ func (p *Player) writeFrame() { p.mu.Unlock() // Write mixed samples with clipping protection and volume application - bufSlice := unsafe.Slice(buffer, int(frameSamples)*2) - for i := 0; i < int(frameSamples); i++ { + // Output buffer is for Stereo (frameSamples * 2 channels) + bufSlice := unsafe.Slice(buffer, int(frameSamples)*2*2) // length in bytes: 2 channels * 2 bytes per sample + // unsafe.Slice takes an element count; bufSlice is used as a byte slice, so the count is frameSamples * 2 channels * 2 bytes/sample. 
+ // bufSlice is a byte slice: each 16-bit sample occupies 2 bytes, + // written little-endian with PutUint16. + + // The previous mono loop wrote frameSamples samples. + // The mixed buffer is now interleaved stereo, + // so its length is frameSamples * 2, + // and all frameSamples * 2 samples must be written. + + // Write every interleaved sample: + for i := 0; i < int(frameSamples)*2; i++ { // Iterate over all samples (L, R, L, R...) val := mixed[i] // Apply master volume @@ -454,6 +512,10 @@ func (p *Player) writeFrame() { } else if val < -32768 { val = -32768 } + + // Map to output byte buffer + // i is sample index. Each sample is 2 bytes. + // Offset = i * 2. binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val)) }