feat(audio): optimize equalizer with stereo support and gain caching

2026-01-17 20:49:16 +01:00
parent 711eb148df
commit be929ce55a
5 changed files with 203 additions and 138 deletions
--- a/go.mod
+++ b/go.mod
@@ -30,6 +30,7 @@ require (
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/mattn/go-localereader v0.0.1 // indirect
 	github.com/mattn/go-runewidth v0.0.16 // indirect
 	github.com/moutend/go-equalizer v0.1.0 // indirect
 	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -32,6 +32,8 @@ github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2J
 github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
 github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
 github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/moutend/go-equalizer v0.1.0 h1:FDFsTr/zKUpLbNXZQmCMRDgisQhXxFOnX2q0PllJvxs=
 github.com/moutend/go-equalizer v0.1.0/go.mod h1:iahcZcStDm66TNtrkMIhrQuhWdiWbFKSVjZ8yn+7Cgw=
 github.com/moutend/go-wca v0.3.0 h1:IzhsQ44zBzMdT42xlBjiLSVya9cPYOoKx9E+yXVhFo8=
 github.com/moutend/go-wca v0.3.0/go.mod h1:7VrPO512jnjFGJ6rr+zOoCfiYjOHRPNfbttJuxAurcw=
 github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
--- a/pkg/audio/biquad.go
+++ b/pkg/audio/biquad.go
@@ -1,97 +1,53 @@
 package audio
 import (
-	"math"
+	"github.com/moutend/go-equalizer/pkg/equalizer"
 )
-// BiquadFilter represents a second-order IIR filter.
+// EQChain manages a cascade of filters using go-equalizer library
-// Formulas from RBJ Audio-EQ-Cookbook.
+// Now supports Stereo processing (Left/Right)
-type BiquadFilter struct {
+// EQChain manages a cascade of filters using go-equalizer library
-	// Coefficients
+// Now supports Stereo processing (Left/Right)
 	b0, b1, b2, a1, a2 float64
 	// State (history)
 	x1, x2, y1, y2 float64
 }
 // NewPeakingEQ allocates a BiquadFilter and configures it as a peaking EQ
 // (a boost or cut centred on one frequency).
 //
 // rate is the sample rate in Hz (e.g. 48000), freq the centre frequency in
 // Hz, q the quality factor (width of the bell) and dbGain the gain in
 // decibels (e.g. +3.0, -6.0).
 func NewPeakingEQ(rate, freq, q, dbGain float64) *BiquadFilter {
 	filter := new(BiquadFilter)
 	filter.Configure(rate, freq, q, dbGain)
 	return filter
 }
 // Configure derives the peaking-EQ coefficients for the given sample rate,
 // centre frequency, quality factor and gain (in dB) and stores them on the
 // filter, each normalized by a0. The history values are not modified.
 func (f *BiquadFilter) Configure(rate, freq, q, dbGain float64) {
 	// Linear amplitude and angular frequency of the band centre.
 	amp := math.Pow(10, dbGain/40)
 	w0 := 2 * math.Pi * freq / rate
 	sinW0 := math.Sin(w0)
 	cosW0 := math.Cos(w0)
 	alpha := sinW0 / (2 * q)
 	// Every coefficient is multiplied by 1/a0, so a0 itself is never stored.
 	scale := 1 / (1 + alpha/amp)
 	// b1 and a1 share the same raw value, -2*cos(w0).
 	centre := -2 * cosW0
 	f.b0 = (1 + alpha*amp) * scale
 	f.b1 = centre * scale
 	f.b2 = (1 - alpha*amp) * scale
 	f.a1 = centre * scale
 	f.a2 = (1 - alpha/amp) * scale
 }
 // Process feeds one input sample through the filter and returns the
 // corresponding output sample, updating the stored history.
 func (f *BiquadFilter) Process(in float64) float64 {
 	// y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2]
 	y := f.b0*in + f.b1*f.x1 + f.b2*f.x2 - f.a1*f.y1 - f.a2*f.y2
 	// Shift the history by one sample: the newest values move into slot 1,
 	// the previous slot-1 values into slot 2.
 	f.x2, f.x1 = f.x1, in
 	f.y2, f.y1 = f.y1, y
 	return y
 }
 // Reset zeroes the stored input and output history (x1, x2, y1, y2); the
 // coefficients are left as configured.
 func (f *BiquadFilter) Reset() {
 	f.x1, f.x2 = 0, 0
 	f.y1, f.y2 = 0, 0
 }
 // EQChain manages a cascade of filters (our 5 bands)
 type EQChain struct {
-	Filters []*BiquadFilter
+	FiltersLeft  []*equalizer.Filter
 	FiltersRight []*equalizer.Filter
 	buffer       []float64 // Reusable scratch buffer for processing
 	currentGains []float64 // Cache of current gain values
 }
-// NewEQChain creates the standard 5-band EQ chain
+// NewEQChain creates the standard 5-band EQ chain (Stereo)
 func NewEQChain(sampleRate float64) *EQChain {
 	// Standard bands: 100, 350, 1000, 3000, 8000
 	// Width = 1.0 (approx 1 octave)
 	createChain := func() []*equalizer.Filter {
 		f1 := equalizer.NewPeaking(sampleRate, 100, 1.0, 0)
 		f2 := equalizer.NewPeaking(sampleRate, 350, 1.0, 0)
 		f3 := equalizer.NewPeaking(sampleRate, 1000, 1.0, 0)
 		f4 := equalizer.NewPeaking(sampleRate, 3000, 1.0, 0)
 		f5 := equalizer.NewPeaking(sampleRate, 8000, 1.0, 0)
 		return []*equalizer.Filter{f1, f2, f3, f4, f5}
 	}
 	return &EQChain{
-		Filters: []*BiquadFilter{
+		FiltersLeft:  createChain(),
-			NewPeakingEQ(sampleRate, 100, 1.0, 0),  // SUB (Reduced from 1000 to proper bass freq)
+		FiltersRight: createChain(),
-			NewPeakingEQ(sampleRate, 350, 1.0, 0),  // LOW
+		buffer:       make([]float64, 1920), // Pre-allocate for Stereo 20ms frame (960*2)
-			NewPeakingEQ(sampleRate, 1000, 1.0, 0), // MID
+		currentGains: make([]float64, 5),    // Initialize cache with 0.0
 			NewPeakingEQ(sampleRate, 3000, 1.0, 0), // HI
 			NewPeakingEQ(sampleRate, 8000, 1.0, 0), // AIR
 		},
 	}
 }
 // SetGain sets the gain for a specific band index (0-4)
 func (e *EQChain) SetGain(bandIdx int, dbGain float64) {
-	if bandIdx < 0 || bandIdx >= len(e.Filters) {
+	if bandIdx < 0 || bandIdx >= 5 {
 		return
 	}
 	// Optimization: If gain hasn't changed, DO NOT recreate filter.
 	// Recreating the filter resets its internal history state (bi-quad delay buffers),
 	// causing audible clicks/pops (discontinuities) at every 20ms frame boundary.
 	const epsilon = 0.001
 	if delta := dbGain - e.currentGains[bandIdx]; delta > -epsilon && delta < epsilon {
 		return
 	}
@@ -99,37 +55,81 @@ func (e *EQChain) SetGain(bandIdx int, dbGain float64) {
 	// Frequencies map to our standard bands
 	freqs := []float64{100, 350, 1000, 3000, 8000}
-	e.Filters[bandIdx].Configure(rate, freqs[bandIdx], 1.0, dbGain)
+	// Create new filter with updated gain
 	// We use width=1.0 consistent with constructor
 	// Update BOTH Left and Right to keep balance
 	e.FiltersLeft[bandIdx] = equalizer.NewPeaking(rate, freqs[bandIdx], 1.0, dbGain)
 	e.FiltersRight[bandIdx] = equalizer.NewPeaking(rate, freqs[bandIdx], 1.0, dbGain)
 	// Update cache
 	e.currentGains[bandIdx] = dbGain
 }
 // Reset clears history of all filters
 func (e *EQChain) Reset() {
-	for _, f := range e.Filters {
+	// The library does not expose a Reset method.
 		f.Reset()
 	}
 }
-// ProcessBlock processes a slice of samples in-place (or returns new slice)
+// Process processes a slice of samples (Interleaved Stereo)
 // The filtered samples are written back into the input slice, which is also returned.
 func (e *EQChain) Process(samples []int16) []int16 {
-	out := make([]int16, len(samples))
+	// Grow buffer if needed
 	if cap(e.buffer) < len(samples) {
 		e.buffer = make([]float64, len(samples))
 	}
 	e.buffer = e.buffer[:len(samples)]
 	// Float conversion with normalization (-1.0 to 1.0)
 	// We also apply a slight pre-attenuation (Headroom) to avoid clipping when boosting EQ.
 	// -3dB = 0.707
 	const headroom = 0.707
 	const norm = 1.0 / 32768.0
 	for i, s := range samples {
-		val := float64(s)
+		e.buffer[i] = float64(s) * norm * headroom
 	}
-		// Run through cascade
+	// Filter processing
-		for _, f := range e.Filters {
+	// Input is assumed to be Interleaved Stereo: L, R, L, R...
-			val = f.Process(val)
+	// We iterate by 2 to process pairs.
 	for i := 0; i < len(e.buffer); i += 2 {
 		if i+1 >= len(e.buffer) {
 			break
 		}
-		// Clip
+		valL := e.buffer[i]
 		valR := e.buffer[i+1]
 		// Run through LEFT chain
 		for _, f := range e.FiltersLeft {
 			valL = f.Apply(valL)
 		}
 		// Run through RIGHT chain
 		for _, f := range e.FiltersRight {
 			valR = f.Apply(valR)
 		}
 		// Write back to buffer
 		e.buffer[i] = valL
 		e.buffer[i+1] = valR
 	}
 	// Convert back to int16
 	for i, val := range e.buffer {
 		// Denormalize
 		val = val * 32767.0
 		// Hard clipping
 		if val > 32767 {
 			val = 32767
 		} else if val < -32768 {
 			val = -32768
 		}
-		out[i] = int16(val)
+		// Write back directly to samples
 		samples[i] = int16(val)
 	}
-	return out
+
 	return samples
 }
--- a/pkg/audio/fft.go
+++ b/pkg/audio/fft.go
@@ -83,7 +83,7 @@ func CalculateEQBands(samples []int16, sampleRate int) []float64 {
 	// Normalize output for visualization (0.0 to 1.0)
 	// We need some scaling factor. Based on expected signals.
-	const scale = 50.0 // heuristic
+	const scale = 10.0 // Reduced from 50.0 to fix saturation
 	for i := range bands {
 		bands[i] = bands[i] * scale
 		if bands[i] > 1.0 {
--- a/pkg/audio/playback.go
+++ b/pkg/audio/playback.go
@@ -69,11 +69,11 @@ func NewPlayer() (*Player, error) {
 	waveFormat := &wca.WAVEFORMATEX{
 		WFormatTag:      wca.WAVE_FORMAT_PCM,
-		NChannels:       1,
+		NChannels:       2, // STEREO
 		NSamplesPerSec:  48000,
 		WBitsPerSample:  16,
-		NBlockAlign:     2,
+		NBlockAlign:     4,      // 16bit * 2 channels / 8 = 4 bytes
-		NAvgBytesPerSec: 96000,
+		NAvgBytesPerSec: 192000, // 48000 * 4
 		CbSize:          0,
 	}
@@ -167,21 +167,33 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 		return
 	}
 	// ---------------------------------------------------------
 	// PHASE 1: Read Configuration (Safe Copy)
 	// ---------------------------------------------------------
 	p.bufferMu.Lock()
 	defer p.bufferMu.Unlock()
 	// Check per-user mute
-	if settings, ok := p.userSettings[senderID]; ok && settings.Muted {
+	settings, hasSettings := p.userSettings[senderID]
 	if hasSettings && settings.Muted {
 		p.bufferMu.Unlock()
 		return
 	}
-	// Apply EQ Filters if gains are non-zero
+	// Get EQ Instance (Create if needed)
-	p.ensureEQ(senderID)
+	if _, ok := p.userEQs[senderID]; !ok {
 		p.userEQs[senderID] = NewEQChain(48000)
 	}
 	userEQ := p.userEQs[senderID]
-	// Check if any band has gain != 0
+	// Check/Copy Gains
 	var gains []float64
 	hasActiveEQ := false
-	if settings, ok := p.userSettings[senderID]; ok && len(settings.Gains) == 5 {
+	if hasSettings && len(settings.Gains) == 5 {
-		for _, g := range settings.Gains {
+		// Copy gains to avoid race if UI changes them while we process
 		gains = make([]float64, 5)
 		copy(gains, settings.Gains)
 		for _, g := range gains {
 			if g != 0 {
 				hasActiveEQ = true
 				break
@@ -189,33 +201,59 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 		}
 	}
-	// Apply filters if needed
+	p.bufferMu.Unlock()
-	// Note: We should probably process always if we want smooth transitions,
+	// ---------------------------------------------------------
-	// but for optimization we skip if all 0.
+	// END PHASE 1 (Lock Released)
-	// However, skipping might cause clicks if we jump from filtered to non-filtered state abruptly.
+	// ---------------------------------------------------------
 	// For "Pro" audio, always process. For TUI app, let's process if active.
 	if hasActiveEQ {
 		if eq, ok := p.userEQs[senderID]; ok {
 			// Update gains from settings
 			// (Ideally we only do this on change, but doing it here ensures sync)
 			gains := p.userSettings[senderID].Gains
 			for i, g := range gains {
 				eq.SetGain(i, g)
 			}
-			// Process in-place (conceptually) - actually implementation creates new slice
+	// ---------------------------------------------------------
-			samples = eq.Process(samples)
+	// PHASE 2: Heavy Processing (Concurrent)
 	// ---------------------------------------------------------
 	// Normalize to Stereo (Interleaved)
 	// If input is Mono (960 samples), expand to Stereo (1920 samples)
 	// If input is already Stereo, it is copied as is.
 	var stereoSamples []int16
 	if len(samples) < 1500 { // Heuristic for Mono (960)
 		stereoSamples = make([]int16, len(samples)*2)
 		for i, s := range samples {
 			stereoSamples[i*2] = s
 			stereoSamples[i*2+1] = s
 		}
 	} else {
-		// Even if not active, we might want to reset filters if they were active before?
+		// Already stereo (assumed)
-		// Or just leave them alone.
+		stereoSamples = make([]int16, len(samples))
 		copy(stereoSamples, samples)
 	}
 	// Apply EQ Filters if needed
 	if hasActiveEQ {
 		// Update gains on the private EQ instance (Thread-safe per user)
 		for i, g := range gains {
 			userEQ.SetGain(i, g)
 		}
 		// Process Stereo
 		stereoSamples = userEQ.Process(stereoSamples)
 	}
 	// Calculate EQ bands for visualization
-	// We do this BEFORE appending to buffer to ensure we have visual feedback even if buffer is full/lagging
+	// Downmix to Mono for FFT visualization to save CPU and complexity
-	// This is a "fire and forget" calculation for UI
+	vizSamples := make([]int16, len(stereoSamples)/2)
-	bands := CalculateEQBands(samples, 48000)
+	for i := 0; i < len(vizSamples); i++ {
 		// Average L+R
 		val := (int32(stereoSamples[i*2]) + int32(stereoSamples[i*2+1])) / 2
 		vizSamples[i] = int16(val)
 	}
 	bands := CalculateEQBands(vizSamples, 48000)
 	// ---------------------------------------------------------
 	// PHASE 3: Write Output (Lock Acquired)
 	// ---------------------------------------------------------
 	p.bufferMu.Lock()
 	defer p.bufferMu.Unlock()
 	// Re-check existence (could have disconnected?)
 	// Update user settings with new bands
 	if _, ok := p.userSettings[senderID]; !ok {
 		p.userSettings[senderID] = &UserSettings{Volume: 1.0, Muted: false}
@@ -223,13 +261,18 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 	p.userSettings[senderID].EQBands = bands
 	// Append to user's specific buffer
-	// This ensures sequential playback for the same user
+	p.userBuffers[senderID] = append(p.userBuffers[senderID], stereoSamples...)
 	p.userBuffers[senderID] = append(p.userBuffers[senderID], samples...)
-	// Limit buffer size per user to avoid memory leaks if stalled
+	// Limit buffer size per user (Stereo 2sec = 48000*2*2 = 192000 items)
-	if len(p.userBuffers[senderID]) > 48000*2 { // 2 seconds max
+	// frameSamples is 960 (20ms). 2sec = 100 frames * 960 * 2 = 192000
 	const maxBufferSize = 48000 * 2 * 2 // 2 seconds stereo
 	if len(p.userBuffers[senderID]) > maxBufferSize {
 		// Drop oldest
-		drop := len(p.userBuffers[senderID]) - 48000
+		drop := len(p.userBuffers[senderID]) - maxBufferSize
 		// Ensure we drop aligned to stereo frame (even number)
 		if drop%2 != 0 {
 			drop++
 		}
 		p.userBuffers[senderID] = p.userBuffers[senderID][drop:]
 	}
 }
@@ -389,7 +432,8 @@ func (p *Player) writeFrame() {
 		p.bufferMu.Lock()
 		// Mix audio from all active user buffers
-		mixed := make([]int32, frameSamples)
+		// Stereo mixing: buffer size is frameSamples * 2
 		mixed := make([]int32, frameSamples*2)
 		activeUsers := 0
 		hasAnyAudio := false
@@ -397,12 +441,15 @@ func (p *Player) writeFrame() {
 			if len(buf) > 0 {
 				hasAnyAudio = true
 				activeUsers++
-				// Take up to frameSamples from this user
+				// Take up to frameSamples*2 (Stereo) from this user
-				toTake := frameSamples
+				toTake := frameSamples * 2
-				if len(buf) < frameSamples {
+				if len(buf) < int(frameSamples)*2 {
 					toTake = len(buf)
 				}
 				// Ensure we take pairs (alignment)
 				toTake = toTake &^ 1 // clear lowest bit
 				for i := 0; i < toTake; i++ {
 					sample := int32(buf[i])
@@ -415,10 +462,10 @@ func (p *Player) writeFrame() {
 				}
 				// Advance buffer
-				if len(buf) <= frameSamples {
+				if len(buf) <= toTake {
-					delete(p.userBuffers, id)
+					delete(p.userBuffers, id) // Finished this buffer
 				} else {
-					p.userBuffers[id] = buf[frameSamples:]
+					p.userBuffers[id] = buf[toTake:]
 				}
 			}
 		}
@@ -441,8 +488,19 @@ func (p *Player) writeFrame() {
 		p.mu.Unlock()
 		// Write mixed samples with clipping protection and volume application
-		bufSlice := unsafe.Slice(buffer, int(frameSamples)*2)
+		// Output buffer is for Stereo (frameSamples * 2 channels)
-		for i := 0; i < int(frameSamples); i++ {
+		bufSlice := unsafe.Slice(buffer, int(frameSamples)*2*2) // *2 channels *2 bytes? No, unsafe.Slice takes count of Type.
 		// GetBuffer returns a BYTE pointer, so the slice length is counted in
 		// bytes: frameSamples * 2 channels * 2 bytes per sample.
 		// (A uint16 slice was considered; samples are still written through the
 		// byte slice with binary.LittleEndian.PutUint16(bufSlice[i*2:], ...).)
 		// The previous mono loop ran i over 0..frameSamples (960).
 		// The stereo mixed buffer has length frameSamples * 2, so the loop
 		// below must write frameSamples * 2 samples:
 		for i := 0; i < int(frameSamples)*2; i++ { // Iterate over all samples (L, R, L, R...)
 			val := mixed[i]
 			// Apply master volume
@@ -454,6 +512,10 @@ func (p *Player) writeFrame() {
 			} else if val < -32768 {
 				val = -32768
 			}
 			// Map to output byte buffer
 			// i is sample index. Each sample is 2 bytes.
 			// Offset = i * 2.
 			binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val))
 		}