feat(audio): optimize equalizer with stereo support and gain caching

2026-01-17 20:49:16 +01:00
parent 711eb148df
commit be929ce55a
5 changed files with 203 additions and 138 deletions
--- a/pkg/audio/playback.go
+++ b/pkg/audio/playback.go
@@ -69,11 +69,11 @@ func NewPlayer() (*Player, error) {

 	waveFormat := &wca.WAVEFORMATEX{
 		WFormatTag:      wca.WAVE_FORMAT_PCM,
-		NChannels:       1,
+		NChannels:       2, // STEREO
 		NSamplesPerSec:  48000,
 		WBitsPerSample:  16,
-		NBlockAlign:     2,
-		NAvgBytesPerSec: 96000,
+		NBlockAlign:     4,      // 16bit * 2 channels / 8 = 4 bytes
+		NAvgBytesPerSec: 192000, // 48000 * 4
 		CbSize:          0,
 	}

@@ -167,21 +167,33 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 		return
 	}

+	// ---------------------------------------------------------
+	// PHASE 1: Read Configuration (Safe Copy)
+	// ---------------------------------------------------------
 	p.bufferMu.Lock()
-	defer p.bufferMu.Unlock()

 	// Check per-user mute
-	if settings, ok := p.userSettings[senderID]; ok && settings.Muted {
+	settings, hasSettings := p.userSettings[senderID]
+	if hasSettings && settings.Muted {
+		p.bufferMu.Unlock()
 		return
 	}

-	// Apply EQ Filters if gains are non-zero
-	p.ensureEQ(senderID)
+	// Get EQ Instance (Create if needed)
+	if _, ok := p.userEQs[senderID]; !ok {
+		p.userEQs[senderID] = NewEQChain(48000)
+	}
+	userEQ := p.userEQs[senderID]

-	// Check if any band has gain != 0
+	// Check/Copy Gains
+	var gains []float64
 	hasActiveEQ := false
-	if settings, ok := p.userSettings[senderID]; ok && len(settings.Gains) == 5 {
-		for _, g := range settings.Gains {
+	if hasSettings && len(settings.Gains) == 5 {
+		// Copy gains to avoid race if UI changes them while we process
+		gains = make([]float64, 5)
+		copy(gains, settings.Gains)
+
+		for _, g := range gains {
 			if g != 0 {
 				hasActiveEQ = true
 				break
@@ -189,33 +201,59 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 		}
 	}

-	// Apply filters if needed
-	// Note: We should probably process always if we want smooth transitions,
-	// but for optimization we skip if all 0.
-	// However, skipping might cause clicks if we jump from filtered to non-filtered state abruptly.
-	// For "Pro" audio, always process. For TUI app, let's process if active.
-	if hasActiveEQ {
-		if eq, ok := p.userEQs[senderID]; ok {
-			// Update gains from settings
-			// (Ideally we only do this on change, but doing it here ensures sync)
-			gains := p.userSettings[senderID].Gains
-			for i, g := range gains {
-				eq.SetGain(i, g)
-			}
+	p.bufferMu.Unlock()
+	// ---------------------------------------------------------
+	// END PHASE 1 (Lock Released)
+	// ---------------------------------------------------------

-			// Process in-place (conceptually) - actually implementation creates new slice
-			samples = eq.Process(samples)
+	// ---------------------------------------------------------
+	// PHASE 2: Heavy Processing (Concurrent)
+	// ---------------------------------------------------------
+
+	// Normalize to interleaved stereo (L, R, L, R, ...).
+	// Mono input (e.g. 960 samples) is expanded to stereo (1920 samples) by duplicating each sample.
+	// Input that is already stereo is copied unchanged.
+	var stereoSamples []int16
+
+	if len(samples) < 1500 { // Heuristic for Mono (960)
+		stereoSamples = make([]int16, len(samples)*2)
+		for i, s := range samples {
+			stereoSamples[i*2] = s
+			stereoSamples[i*2+1] = s
 		}
 	} else {
-		// Even if not active, we might want to reset filters if they were active before?
-		// Or just leave them alone.
+		// Already stereo (assumed)
+		stereoSamples = make([]int16, len(samples))
+		copy(stereoSamples, samples)
+	}
+
+	// Apply EQ Filters if needed
+	if hasActiveEQ {
+		// Update gains on this sender's EQ instance. NOTE: bufferMu is not held here, so this assumes PlayPCM is never called concurrently for the same senderID.
+		for i, g := range gains {
+			userEQ.SetGain(i, g)
+		}
+		// Process Stereo
+		stereoSamples = userEQ.Process(stereoSamples)
 	}

 	// Calculate EQ bands for visualization
-	// We do this BEFORE appending to buffer to ensure we have visual feedback even if buffer is full/lagging
-	// This is a "fire and forget" calculation for UI
-	bands := CalculateEQBands(samples, 48000)
+	// Downmix to Mono for FFT visualization to save CPU and complexity
+	vizSamples := make([]int16, len(stereoSamples)/2)
+	for i := 0; i < len(vizSamples); i++ {
+		// Average L+R
+		val := (int32(stereoSamples[i*2]) + int32(stereoSamples[i*2+1])) / 2
+		vizSamples[i] = int16(val)
+	}
+	bands := CalculateEQBands(vizSamples, 48000)

+	// ---------------------------------------------------------
+	// PHASE 3: Write Output (Lock Acquired)
+	// ---------------------------------------------------------
+	p.bufferMu.Lock()
+	defer p.bufferMu.Unlock()
+
+	// The lock was released during processing, so re-check that this sender's settings entry still exists.
 	// Update user settings with new bands
 	if _, ok := p.userSettings[senderID]; !ok {
 		p.userSettings[senderID] = &UserSettings{Volume: 1.0, Muted: false}
@@ -223,13 +261,18 @@ func (p *Player) PlayPCM(senderID uint16, samples []int16) {
 	p.userSettings[senderID].EQBands = bands

 	// Append to user's specific buffer
-	// This ensures sequential playback for the same user
-	p.userBuffers[senderID] = append(p.userBuffers[senderID], samples...)
+	p.userBuffers[senderID] = append(p.userBuffers[senderID], stereoSamples...)

-	// Limit buffer size per user to avoid memory leaks if stalled
-	if len(p.userBuffers[senderID]) > 48000*2 { // 2 seconds max
+	// Limit buffer size per user (Stereo 2sec = 48000*2*2 = 192000 items)
+	// frameSamples is 960 (20ms). 2sec = 100 frames * 960 * 2 = 192000
+	const maxBufferSize = 48000 * 2 * 2 // 2 seconds stereo
+	if len(p.userBuffers[senderID]) > maxBufferSize {
 		// Drop oldest
-		drop := len(p.userBuffers[senderID]) - 48000
+		drop := len(p.userBuffers[senderID]) - maxBufferSize
+		// Ensure we drop aligned to stereo frame (even number)
+		if drop%2 != 0 {
+			drop++
+		}
 		p.userBuffers[senderID] = p.userBuffers[senderID][drop:]
 	}
 }
@@ -389,7 +432,8 @@ func (p *Player) writeFrame() {
 		p.bufferMu.Lock()

 		// Mix audio from all active user buffers
-		mixed := make([]int32, frameSamples)
+		// Stereo mixing: buffer size is frameSamples * 2
+		mixed := make([]int32, frameSamples*2)
 		activeUsers := 0
 		hasAnyAudio := false

@@ -397,12 +441,15 @@ func (p *Player) writeFrame() {
 			if len(buf) > 0 {
 				hasAnyAudio = true
 				activeUsers++
-				// Take up to frameSamples from this user
-				toTake := frameSamples
-				if len(buf) < frameSamples {
+				// Take up to frameSamples*2 (Stereo) from this user
+				toTake := frameSamples * 2
+				if len(buf) < int(frameSamples)*2 {
 					toTake = len(buf)
 				}

+				// Ensure we take pairs (alignment)
+				toTake = toTake &^ 1 // clear lowest bit
+
 				for i := 0; i < toTake; i++ {
 					sample := int32(buf[i])

@@ -415,10 +462,10 @@ func (p *Player) writeFrame() {
 				}

 				// Advance buffer
-				if len(buf) <= frameSamples {
-					delete(p.userBuffers, id)
+				if len(buf) <= toTake {
+					delete(p.userBuffers, id) // Finished this buffer
 				} else {
-					p.userBuffers[id] = buf[frameSamples:]
+					p.userBuffers[id] = buf[toTake:]
 				}
 			}
 		}
@@ -441,8 +488,19 @@ func (p *Player) writeFrame() {
 		p.mu.Unlock()

 		// Write mixed samples with clipping protection and volume application
-		bufSlice := unsafe.Slice(buffer, int(frameSamples)*2)
-		for i := 0; i < int(frameSamples); i++ {
+		// Output buffer is for Stereo (frameSamples * 2 channels)
+		bufSlice := unsafe.Slice(buffer, int(frameSamples)*2*2) // length in bytes: frames * 2 channels * 2 bytes per sample
+		// unsafe.Slice takes an element count of the pointer's type. buffer is
+		// a byte pointer (bufSlice is passed to binary.LittleEndian.PutUint16
+		// below), so the count above is a number of bytes.

+		// Previously this loop ran over frameSamples mono samples.
+		// mixed now holds interleaved stereo samples and has length
+		// frameSamples * 2, so the loop must write frameSamples * 2 samples,
+		// each occupying 2 bytes of bufSlice.

+		// Stereo write loop:
+		for i := 0; i < int(frameSamples)*2; i++ { // Iterate over all samples (L, R, L, R...)
 			val := mixed[i]

 			// Apply master volume
@@ -454,6 +512,10 @@ func (p *Player) writeFrame() {
 			} else if val < -32768 {
 				val = -32768
 			}
+
+			// Map to output byte buffer
+			// i is sample index. Each sample is 2 bytes.
+			// Offset = i * 2.
 			binary.LittleEndian.PutUint16(bufSlice[i*2:], uint16(val))
 		}