// go-ts/pkg/audio/fft.go

package audio

import (
	"math"
)

// CalculateEQBands reduces a chunk of 16-bit PCM audio to five coarse
// equalizer levels intended for driving a visualizer.
//
// Processing steps:
//  1. Keep the most recent 1024 samples (shorter input is zero-padded at the
//     end) and normalize them to -1.0..1.0.
//  2. Apply a Hann window to limit spectral leakage.
//  3. Transform the windowed block with fft.
//  4. Average the bin magnitudes inside each band, multiply by a fixed gain
//     and clamp the result to 1.0.
//
// Band edges are held in Hz and converted to FFT bins using sampleRate, so a
// given tone lands in the same band regardless of the capture rate:
//
//	index 0  bass        up to ~300 Hz (the DC bin is skipped)
//	index 1  low mids    ~300 - 600 Hz
//	index 2  mids        ~600 - 2150 Hz
//	index 3  upper mids  ~2150 - 4250 Hz
//	index 4  highs       ~4250 Hz up to Nyquist
//
// At 48 kHz these edges resolve to bins 1-6, 7-12, 13-45, 46-90 and 91-511.
// A non-positive sampleRate is treated as 48000. A band that lies entirely
// above the Nyquist frequency of the given rate reports 0.
//
// The returned slice always has length 5 and every value is in [0.0, 1.0].
func CalculateEQBands(samples []int16, sampleRate int) []float64 {
	// 1024 samples is ~21 ms at 48 kHz: short enough to feel responsive and a
	// power of two, which keeps fft on its fast path.
	const windowSize = 1024
	// Rate assumed when the caller does not supply a usable one.
	const fallbackRate = 48000
	// Empirical gain mapping typical speech levels into 0..1 before clamping.
	const gain = 10.0

	// Upper edge of every band except the last, in Hz. The last band always
	// runs up to the final bin below Nyquist.
	upperHz := [...]int{300, 600, 2150, 4250}

	if sampleRate <= 0 {
		sampleRate = fallbackRate
	}

	// Only the newest windowSize samples matter for a live display.
	if len(samples) > windowSize {
		samples = samples[len(samples)-windowSize:]
	}

	// re/im start zeroed, so input shorter than the window is implicitly
	// zero-padded at the end.
	re := make([]float64, windowSize)
	im := make([]float64, windowSize)
	for i, s := range samples {
		hann := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(windowSize-1)))
		re[i] = float64(s) / 32768.0 * hann
	}

	fft(re, im)

	// meanMagnitude averages |X[bin]| over the inclusive range first..last and
	// returns 0 for an empty range.
	meanMagnitude := func(first, last int) float64 {
		if last < first {
			return 0
		}
		var sum float64
		for bin := first; bin <= last; bin++ {
			sum += math.Sqrt(re[bin]*re[bin] + im[bin]*im[bin])
		}
		return sum / float64(last-first+1)
	}

	// The input is real, so bins at and above windowSize/2 mirror the lower
	// half and carry no extra information.
	const topBin = windowSize/2 - 1

	bands := make([]float64, len(upperHz)+1)
	first := 1 // bin 0 is the DC offset, not audible content
	for b := range bands {
		last := topBin
		if b < len(upperHz) {
			// Bin k is centred on k*sampleRate/windowSize Hz; integer division
			// selects the last bin at or below the edge frequency.
			if edge := upperHz[b] * windowSize / sampleRate; edge < last {
				last = edge
			}
		}

		level := meanMagnitude(first, last) * gain
		if level > 1.0 {
			level = 1.0
		}
		bands[b] = level

		if last >= first {
			first = last + 1
		}
	}

	return bands
}

// fft replaces the complex sequence stored in real/imag (parallel slices of
// real and imaginary parts) with its forward discrete Fourier transform,
// X[k] = sum over j of x[j] * exp(-2*pi*i*j*k/n). No scaling is applied.
//
// The transform length n is len(real). imag must hold at least n elements
// (the function panics otherwise) and only its first n elements are used.
// Lengths 0 and 1 are returned untouched.
//
// Power-of-two lengths use an iterative, in-place radix-2 Cooley-Tukey
// transform that performs no heap allocations. Any other length falls back
// to naiveDFT, which is O(n^2) but still yields the correct result.
func fft(real, imag []float64) {
	n := len(real)
	if n <= 1 {
		return
	}
	_ = imag[n-1] // fail fast if imag is shorter than real

	if n&(n-1) != 0 {
		naiveDFT(real, imag[:n])
		return
	}

	// Bit-reversal permutation: reorder the input so that every butterfly
	// stage below can work in place on adjacent blocks.
	for i, j := 1, 0; i < n; i++ {
		bit := n >> 1
		for ; j&bit != 0; bit >>= 1 {
			j ^= bit
		}
		j ^= bit
		if i < j {
			real[i], real[j] = real[j], real[i]
			imag[i], imag[j] = imag[j], imag[i]
		}
	}

	// Butterfly stages: merge transforms of length size/2 into length size.
	for size := 2; size <= n; size <<= 1 {
		half := size >> 1
		for k := 0; k < half; k++ {
			// The twiddle factor depends only on (k, size), so compute it
			// once and reuse it for every block of this stage.
			angle := -2 * math.Pi * float64(k) / float64(size)
			wRe, wIm := math.Cos(angle), math.Sin(angle)

			for lo := k; lo < n; lo += size {
				hi := lo + half
				// (wRe + i*wIm) * (real[hi] + i*imag[hi])
				tRe := wRe*real[hi] - wIm*imag[hi]
				tIm := wRe*imag[hi] + wIm*real[hi]

				real[hi] = real[lo] - tRe
				imag[hi] = imag[lo] - tIm
				real[lo] += tRe
				imag[lo] += tIm
			}
		}
	}
}

// naiveDFT computes the forward DFT of re/im in place by direct summation.
// It is the O(n^2) fallback for lengths the radix-2 path cannot handle.
// Both slices must have the same length.
func naiveDFT(re, im []float64) {
	n := len(re)
	outRe := make([]float64, n)
	outIm := make([]float64, n)

	for k := 0; k < n; k++ {
		var sumRe, sumIm float64
		for j := 0; j < n; j++ {
			// Reduce j*k modulo n first so the angle stays small and precise.
			angle := -2 * math.Pi * float64((j*k)%n) / float64(n)
			c, s := math.Cos(angle), math.Sin(angle)
			sumRe += re[j]*c - im[j]*s
			sumIm += re[j]*s + im[j]*c
		}
		outRe[k], outIm[k] = sumRe, sumIm
	}

	copy(re, outRe)
	copy(im, outIm)
}