Files

139 lines
4.1 KiB
Go
Raw Permalink Normal View History

package audio
import (
"math"
)
// CalculateEQBands computes normalized magnitudes for 5 EQ bands from PCM
// samples, intended for visualization.
//
// Processing steps:
//  1. Take the most recent 1024 samples (shorter input is zero-padded at the
//     end) and scale them from int16 by 1/32768.
//  2. Multiply by a Hann window.
//  3. Transform with fft.
//  4. Average the bin magnitudes of each band, multiply by a fixed display
//     gain and clamp at 1.0.
//
// The band layout is defined at 48 kHz (46.875 Hz per bin) as bins 1-6,
// 7-12, 13-45, 46-90 and 91-511, i.e. upper edges of roughly 281 Hz, 562 Hz,
// 2.1 kHz and 4.2 kHz; bin 0 (DC) and the Nyquist bin are not used. For other
// sample rates the bin edges are rescaled so that every band keeps the same
// frequency range; a band that lies entirely above the Nyquist frequency is
// reported as 0. A non-positive sampleRate falls back to the 48 kHz layout.
//
// The returned slice always has length 5, ordered Bass, Low-Mid, Mid,
// Upper-Mid, High, with every value in [0.0, 1.0].
func CalculateEQBands(samples []int16, sampleRate int) []float64 {
	const (
		// 1024 samples is ~21ms at 48kHz: responsive and cheap to transform.
		windowSize = 1024
		// Highest bin that is aggregated; the Nyquist bin itself is excluded.
		lastBin = windowSize/2 - 1
		// Sample rate at which refUpperBins is expressed.
		refRate = 48000.0
		// Display gain applied to the per-band average before clamping.
		scale = 10.0
	)
	// Inclusive upper bins of the first four bands at refRate. The fifth band
	// runs from the end of the fourth up to lastBin.
	refUpperBins := [...]float64{6, 12, 45, 90}

	// Keep only the most recent audio.
	if len(samples) > windowSize {
		samples = samples[len(samples)-windowSize:]
	}

	// Windowed input. Entries past len(samples) stay zero, which is the same
	// as zero-padding the input before windowing.
	re := make([]float64, windowSize)
	im := make([]float64, windowSize)
	for i, s := range samples {
		hann := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(windowSize-1)))
		re[i] = float64(s) / 32768.0 * hann
	}

	fft(re, im)

	rate := refRate
	if sampleRate > 0 {
		rate = float64(sampleRate)
	}

	bands := make([]float64, 5)
	start := 1 // skip DC (bin 0)
	for b := range bands {
		end := lastBin
		if b < len(refUpperBins) {
			// Rescale the reference edge so it marks the same frequency at
			// the actual sample rate.
			if edge := int(math.Round(refUpperBins[b] * refRate / rate)); edge < end {
				end = edge
			}
		}
		if end < start {
			// Band has no bins at this sample rate; leave it at 0.
			continue
		}

		sum := 0.0
		for i := start; i <= end; i++ {
			sum += math.Sqrt(re[i]*re[i] + im[i]*im[i])
		}

		level := sum / float64(end-start+1) * scale
		if level > 1.0 {
			level = 1.0
		}
		bands[b] = level
		start = end + 1
	}
	return bands
}
// fft replaces the complex sequence (re[i] + i*im[i]) with its discrete
// Fourier transform, using a recursive radix-2 decimation-in-time
// Cooley-Tukey scheme with the e^(-2*pi*i*k/n) sign convention.
//
// The length of re must be a power of 2 and im must be at least as long;
// sequences of length 0 or 1 are left untouched.
func fft(re, im []float64) {
	size := len(re)
	if size <= 1 {
		return
	}
	mid := size / 2

	// One scratch buffer carved into the four half-length sequences.
	scratch := make([]float64, 4*mid)
	evenRe, evenIm := scratch[:mid], scratch[mid:2*mid]
	oddRe, oddIm := scratch[2*mid:3*mid], scratch[3*mid:]

	// De-interleave into even-indexed and odd-indexed samples.
	for j := range evenRe {
		evenRe[j], evenIm[j] = re[2*j], im[2*j]
		oddRe[j], oddIm[j] = re[2*j+1], im[2*j+1]
	}

	// Transform each half independently.
	fft(evenRe, evenIm)
	fft(oddRe, oddIm)

	// Butterfly: X[j] = E[j] + w*O[j], X[j+mid] = E[j] - w*O[j],
	// where w is the twiddle factor for index j.
	for j := 0; j < mid; j++ {
		wRe := math.Cos(-2 * math.Pi * float64(j) / float64(size))
		wIm := math.Sin(-2 * math.Pi * float64(j) / float64(size))

		// Complex product w * O[j]: (ac - bd) + i(ad + bc).
		prodRe := wRe*oddRe[j] - wIm*oddIm[j]
		prodIm := wRe*oddIm[j] + wIm*oddRe[j]

		re[j], re[j+mid] = evenRe[j]+prodRe, evenRe[j]-prodRe
		im[j], im[j+mid] = evenIm[j]+prodIm, evenIm[j]-prodIm
	}
}