139 lines
4.1 KiB
Go
139 lines
4.1 KiB
Go
package audio
|
|
|
|
import (
|
|
"math"
|
|
)
|
|
|
|
// CalculateEQBands computes frequency magnitudes for 5 EQ bands from PCM samples.
|
|
// It uses a simplified approach tailored for visualization:
|
|
// 1. Converts int16 PCM to float64
|
|
// 2. Applies a Window function (Hanning)
|
|
// 3. Performs a simple DFT (Discrete Fourier Transform) - sufficient for small N/visualization
|
|
// 4. Aggregates bins into 5 bands: Bass, Low-Mid, Mid, Hybrid-High, High
|
|
func CalculateEQBands(samples []int16, sampleRate int) []float64 {
|
|
// We'll use a relatively small window size for responsiveness and performance
|
|
// 512 samples at 48kHz is ~10ms, which is very fast.
|
|
// 1024 samples is ~21ms.
|
|
|
|
const windowSize = 1024
|
|
if len(samples) < windowSize {
|
|
// Not enough data, return empty or zeroed bands
|
|
// Pad with zeros if we really wanted to processing, but for vis just return what we have?
|
|
// Actually, let's just make a copy and pad with zeros to windowSize
|
|
padded := make([]int16, windowSize)
|
|
copy(padded, samples)
|
|
samples = padded
|
|
} else {
|
|
// Take the last windowSize samples (most recent audio)
|
|
samples = samples[len(samples)-windowSize:]
|
|
}
|
|
|
|
// Prepare complex input
|
|
real := make([]float64, windowSize)
|
|
imag := make([]float64, windowSize)
|
|
|
|
// Apply Hanning Window
|
|
for i := 0; i < windowSize; i++ {
|
|
val := float64(samples[i]) / 32768.0 // Normalize to -1.0..1.0
|
|
// Hanning window formula
|
|
window := 0.5 * (1 - math.Cos(2*math.Pi*float64(i)/float64(windowSize-1)))
|
|
real[i] = val * window
|
|
}
|
|
|
|
// Perform basic FFT (Cooley-Tukey)
|
|
// Since windowSize is power of 2 (1024), we can use a recursive or iterative FFT.
|
|
// For simplicity in a single file without deps, we'll write a small recursive one or iterative.
|
|
// Given typical Go performance, a simple recursive one is fine for N=1024 per user talk event.
|
|
fft(real, imag)
|
|
|
|
// Calculate magnitudes and bucket into bands
|
|
// Freq resolution = SampleRate / WindowSize = 48000 / 1024 ~= 46.875 Hz per bin
|
|
// Nyquist = 24000 Hz (Bin 512)
|
|
|
|
// Band definitions (approximate range):
|
|
// 1. Sub/Bass: 0 - 250 Hz (Bins 0-5)
|
|
// 2. Low Mids: 250 - 500 Hz (Bins 6-10)
|
|
// 3. Mids: 500 - 2000 Hz (Bins 11-42)
|
|
// 4. Upper Mids: 2000 - 4000 Hz (Bins 43-85)
|
|
// 5. Highs: 4000Hz+ (Bins 86-512)
|
|
|
|
bands := make([]float64, 5)
|
|
|
|
// Helper to collect energy
|
|
collectEnergy := func(startBin, endBin int) float64 {
|
|
sum := 0.0
|
|
for i := startBin; i <= endBin && i < windowSize/2; i++ {
|
|
// Magnitude = sqrt(re^2 + im^2)
|
|
mag := math.Sqrt(real[i]*real[i] + imag[i]*imag[i])
|
|
sum += mag
|
|
}
|
|
// Average
|
|
count := float64(endBin - startBin + 1)
|
|
if count > 0 {
|
|
return sum / count
|
|
}
|
|
return 0
|
|
}
|
|
|
|
bands[0] = collectEnergy(1, 6) // Skip DC (bin 0)
|
|
bands[1] = collectEnergy(7, 12)
|
|
bands[2] = collectEnergy(13, 45)
|
|
bands[3] = collectEnergy(46, 90)
|
|
bands[4] = collectEnergy(91, 511)
|
|
|
|
// Normalize output for visualization (0.0 to 1.0)
|
|
// We need some scaling factor. Based on expected signals.
|
|
const scale = 10.0 // Reduced from 50.0 to fix saturation
|
|
for i := range bands {
|
|
bands[i] = bands[i] * scale
|
|
if bands[i] > 1.0 {
|
|
bands[i] = 1.0
|
|
}
|
|
}
|
|
|
|
return bands
|
|
}
|
|
|
|
// fft replaces the complex sequence real[i] + i*imag[i] with its discrete
// Fourier transform, using a recursive radix-2 decimation-in-time
// Cooley-Tukey scheme.
//
// len(real) must be a power of 2. imag is indexed over the same range as
// real, so it must be at least as long. Sequences of length 0 or 1 are left
// untouched.
func fft(real, imag []float64) {
	size := len(real)
	if size < 2 {
		return
	}

	mid := size / 2

	// A single scratch buffer is carved into four equal views holding the
	// even- and odd-indexed halves of the input.
	scratch := make([]float64, 4*mid)
	evenRe, evenIm := scratch[:mid], scratch[mid:2*mid]
	oddRe, oddIm := scratch[2*mid:3*mid], scratch[3*mid:]
	for j := range evenRe {
		evenRe[j], evenIm[j] = real[2*j], imag[2*j]
		oddRe[j], oddIm[j] = real[2*j+1], imag[2*j+1]
	}

	// Transform each half independently.
	fft(evenRe, evenIm)
	fft(oddRe, oddIm)

	// Butterfly: out[j] = even[j] + w*odd[j], out[j+mid] = even[j] - w*odd[j],
	// where w = exp(-2*pi*i*j/size) is the twiddle factor.
	for j := range evenRe {
		angle := -2 * math.Pi * float64(j) / float64(size)
		wRe, wIm := math.Cos(angle), math.Sin(angle)

		// Complex product w * odd[j]: (ac - bd) + i(ad + bc)
		prodRe := wRe*oddRe[j] - wIm*oddIm[j]
		prodIm := wRe*oddIm[j] + wIm*oddRe[j]

		real[j], imag[j] = evenRe[j]+prodRe, evenIm[j]+prodIm
		real[j+mid], imag[j+mid] = evenRe[j]-prodRe, evenIm[j]-prodIm
	}
}
|