Compare commits
2 Commits
2026.02.09
...
e84cb67500
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e84cb67500 | ||
|
|
3db526f42b |
@@ -44,7 +44,7 @@ Barnard includes real-time noise suppression for microphone input to filter out
|
|||||||
|
|
||||||
### Features
|
### Features
|
||||||
- **Real-time processing**: Noise suppression is applied during audio capture with minimal latency
|
- **Real-time processing**: Noise suppression is applied during audio capture with minimal latency
|
||||||
- **Configurable threshold**: Adjustable noise gate threshold (default: 0.02)
|
- **Configurable amount**: Adjustable suppression amount via threshold value (default: `0.08`)
|
||||||
- **Persistent settings**: Noise suppression preferences are saved in your configuration file
|
- **Persistent settings**: Noise suppression preferences are saved in your configuration file
|
||||||
- **Multiple control methods**: Toggle via hotkey, command line flag, or FIFO commands
|
- **Multiple control methods**: Toggle via hotkey, command line flag, or FIFO commands
|
||||||
|
|
||||||
@@ -57,10 +57,11 @@ Barnard includes real-time noise suppression for microphone input to filter out
|
|||||||
### Configuration Example
|
### Configuration Example
|
||||||
```toml
|
```toml
|
||||||
noisesuppressionenabled = true
|
noisesuppressionenabled = true
|
||||||
noisesuppressionthreshold = 0.02
|
noisesuppressionthreshold = 0.08
|
||||||
```
|
```
|
||||||
|
|
||||||
The noise suppression algorithm uses a combination of high-pass filtering and noise gating to reduce unwanted background sounds while preserving voice quality.
|
`noisesuppressionthreshold` accepts values from `0.0` to `1.0`, where higher values apply stronger suppression.
|
||||||
|
The noise suppression algorithm uses adaptive noise-floor tracking, transient suppression, and smoothed gain reduction to reduce background noise while preserving voice quality.
|
||||||
|
|
||||||
## FIFO Control
|
## FIFO Control
|
||||||
|
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ func (c *Config) LoadConfig() {
|
|||||||
jc.NoiseSuppressionEnabled = &enabled
|
jc.NoiseSuppressionEnabled = &enabled
|
||||||
}
|
}
|
||||||
if c.config.NoiseSuppressionThreshold == nil {
|
if c.config.NoiseSuppressionThreshold == nil {
|
||||||
threshold := float32(0.02)
|
threshold := float32(0.08)
|
||||||
jc.NoiseSuppressionThreshold = &threshold
|
jc.NoiseSuppressionThreshold = &threshold
|
||||||
}
|
}
|
||||||
if c.config.VoiceEffect == nil {
|
if c.config.VoiceEffect == nil {
|
||||||
@@ -249,12 +249,25 @@ func (c *Config) SetNoiseSuppressionEnabled(enabled bool) {
|
|||||||
|
|
||||||
func (c *Config) GetNoiseSuppressionThreshold() float32 {
|
func (c *Config) GetNoiseSuppressionThreshold() float32 {
|
||||||
if c.config.NoiseSuppressionThreshold == nil {
|
if c.config.NoiseSuppressionThreshold == nil {
|
||||||
return 0.02
|
return 0.08
|
||||||
}
|
}
|
||||||
return *c.config.NoiseSuppressionThreshold
|
threshold := *c.config.NoiseSuppressionThreshold
|
||||||
|
if threshold < 0.0 {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
if threshold > 1.0 {
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
return threshold
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Config) SetNoiseSuppressionThreshold(threshold float32) {
|
func (c *Config) SetNoiseSuppressionThreshold(threshold float32) {
|
||||||
|
if threshold < 0.0 {
|
||||||
|
threshold = 0.0
|
||||||
|
}
|
||||||
|
if threshold > 1.0 {
|
||||||
|
threshold = 1.0
|
||||||
|
}
|
||||||
c.config.NoiseSuppressionThreshold = &threshold
|
c.config.NoiseSuppressionThreshold = &threshold
|
||||||
c.SaveConfig()
|
c.SaveConfig()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package noise
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Ensure Suppressor implements the NoiseProcessor interface
|
// Ensure Suppressor implements the NoiseProcessor interface
|
||||||
@@ -12,129 +13,200 @@ var _ interface {
|
|||||||
|
|
||||||
// Suppressor handles noise suppression for audio samples.
type Suppressor struct {
	// mu is held by the exported methods while they read or modify the
	// fields below.
	mu sync.Mutex

	enabled   bool
	threshold float32

	// High-pass filter state for low-frequency rumble/DC removal.
	prevInput, prevOutput, hpAlpha float32

	// Adaptive suppression state, updated for every processed sample.
	envelope, noiseFloor, suppressionGain, clickEnergy float32

	// Tunables: smoothing coefficients and limits used by the per-sample
	// update helpers.
	envelopeAttack, envelopeRelease float32
	noiseAttack, noiseRelease       float32
	gainAttack, gainRelease         float32
	speechRatio                     float32
	clickDecay                      float32
	minNoiseFloor                   float32
}
|
||||||
|
|
||||||
// NewSuppressor creates a new noise suppressor
|
// NewSuppressor creates a new noise suppressor
|
||||||
func NewSuppressor() *Suppressor {
|
func NewSuppressor() *Suppressor {
|
||||||
return &Suppressor{
|
s := &Suppressor{
|
||||||
enabled: false,
|
enabled: false,
|
||||||
threshold: 0.01, // Reduced noise threshold level for less aggressive filtering
|
threshold: 0.08,
|
||||||
gainFactor: 0.9, // Less aggressive gain reduction for noise
|
hpAlpha: 0.995,
|
||||||
alpha: 0.98, // More stable high-pass filter coefficient
|
envelopeAttack: 0.18,
|
||||||
clickThreshold: 0.15, // Threshold for detecting keyboard clicks
|
envelopeRelease: 0.02,
|
||||||
clickDecay: 0.95, // How quickly click energy decays
|
noiseAttack: 0.08,
|
||||||
recentClickEnergy: 0.0, // Tracks recent click activity
|
noiseRelease: 0.002,
|
||||||
|
gainAttack: 0.35,
|
||||||
|
gainRelease: 0.02,
|
||||||
|
speechRatio: 4.0,
|
||||||
|
clickDecay: 0.93,
|
||||||
|
minNoiseFloor: 0.0008,
|
||||||
|
suppressionGain: 1.0,
|
||||||
}
|
}
|
||||||
|
s.resetStateLocked()
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetEnabled enables or disables noise suppression
|
// SetEnabled enables or disables noise suppression
|
||||||
func (s *Suppressor) SetEnabled(enabled bool) {
|
func (s *Suppressor) SetEnabled(enabled bool) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
if s.enabled == enabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
s.enabled = enabled
|
s.enabled = enabled
|
||||||
|
s.resetStateLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsEnabled returns whether noise suppression is enabled
|
// IsEnabled returns whether noise suppression is enabled
|
||||||
func (s *Suppressor) IsEnabled() bool {
|
func (s *Suppressor) IsEnabled() bool {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
return s.enabled
|
return s.enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetThreshold sets the noise threshold (0.0 to 1.0)
|
// SetThreshold sets the noise threshold (0.0 to 1.0)
|
||||||
func (s *Suppressor) SetThreshold(threshold float32) {
|
func (s *Suppressor) SetThreshold(threshold float32) {
|
||||||
if threshold >= 0.0 && threshold <= 1.0 {
|
s.mu.Lock()
|
||||||
s.threshold = threshold
|
defer s.mu.Unlock()
|
||||||
}
|
s.threshold = clampFloat32(threshold, 0.0, 1.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetThreshold returns the current noise threshold
|
// GetThreshold returns the current noise threshold
|
||||||
func (s *Suppressor) GetThreshold() float32 {
|
func (s *Suppressor) GetThreshold() float32 {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
return s.threshold
|
return s.threshold
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProcessSamples applies noise suppression to audio samples
|
// ProcessSamples applies noise suppression to audio samples
|
||||||
func (s *Suppressor) ProcessSamples(samples []int16) {
|
func (s *Suppressor) ProcessSamples(samples []int16) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
if !s.enabled || len(samples) == 0 {
|
if !s.enabled || len(samples) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate frame energy for click detection
|
intensity := s.thresholdToIntensity()
|
||||||
var frameEnergy float32 = 0.0
|
minGain := 1.0 - (0.92 * intensity)
|
||||||
for _, sample := range samples {
|
eps := float32(1e-6)
|
||||||
floatSample := float32(sample) / 32767.0
|
|
||||||
frameEnergy += floatSample * floatSample
|
|
||||||
}
|
|
||||||
frameEnergy = float32(math.Sqrt(float64(frameEnergy / float32(len(samples)))))
|
|
||||||
|
|
||||||
// Detect sudden energy spikes (likely keyboard clicks)
|
|
||||||
energySpike := frameEnergy - s.recentClickEnergy
|
|
||||||
isClick := energySpike > s.clickThreshold && frameEnergy > 0.05
|
|
||||||
|
|
||||||
// Update recent click energy with decay
|
|
||||||
s.recentClickEnergy = s.recentClickEnergy*s.clickDecay + frameEnergy*(1.0-s.clickDecay)
|
|
||||||
|
|
||||||
// Improved noise suppression algorithm
|
|
||||||
for i, sample := range samples {
|
for i, sample := range samples {
|
||||||
// Convert to float for processing
|
floatSample := float32(sample) / 32768.0
|
||||||
floatSample := float32(sample) / 32767.0
|
filtered := s.highPassFilterLocked(floatSample)
|
||||||
|
absSample := float32(math.Abs(float64(filtered)))
|
||||||
// Apply high-pass filter for DC removal
|
|
||||||
filtered := s.highPassFilter(floatSample)
|
s.updateEnvelopeLocked(absSample)
|
||||||
|
s.updateNoiseFloorLocked()
|
||||||
// Calculate signal strength (RMS-like)
|
|
||||||
strength := float32(math.Abs(float64(filtered)))
|
snr := s.envelope / (s.noiseFloor + eps)
|
||||||
|
voicePresence := clampFloat32((snr-1.0)/(s.speechRatio-1.0), 0.0, 1.0)
|
||||||
// Apply noise gate with smooth transition
|
|
||||||
var gainReduction float32 = 1.0
|
targetGain := minGain + ((1.0 - minGain) * voicePresence)
|
||||||
|
targetGain = s.applyTransientSuppressionLocked(absSample, voicePresence, minGain, targetGain)
|
||||||
// If we detected a click, apply stronger suppression
|
|
||||||
if isClick {
|
s.applyGainSmoothingLocked(targetGain)
|
||||||
gainReduction = s.gainFactor * 0.3 // Much stronger reduction for clicks
|
|
||||||
} else if strength < s.threshold {
|
processed := filtered * s.suppressionGain
|
||||||
// Normal noise gate for low-level sounds
|
processed = clampFloat32(processed, -1.0, 1.0)
|
||||||
gainReduction = strength / s.threshold
|
samples[i] = int16(processed * 32767.0)
|
||||||
if gainReduction < s.gainFactor {
|
|
||||||
gainReduction = s.gainFactor
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply gain reduction
|
|
||||||
processed := filtered * gainReduction
|
|
||||||
|
|
||||||
// Convert back to int16 with proper clipping
|
|
||||||
processedInt := processed * 32767.0
|
|
||||||
if processedInt > 32767 {
|
|
||||||
processedInt = 32767
|
|
||||||
} else if processedInt < -32767 {
|
|
||||||
processedInt = -32767
|
|
||||||
}
|
|
||||||
|
|
||||||
samples[i] = int16(processedInt)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// highPassFilter applies a simple high-pass filter to remove DC component
|
func (s *Suppressor) highPassFilterLocked(input float32) float32 {
|
||||||
func (s *Suppressor) highPassFilter(input float32) float32 {
|
|
||||||
// Simple high-pass filter: y[n] = alpha * (y[n-1] + x[n] - x[n-1])
|
// Simple high-pass filter: y[n] = alpha * (y[n-1] + x[n] - x[n-1])
|
||||||
output := s.alpha * (s.prevOutput + input - s.prevInput)
|
output := s.hpAlpha * (s.prevOutput + input - s.prevInput)
|
||||||
s.prevInput = input
|
s.prevInput = input
|
||||||
s.prevOutput = output
|
s.prevOutput = output
|
||||||
return output
|
return output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) thresholdToIntensity() float32 {
|
||||||
|
// Keep lower legacy threshold values meaningful while allowing up to very aggressive suppression.
|
||||||
|
return 1.0 - float32(math.Exp(float64(-28.0*clampFloat32(s.threshold, 0.0, 1.0))))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) updateEnvelopeLocked(absSample float32) {
|
||||||
|
if absSample > s.envelope {
|
||||||
|
s.envelope += s.envelopeAttack * (absSample - s.envelope)
|
||||||
|
} else {
|
||||||
|
s.envelope += s.envelopeRelease * (absSample - s.envelope)
|
||||||
|
}
|
||||||
|
if s.envelope < s.minNoiseFloor {
|
||||||
|
s.envelope = s.minNoiseFloor
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) updateNoiseFloorLocked() {
|
||||||
|
coef := s.noiseRelease
|
||||||
|
if s.envelope < s.noiseFloor*2.2 {
|
||||||
|
coef = s.noiseAttack
|
||||||
|
}
|
||||||
|
s.noiseFloor += coef * (s.envelope - s.noiseFloor)
|
||||||
|
if s.noiseFloor < s.minNoiseFloor {
|
||||||
|
s.noiseFloor = s.minNoiseFloor
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) applyTransientSuppressionLocked(absSample float32, voicePresence float32, minGain float32, targetGain float32) float32 {
|
||||||
|
s.clickEnergy = (s.clickEnergy * s.clickDecay) + (absSample * (1.0 - s.clickDecay))
|
||||||
|
transient := absSample - s.clickEnergy
|
||||||
|
transientThreshold := 0.04 + (0.08 * (1.0 - voicePresence))
|
||||||
|
if transient > transientThreshold && voicePresence < 0.65 {
|
||||||
|
clickGain := minGain * 0.55
|
||||||
|
if clickGain < targetGain {
|
||||||
|
targetGain = clickGain
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return clampFloat32(targetGain, 0.02, 1.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) applyGainSmoothingLocked(targetGain float32) {
|
||||||
|
if targetGain < s.suppressionGain {
|
||||||
|
s.suppressionGain += s.gainAttack * (targetGain - s.suppressionGain)
|
||||||
|
} else {
|
||||||
|
s.suppressionGain += s.gainRelease * (targetGain - s.suppressionGain)
|
||||||
|
}
|
||||||
|
s.suppressionGain = clampFloat32(s.suppressionGain, 0.02, 1.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Suppressor) resetStateLocked() {
|
||||||
|
s.prevInput = 0.0
|
||||||
|
s.prevOutput = 0.0
|
||||||
|
s.envelope = s.minNoiseFloor
|
||||||
|
s.noiseFloor = s.minNoiseFloor
|
||||||
|
s.suppressionGain = 1.0
|
||||||
|
s.clickEnergy = 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// clampFloat32 limits value to the closed range [min, max]: it returns min
// when value is below it, max when value is above it, and value otherwise.
func clampFloat32(value float32, min float32, max float32) float32 {
	switch {
	case value < min:
		return min
	case value > max:
		return max
	default:
		return value
	}
}
|
||||||
|
|
||||||
// ProcessSamplesAdvanced applies more sophisticated noise suppression
|
// ProcessSamplesAdvanced applies more sophisticated noise suppression
|
||||||
// This is a placeholder for future RNNoise integration
|
// Placeholder for future RNNoise integration.
|
||||||
func (s *Suppressor) ProcessSamplesAdvanced(samples []int16) {
|
func (s *Suppressor) ProcessSamplesAdvanced(samples []int16) {
|
||||||
// TODO: Integrate RNNoise or other advanced algorithms
|
|
||||||
s.ProcessSamples(samples)
|
s.ProcessSamples(samples)
|
||||||
}
|
}
|
||||||
|
|||||||
103
noise/suppression_test.go
Normal file
103
noise/suppression_test.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package noise
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSuppressorDisabledBypassesSamples(t *testing.T) {
|
||||||
|
suppressor := NewSuppressor()
|
||||||
|
samples := []int16{100, -200, 300, -400, 500}
|
||||||
|
original := append([]int16(nil), samples...)
|
||||||
|
|
||||||
|
suppressor.ProcessSamples(samples)
|
||||||
|
|
||||||
|
for i := range samples {
|
||||||
|
if samples[i] != original[i] {
|
||||||
|
t.Fatalf("expected sample %d to remain unchanged, got %d want %d", i, samples[i], original[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuppressorAttenuatesLowLevelNoise(t *testing.T) {
|
||||||
|
suppressor := NewSuppressor()
|
||||||
|
suppressor.SetEnabled(true)
|
||||||
|
suppressor.SetThreshold(0.08)
|
||||||
|
|
||||||
|
input := makeSineFrame(600, 700)
|
||||||
|
originalRMS := frameRMS(input)
|
||||||
|
processed := append([]int16(nil), input...)
|
||||||
|
suppressor.ProcessSamples(processed)
|
||||||
|
processedRMS := frameRMS(processed)
|
||||||
|
|
||||||
|
if processedRMS >= originalRMS*0.8 {
|
||||||
|
t.Fatalf("expected low-level noise attenuation, got RMS %.2f from %.2f", processedRMS, originalRMS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuppressorPreservesSpeechLikeSignal(t *testing.T) {
|
||||||
|
suppressor := NewSuppressor()
|
||||||
|
suppressor.SetEnabled(true)
|
||||||
|
suppressor.SetThreshold(0.08)
|
||||||
|
|
||||||
|
voice := makeSineFrame(1000, 9000)
|
||||||
|
originalRMS := frameRMS(voice)
|
||||||
|
processed := append([]int16(nil), voice...)
|
||||||
|
suppressor.ProcessSamples(processed)
|
||||||
|
processedRMS := frameRMS(processed)
|
||||||
|
|
||||||
|
if processedRMS <= originalRMS*0.6 {
|
||||||
|
t.Fatalf("expected speech-like signal to be mostly preserved, got RMS %.2f from %.2f", processedRMS, originalRMS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHigherThresholdAppliesStrongerSuppression(t *testing.T) {
|
||||||
|
lowSuppressor := NewSuppressor()
|
||||||
|
lowSuppressor.SetEnabled(true)
|
||||||
|
lowSuppressor.SetThreshold(0.02)
|
||||||
|
|
||||||
|
highSuppressor := NewSuppressor()
|
||||||
|
highSuppressor.SetEnabled(true)
|
||||||
|
highSuppressor.SetThreshold(0.20)
|
||||||
|
|
||||||
|
noiseFrame := makeSineFrame(500, 700)
|
||||||
|
lowRMS := runFrameWarmup(lowSuppressor, noiseFrame, 8)
|
||||||
|
highRMS := runFrameWarmup(highSuppressor, noiseFrame, 8)
|
||||||
|
|
||||||
|
if highRMS >= lowRMS*0.8 {
|
||||||
|
t.Fatalf("expected stronger suppression at higher threshold, got low %.2f high %.2f", lowRMS, highRMS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runFrameWarmup(suppressor *Suppressor, frame []int16, repeats int) float64 {
|
||||||
|
var processed []int16
|
||||||
|
for i := 0; i < repeats; i++ {
|
||||||
|
processed = append([]int16(nil), frame...)
|
||||||
|
suppressor.ProcessSamples(processed)
|
||||||
|
}
|
||||||
|
return frameRMS(processed)
|
||||||
|
}
|
||||||
|
|
||||||
|
// makeSineFrame returns a 480-sample frame holding a sine wave of the given
// frequency (in Hz, at a 48 kHz sample rate) and peak amplitude, starting at
// phase zero.
func makeSineFrame(frequency float64, amplitude float64) []int16 {
	const (
		sampleRate = 48000.0
		frameSize  = 480
	)

	out := make([]int16, frameSize)
	for n := range out {
		phase := (2.0 * math.Pi * frequency * float64(n)) / sampleRate
		out[n] = int16(math.Sin(phase) * amplitude)
	}
	return out
}
|
||||||
|
|
||||||
|
// frameRMS returns the root-mean-square level of samples after scaling each
// sample by 1/32768. An empty slice yields 0.
func frameRMS(samples []int16) float64 {
	count := len(samples)
	if count == 0 {
		return 0.0
	}

	var energy float64
	for i := range samples {
		scaled := float64(samples[i]) / 32768.0
		energy += scaled * scaled
	}
	return math.Sqrt(energy / float64(count))
}
|
||||||
Reference in New Issue
Block a user