Add real-time voice effects for outgoing audio

Implements 7 voice effect modes (6 effects plus "None") that can be cycled through with F12:
- None (default)
- Echo: Single repeating delay with feedback (250ms)
- Reverb: Multiple short delays without feedback
- High Pitch: Chipmunk voice using cubic interpolation
- Low Pitch: Deep voice effect
- Robot: Ring modulation for robotic sound
- Chorus: Layered voices with pitch variations

The effects are applied after noise suppression and AGC in the audio
pipeline. The selected effect is persisted to the config file. Comprehensive
documentation is included in the README.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Storm Dragon
2025-10-13 16:27:08 -04:00
parent 82b308000d
commit f96cb1f79b
9 changed files with 493 additions and 8 deletions

View File

@@ -18,6 +18,26 @@ If a user is too soft to hear, you can boost their audio.
The audio should drastically increase once you have hit the VolumeUp key over 10 times (from the silent/0 position).
The boost setting is saved per user, just like per user volume.
## Voice Effects
Barnard includes real-time voice effects that can be applied to your outgoing microphone audio. Press F12 to cycle through the available effects.
### Available Effects
- **None**: No effect applied (default)
- **Echo**: Single repeating delay effect with feedback (250ms) - creates distinct repetitions that fade away
- **Reverb**: Multiple short delays (12.5ms, 20ms, 33ms) without feedback - adds thickness and fullness to your voice
- **High Pitch**: Chipmunk-style voice using pitch shifting
- **Low Pitch**: Deep voice using pitch shifting
- **Robot**: Ring-modulation-style effect (30 Hz amplitude modulation) for a robotic sound
- **Chorus**: Layered voices with slight pitch variations for a rich, ensemble sound
### Controls
- **F12 key**: Cycle through voice effects (configurable hotkey)
- **Configuration**: Your selected effect is saved in `~/.barnard.yaml`
### How It Works
Voice effects are applied to your outgoing audio in real time, after noise suppression and automatic gain control. The effects use various digital signal processing techniques including delay lines, pitch shifting with cubic interpolation, and amplitude (ring-style) modulation.
## Noise Suppression
Barnard includes real-time noise suppression for microphone input to filter out background noise such as keyboard typing, computer fans, and other environmental sounds.
@@ -191,6 +211,7 @@ After running the command above, `barnard` will be compiled as `$(go env GOPATH)
- <kbd>F1</kbd>: toggle voice transmission
- <kbd>F9</kbd>: toggle noise suppression
- <kbd>F12</kbd>: cycle through voice effects
- <kbd>Ctrl+L</kbd>: clear chat log
- <kbd>Tab</kbd>: toggle focus between chat and user tree
- <kbd>Page Up</kbd>: scroll chat up

408
audio/effects.go Normal file
View File

@@ -0,0 +1,408 @@
package audio
import (
"math"
)
// VoiceEffect identifies one of the selectable effects for outgoing voice.
type VoiceEffect int

// The selectable effects. Values are consecutive starting at zero, which is
// what lets callers step through them with modular arithmetic.
const (
	EffectNone VoiceEffect = iota
	EffectEcho
	EffectReverb
	EffectHighPitch
	EffectLowPitch
	EffectRobot
	EffectChorus
	EffectCount // Number of effects; must remain the last entry.
)

// String returns the display name of the effect, or "Unknown" for any value
// outside the range of declared effects (including EffectCount itself).
func (e VoiceEffect) String() string {
	names := [...]string{
		EffectNone:      "None",
		EffectEcho:      "Echo",
		EffectReverb:    "Reverb",
		EffectHighPitch: "High Pitch",
		EffectLowPitch:  "Low Pitch",
		EffectRobot:     "Robot",
		EffectChorus:    "Chorus",
	}
	if e < 0 || int(e) >= len(names) {
		return "Unknown"
	}
	return names[e]
}
// EffectsProcessor applies the currently selected VoiceEffect to 16-bit PCM
// samples in place and holds the state each effect carries between calls
// (delay lines, write positions and oscillator phases).
//
// NOTE(review): no field is guarded by a lock. If SetEffect/CycleEffect are
// called from a different goroutine than ProcessSamples, access is racy —
// confirm against the callers.
type EffectsProcessor struct {
	currentEffect VoiceEffect // Effect dispatched by ProcessSamples
	enabled       bool        // When false, ProcessSamples leaves the audio untouched
	// Echo parameters
	echoDelay    int     // Delay in samples; used to size echoBuffer
	echoFeedback float32 // Share of the delayed signal written back into the delay line (0-1)
	echoMix      float32 // Weight of the delayed signal in the output; the dry signal gets 1-echoMix
	echoBuffer   []int16 // Circular delay line for echo
	echoPosition int     // Current read/write position in echoBuffer
	// Reverb state. Despite the "reverse" in the names, these hold the plain
	// delay line that processReverb taps at three fixed offsets.
	reverseInputBuffer []int16 // Circular delay line for reverb
	reverseInputPos    int     // Write position in reverseInputBuffer
	// Pitch shift parameters
	pitchRatio  float32 // Set to 1.0 by the constructor; processPitchShift takes its ratio as an argument and does not read this field
	pitchBuffer []int16 // History buffer; the newest frame occupies the tail
	pitchPhase  float32 // Only ever reset to 0 in the code shown here; not otherwise read
	// Robot voice parameters
	robotFreq  float32 // Modulation (carrier) frequency in Hz
	robotPhase float32 // Carrier phase in radians, kept in [0, 2*pi)
	sampleRate float32 // Audio sample rate in samples per second
	// Chorus parameters (one entry per chorus voice)
	chorusDelays    []int     // Base delay of each voice, in samples
	chorusBuffers   [][]int16 // Per-voice circular delay lines, allocated on first use by processChorus
	chorusPositions []int     // Write position in each chorus buffer
	chorusRates     []float32 // LFO rate of each voice, in Hz
	chorusPhases    []float32 // LFO phase of each voice, in radians
}
// NewEffectsProcessor builds a processor for audio at sampleRate samples per
// second. It starts enabled with EffectNone selected. All delay-line sizes are
// derived from sampleRate; the per-voice chorus buffers are left nil here and
// are allocated by processChorus the first time the chorus effect runs.
func NewEffectsProcessor(sampleRate int) *EffectsProcessor {
	const chorusVoices = 3

	ep := new(EffectsProcessor)
	ep.currentEffect = EffectNone
	ep.enabled = true
	ep.sampleRate = float32(sampleRate)

	// Echo: a quarter of a second (250ms) of delay with moderate feedback.
	ep.echoDelay = sampleRate / 4
	ep.echoFeedback = 0.4
	ep.echoMix = 0.5
	ep.echoBuffer = make([]int16, ep.echoDelay)

	// Reverb: a 100ms delay line that processReverb taps at shorter offsets.
	ep.reverseInputBuffer = make([]int16, sampleRate/10)

	// Pitch shift: fixed-size history buffer, neutral ratio.
	ep.pitchRatio = 1.0
	ep.pitchBuffer = make([]int16, 4096)

	// Robot voice: 30 Hz modulation.
	ep.robotFreq = 30.0

	// Chorus: three voices with base delays of roughly 20-30ms and slightly
	// different LFO rates (in Hz).
	ep.chorusDelays = []int{sampleRate / 50, sampleRate / 40, sampleRate / 35}
	ep.chorusBuffers = make([][]int16, chorusVoices)
	ep.chorusPositions = make([]int, chorusVoices)
	ep.chorusRates = []float32{1.5, 2.0, 2.3}
	ep.chorusPhases = make([]float32, chorusVoices)

	// Positions and phases rely on their zero values.
	return ep
}
// GetCurrentEffect returns the effect that ProcessSamples currently applies.
func (ep *EffectsProcessor) GetCurrentEffect() VoiceEffect {
	return ep.currentEffect
}
// SetEffect selects effect and clears all effect state so the new effect
// starts from silence. Values outside [EffectNone, EffectCount) are ignored
// and leave the processor unchanged.
func (ep *EffectsProcessor) SetEffect(effect VoiceEffect) {
	if effect < 0 || effect >= EffectCount {
		return
	}
	ep.currentEffect = effect
	ep.resetBuffers()
}
// CycleEffect advances to the next effect, wrapping back to the first one
// after the last, clears all effect state, and returns the newly selected
// effect.
func (ep *EffectsProcessor) CycleEffect() VoiceEffect {
	next := ep.currentEffect + 1
	next %= EffectCount
	ep.currentEffect = next
	ep.resetBuffers()
	return next
}
// SetEnabled enables or disables effects processing. While disabled,
// ProcessSamples returns without touching the samples; the selected effect
// and its buffers are left as they are.
func (ep *EffectsProcessor) SetEnabled(enabled bool) {
	ep.enabled = enabled
}
// IsEnabled reports whether effects processing is enabled. It reflects only
// the enabled flag, so it can return true while EffectNone is selected.
func (ep *EffectsProcessor) IsEnabled() bool {
	return ep.enabled
}
// ProcessSamples applies the selected effect to samples in place. It does
// nothing when processing is disabled, when samples is empty, or when the
// selected effect is EffectNone (or any value without a handler).
func (ep *EffectsProcessor) ProcessSamples(samples []int16) {
	if len(samples) == 0 || !ep.enabled {
		return
	}

	switch ep.currentEffect {
	case EffectNone:
		return
	case EffectEcho:
		ep.processEcho(samples)
	case EffectReverb:
		ep.processReverb(samples)
	case EffectHighPitch:
		// Ratio above 1 reads the input faster, raising the pitch.
		ep.processPitchShift(samples, 1.5)
	case EffectLowPitch:
		// Ratio below 1 reads the input slower, lowering the pitch.
		ep.processPitchShift(samples, 0.75)
	case EffectRobot:
		ep.processRobot(samples)
	case EffectChorus:
		ep.processChorus(samples)
	}
}
// processEcho runs samples through a single feedback delay line, in place.
// Each output sample is the dry input weighted by 1-echoMix plus the delayed
// signal weighted by echoMix; the delay line is then fed the dry input plus
// the delayed signal scaled by echoFeedback, so repeats decay over time.
func (ep *EffectsProcessor) processEcho(samples []int16) {
	const limit = 32767 // symmetric clamp applied before converting back to int16

	for i, dry := range samples {
		delayed := ep.echoBuffer[ep.echoPosition]

		// What the listener hears for this sample.
		mixed := float32(dry)*(1.0-ep.echoMix) +
			float32(delayed)*ep.echoMix

		// What goes back into the delay line for the next repeat.
		feed := float32(dry) + float32(delayed)*ep.echoFeedback
		switch {
		case feed > limit:
			feed = limit
		case feed < -limit:
			feed = -limit
		}
		ep.echoBuffer[ep.echoPosition] = int16(feed)
		ep.echoPosition = (ep.echoPosition + 1) % len(ep.echoBuffer)

		switch {
		case mixed > limit:
			mixed = limit
		case mixed < -limit:
			mixed = -limit
		}
		samples[i] = int16(mixed)
	}
}
// processReverb thickens the signal in place by adding three short,
// non-recirculating taps of the input's own recent history. Tap offsets are
// fractions of the delay-line length; with the 100ms line allocated by
// NewEffectsProcessor they come to about 12.5ms, 20ms and 33ms.
func (ep *EffectsProcessor) processReverb(samples []int16) {
	const limit = 32767

	line := ep.reverseInputBuffer
	size := len(line)
	taps := [...]struct {
		delay int
		gain  float32
	}{
		{size / 8, 0.3},  // ~12.5ms
		{size / 5, 0.2},  // ~20ms
		{size / 3, 0.15}, // ~33ms
	}

	for i, dry := range samples {
		// Record the dry input first; every tap looks strictly into the past.
		line[ep.reverseInputPos] = dry

		var wet float32
		for _, tap := range taps {
			from := (ep.reverseInputPos - tap.delay + size) % size
			wet += float32(line[from]) * tap.gain
		}

		// Dry signal at 70% plus the summed taps.
		mixed := float32(dry)*0.7 + wet

		ep.reverseInputPos = (ep.reverseInputPos + 1) % size

		switch {
		case mixed > limit:
			mixed = limit
		case mixed < -limit:
			mixed = -limit
		}
		samples[i] = int16(mixed)
	}
}
// processPitchShift resamples samples in place by ratio using 4-point cubic
// Hermite interpolation: ratio > 1 raises the pitch, ratio < 1 lowers it, and
// ratio == 1 is a no-op.
//
// The frame is copied to the tail of pitchBuffer and read back at a stride of
// ratio samples per output sample. Frames longer than pitchBuffer are
// processed in pitchBuffer-sized pieces; previously such a frame produced a
// negative slice index and panicked.
//
// NOTE(review): because each frame is resampled on its own, for ratio > 1 the
// read position reaches the end of the buffer before the frame is finished
// and is then pinned to one position, so the tail of the output repeats a
// single value; for ratio < 1 the final part of the input frame is never
// read. A continuous (overlap-add or delay-line) shifter would be needed to
// remove these per-frame artifacts.
func (ep *EffectsProcessor) processPitchShift(samples []int16, ratio float32) {
	if ratio == 1.0 {
		return
	}

	bufLen := len(ep.pitchBuffer)
	if bufLen < 4 {
		// The interpolation reads idx-1 through idx+2, so it needs at least
		// four samples of storage. Leave the audio untouched otherwise.
		return
	}

	// Split oversized frames so bufLen-len(samples) below is never negative.
	for len(samples) > bufLen {
		ep.processPitchShift(samples[:bufLen], ratio)
		samples = samples[bufLen:]
	}

	// Place the frame at the tail of the buffer, after the retained history.
	frameStart := bufLen - len(samples)
	copy(ep.pitchBuffer[frameStart:], samples)

	for i := range samples {
		// Fractional read position for this output sample.
		srcPos := float32(frameStart) + float32(i)*ratio

		// Keep idx-1 and idx+2 inside the buffer.
		if srcPos >= float32(bufLen-2) {
			srcPos = float32(bufLen - 3)
		}
		if srcPos < 1 {
			srcPos = 1
		}

		idx := int(srcPos)
		frac := srcPos - float32(idx)

		// The four samples surrounding the read position.
		y0 := float32(ep.pitchBuffer[idx-1])
		y1 := float32(ep.pitchBuffer[idx])
		y2 := float32(ep.pitchBuffer[idx+1])
		y3 := float32(ep.pitchBuffer[idx+2])

		// Cubic Hermite polynomial coefficients.
		c0 := y1
		c1 := 0.5 * (y2 - y0)
		c2 := y0 - 2.5*y1 + 2.0*y2 - 0.5*y3
		c3 := 0.5*(y3-y0) + 1.5*(y1-y2)

		interpolated := c0 + c1*frac + c2*frac*frac + c3*frac*frac*frac

		// Hard clamp: the cubic can overshoot the range of its inputs.
		if interpolated > 32767 {
			interpolated = 32767
		} else if interpolated < -32767 {
			interpolated = -32767
		}

		samples[i] = int16(interpolated)
	}

	// Slide the buffer left by one frame so this frame becomes history.
	copy(ep.pitchBuffer, ep.pitchBuffer[len(samples):])
}
// processRobot modulates the amplitude of samples in place with a sine
// carrier at robotFreq. The carrier is offset and scaled to the range 0..1
// (0.5 + 0.5*sin), so this is a tremolo-style amplitude modulation rather
// than a bipolar ring modulation. The carrier phase persists across calls.
func (ep *EffectsProcessor) processRobot(samples []int16) {
	const (
		fullTurn = 2.0 * math.Pi
		limit    = 32767
	)

	// Phase advance per sample, in radians.
	step := fullTurn * ep.robotFreq / ep.sampleRate

	for i, in := range samples {
		carrier := float32(math.Sin(float64(ep.robotPhase)))
		out := float32(in) * (0.5 + carrier*0.5)

		// Advance and wrap the phase so it stays within one turn.
		if ep.robotPhase += step; ep.robotPhase >= fullTurn {
			ep.robotPhase -= fullTurn
		}

		switch {
		case out > limit:
			out = limit
		case out < -limit:
			out = -limit
		}
		samples[i] = int16(out)
	}
}
// processChorus mixes the dry signal (40%) with one delayed copy per chorus
// voice (20% each), in place. Each voice's delay is swept by its own sine LFO
// by +/-3% around the base delay in chorusDelays, which produces the slight
// pitch variation between voices.
//
// Delay buffers are allocated on first use with headroom for the longest
// modulated delay. Previously each buffer was exactly as long as its base
// delay, so whenever the LFO was positive the delay was clamped to the buffer
// end and half of every LFO cycle was flattened; a zero base delay also
// produced a zero-length buffer and a modulo-by-zero panic.
func (ep *EffectsProcessor) processChorus(samples []int16) {
	const (
		dryGain   = 0.4  // share of the unprocessed signal
		voiceGain = 0.2  // share of each delayed voice
		lfoDepth  = 0.03 // delay sweep as a fraction of the base delay
		limit     = 32767
	)

	// Lazily allocate each voice's delay line, sized for base*(1+depth) plus
	// slack so the modulated read offset always stays inside the buffer.
	for j := range ep.chorusBuffers {
		if len(ep.chorusBuffers[j]) == 0 {
			maxDelay := int(float32(ep.chorusDelays[j]) * (1.0 + lfoDepth))
			ep.chorusBuffers[j] = make([]int16, maxDelay+2)
		}
	}

	for i, dry := range samples {
		output := float32(dry) * dryGain

		for j := 0; j < len(ep.chorusDelays); j++ {
			buf := ep.chorusBuffers[j]

			// Advance this voice's LFO and wrap its phase.
			lfoPhaseInc := 2.0 * math.Pi * ep.chorusRates[j] / ep.sampleRate
			lfo := float32(math.Sin(float64(ep.chorusPhases[j])))
			ep.chorusPhases[j] += lfoPhaseInc
			if ep.chorusPhases[j] >= 2.0*math.Pi {
				ep.chorusPhases[j] -= 2.0 * math.Pi
			}

			// Modulated delay in samples. The clamp is a safety net for
			// buffers that were not sized by the allocation above.
			modDelay := int(float32(ep.chorusDelays[j]) * (1.0 + lfo*lfoDepth))
			if modDelay >= len(buf) {
				modDelay = len(buf) - 1
			}

			// Read the delayed sample before overwriting the write slot.
			readPos := (ep.chorusPositions[j] - modDelay + len(buf)) % len(buf)
			output += float32(buf[readPos]) * voiceGain

			// Record the dry input for later reads by this voice.
			buf[ep.chorusPositions[j]] = dry
			ep.chorusPositions[j] = (ep.chorusPositions[j] + 1) % len(buf)
		}

		if output > limit {
			output = limit
		} else if output < -limit {
			output = -limit
		}
		samples[i] = int16(output)
	}
}
// resetBuffers silences every delay line and rewinds all positions and
// oscillator phases, so a newly selected effect does not replay audio left
// over from a previous one. Chorus buffers that were never allocated are
// left nil.
func (ep *EffectsProcessor) resetBuffers() {
	zero := func(buf []int16) {
		for i := range buf {
			buf[i] = 0
		}
	}

	// Echo
	zero(ep.echoBuffer)
	ep.echoPosition = 0

	// Reverb
	zero(ep.reverseInputBuffer)
	ep.reverseInputPos = 0

	// Pitch shift
	zero(ep.pitchBuffer)
	ep.pitchPhase = 0

	// Robot
	ep.robotPhase = 0

	// Chorus: one buffer, position and LFO phase per voice.
	for j, buf := range ep.chorusBuffers {
		zero(buf)
		ep.chorusPositions[j] = 0
		ep.chorusPhases[j] = 0
	}
}

View File

@@ -3,6 +3,7 @@ package main
import (
"crypto/tls"
"git.stormux.org/storm/barnard/audio"
"git.stormux.org/storm/barnard/config"
"git.stormux.org/storm/barnard/gumble/gumble"
"git.stormux.org/storm/barnard/gumble/gumbleopenal"
@@ -44,9 +45,12 @@ type Barnard struct {
// Added for channel muting
MutedChannels map[uint32]bool
// Added for noise suppression
NoiseSuppressor *noise.Suppressor
// Added for voice effects
VoiceEffects *audio.EffectsProcessor
}
func (b *Barnard) StopTransmission() {

View File

@@ -50,6 +50,7 @@ func (b *Barnard) connect(reconnect bool) bool {
b.Stream = stream
b.Stream.AttachStream(b.Client)
b.Stream.SetNoiseProcessor(b.NoiseSuppressor)
b.Stream.SetEffectsProcessor(b.VoiceEffects)
b.Connected = true
return true
}

View File

@@ -19,4 +19,5 @@ type Hotkeys struct {
ScrollToTop *uiterm.Key
ScrollToBottom *uiterm.Key
NoiseSuppressionToggle *uiterm.Key
CycleVoiceEffect *uiterm.Key
}

View File

@@ -28,6 +28,7 @@ type exportableConfig struct {
NotifyCommand *string
NoiseSuppressionEnabled *bool
NoiseSuppressionThreshold *float32
VoiceEffect *int
}
type server struct {
@@ -78,6 +79,7 @@ func (c *Config) LoadConfig() {
ScrollUp: key(uiterm.KeyPgup),
ScrollDown: key(uiterm.KeyPgdn),
NoiseSuppressionToggle: key(uiterm.KeyF9),
CycleVoiceEffect: key(uiterm.KeyF12),
}
if fileExists(c.fn) {
var data []byte
@@ -123,6 +125,10 @@ func (c *Config) LoadConfig() {
threshold := float32(0.02)
jc.NoiseSuppressionThreshold = &threshold
}
if c.config.VoiceEffect == nil {
effect := 0 // Default to EffectNone
jc.VoiceEffect = &effect
}
}
func (c *Config) findServer(address string) *server {
@@ -232,6 +238,18 @@ func (c *Config) SetNoiseSuppressionThreshold(threshold float32) {
c.SaveConfig()
}
// GetVoiceEffect returns the stored voice effect index, or 0 (the "no
// effect" value) when none has been saved.
func (c *Config) GetVoiceEffect() int {
	if stored := c.config.VoiceEffect; stored != nil {
		return *stored
	}
	return 0
}
// SetVoiceEffect stores effect as the selected voice effect index and saves
// the configuration immediately. The value is stored as given; no range
// check is performed here.
func (c *Config) SetVoiceEffect(effect int) {
	c.config.VoiceEffect = &effect
	c.SaveConfig()
}
func (c *Config) UpdateUser(u *gumble.User) {
var j *eUser
var uc *gumble.Client

View File

@@ -17,6 +17,12 @@ type NoiseProcessor interface {
IsEnabled() bool
}
// EffectsProcessor is the voice-effects hook used by Stream for outgoing
// microphone audio. ProcessSamples receives the 16-bit sample buffer and is
// expected to modify it in place (it has no return value); IsEnabled is
// consulted before each call, and processing is skipped when it is false.
type EffectsProcessor interface {
	ProcessSamples(samples []int16)
	IsEnabled() bool
}
const (
maxBufferSize = 11520 // Max frame size (2880) * bytes per stereo sample (4)
)
@@ -49,9 +55,10 @@ type Stream struct {
deviceSink *openal.Device
contextSink *openal.Context
noiseProcessor NoiseProcessor
micAGC *audio.AGC
noiseProcessor NoiseProcessor
micAGC *audio.AGC
effectsProcessor EffectsProcessor
}
func New(client *gumble.Client, inputDevice *string, outputDevice *string, test bool) (*Stream, error) {
@@ -112,6 +119,14 @@ func (s *Stream) SetNoiseProcessor(np NoiseProcessor) {
s.noiseProcessor = np
}
// SetEffectsProcessor installs the voice-effects processor that the
// microphone routine applies to outgoing audio after noise suppression and
// AGC. Pass an untyped nil to disable effects; a nil pointer wrapped in the
// interface is not equal to nil and would still be called.
func (s *Stream) SetEffectsProcessor(ep EffectsProcessor) {
	s.effectsProcessor = ep
}
// GetEffectsProcessor returns the voice-effects processor currently
// installed on the stream, or nil if none has been set.
func (s *Stream) GetEffectsProcessor() EffectsProcessor {
	return s.effectsProcessor
}
func (s *Stream) Destroy() {
if s.link != nil {
@@ -342,12 +357,17 @@ func (s *Stream) sourceRoutine(inputDevice *string) {
if s.noiseProcessor != nil && s.noiseProcessor.IsEnabled() {
s.noiseProcessor.ProcessSamples(int16Buffer)
}
// Apply AGC to outgoing microphone audio (always enabled)
if s.micAGC != nil {
s.micAGC.ProcessSamples(int16Buffer)
}
// Apply voice effects if available and enabled
if s.effectsProcessor != nil && s.effectsProcessor.IsEnabled() {
s.effectsProcessor.ProcessSamples(int16Buffer)
}
outgoing <- gumble.AudioBuffer(int16Buffer)
}
}

View File

@@ -16,6 +16,7 @@ import (
"crypto/tls"
"flag"
"github.com/alessio/shellescape"
"git.stormux.org/storm/barnard/audio"
"git.stormux.org/storm/barnard/config"
"git.stormux.org/storm/barnard/noise"
@@ -162,6 +163,7 @@ func main() {
Address: *server,
MutedChannels: make(map[uint32]bool),
NoiseSuppressor: noise.NewSuppressor(),
VoiceEffects: audio.NewEffectsProcessor(gumble.AudioSampleRate),
}
b.Config.Buffers = *buffers
@@ -176,7 +178,10 @@ func main() {
}
b.NoiseSuppressor.SetEnabled(enabled)
b.NoiseSuppressor.SetThreshold(b.UserConfig.GetNoiseSuppressionThreshold())
// Configure voice effects
b.VoiceEffects.SetEffect(audio.VoiceEffect(b.UserConfig.GetVoiceEffect()))
b.Config.Username = *username
b.Config.Password = *password

9
ui.go
View File

@@ -99,7 +99,7 @@ func (b *Barnard) OnNoiseSuppressionToggle(ui *uiterm.Ui, key uiterm.Key) {
enabled := !b.UserConfig.GetNoiseSuppressionEnabled()
b.UserConfig.SetNoiseSuppressionEnabled(enabled)
b.NoiseSuppressor.SetEnabled(enabled)
if enabled {
b.UpdateGeneralStatus("Noise suppression: ON", false)
} else {
@@ -107,6 +107,12 @@ func (b *Barnard) OnNoiseSuppressionToggle(ui *uiterm.Ui, key uiterm.Key) {
}
}
// OnVoiceEffectCycle is the key handler for the voice-effect hotkey: it
// advances to the next voice effect, saves the selection to the user
// configuration, and shows the new effect's name in the status line.
func (b *Barnard) OnVoiceEffectCycle(ui *uiterm.Ui, key uiterm.Key) {
	next := b.VoiceEffects.CycleEffect()
	b.UserConfig.SetVoiceEffect(int(next))

	// VoiceEffect implements fmt.Stringer, so %s prints its display name.
	status := fmt.Sprintf("Voice effect: %s", next)
	b.UpdateGeneralStatus(status, false)
}
func (b *Barnard) UpdateGeneralStatus(text string, notice bool) {
if notice {
@@ -323,6 +329,7 @@ func (b *Barnard) OnUiInitialize(ui *uiterm.Ui) {
b.Ui.AddKeyListener(b.OnVoiceToggle, b.Hotkeys.Talk)
b.Ui.AddKeyListener(b.OnTimestampToggle, b.Hotkeys.ToggleTimestamps)
b.Ui.AddKeyListener(b.OnNoiseSuppressionToggle, b.Hotkeys.NoiseSuppressionToggle)
b.Ui.AddKeyListener(b.OnVoiceEffectCycle, b.Hotkeys.CycleVoiceEffect)
b.Ui.AddKeyListener(b.OnQuitPress, b.Hotkeys.Exit)
b.Ui.AddKeyListener(b.OnScrollOutputUp, b.Hotkeys.ScrollUp)
b.Ui.AddKeyListener(b.OnScrollOutputDown, b.Hotkeys.ScrollDown)