Speed up database generation. Generate and use a certificate by default.

This commit is contained in:
Storm Dragon
2025-12-12 22:34:11 -05:00
parent 8ededf6408
commit c310a1c318
9 changed files with 246 additions and 140 deletions
+2 -1
View File
@@ -101,7 +101,8 @@ class MumbleBot:
if args.certificate: if args.certificate:
certificate = args.certificate certificate = args.certificate
else: else:
certificate = util.solve_filepath(var.config.get("server", "certificate")) # Get configured cert or auto-generate one if needed
certificate = util.get_or_create_certificate(var.config.get("server", "certificate"))
if args.tokens: if args.tokens:
tokens = args.tokens tokens = args.tokens
+5
View File
@@ -54,6 +54,11 @@ music_database_path = music.db
music_folder = music_folder/ music_folder = music_folder/
pip3_path = venv/bin/pip pip3_path = venv/bin/pip
playback_mode = one-shot playback_mode = one-shot
# Number of parallel workers for database rebuild (scanning music files)
# 0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system)
# 1 = sequential (no parallelization, slowest but lowest resource usage)
# N = use exactly N worker processes (2 or higher for parallel processing)
rebuild_workers = 0
redirect_stderr = True redirect_stderr = True
refresh_cache_on_startup = True refresh_cache_on_startup = True
save_music_library = True save_music_library = True
+10 -13
View File
@@ -16,6 +16,10 @@ port = 64738
#password = #password =
#channel = #channel =
#tokens = token1,token2 #tokens = token1,token2
# 'certificate': Path to client certificate for Mumble authentication.
# If not specified, a self-signed certificate (bragi.pem) will be
# automatically generated in the bot's directory. This provides the bot
# with a persistent identity on the Mumble server.
#certificate = #certificate =
# The [bot] section stores some basic settings for the bot. # The [bot] section stores some basic settings for the bot.
@@ -27,10 +31,6 @@ port = 64738
#comment = "Hi, I'm here to play radio, local music or youtube/soundcloud music. Have fun!" #comment = "Hi, I'm here to play radio, local music or youtube/soundcloud music. Have fun!"
#avatar = #avatar =
# 'language': Language to use; available languages can be found inside
# the lang/ folder.
#language=en_US
# 'music_folder': Folder that stores your local songs. # 'music_folder': Folder that stores your local songs.
#music_folder = music_folder/ #music_folder = music_folder/
@@ -74,8 +74,11 @@ port = 64738
#autoplay_length = 5 #autoplay_length = 5
#clear_when_stop_in_oneshot = False #clear_when_stop_in_oneshot = False
# Auto-update system has been removed from Bragi # 'rebuild_workers': Number of parallel workers for database rebuild (scanning music files).
#target_version = stable # 0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system)
# 1 = sequential (no parallelization, slowest but lowest resource usage)
# N = use exactly N worker processes (2 or higher for parallel processing)
#rebuild_workers = 0
# 'tmp_folder': Folder that music will be downloaded into. # 'tmp_folder': Folder that music will be downloaded into.
# 'tmp_folder_max_size': Maximum size of tmp_folder in MB, or 0 to not cache # 'tmp_folder_max_size': Maximum size of tmp_folder in MB, or 0 to not cache
@@ -89,10 +92,6 @@ port = 64738
# 'download_attempts': How many times to attempt a download. # 'download_attempts': How many times to attempt a download.
#download_attempts = 2 #download_attempts = 2
# Auto-update system has been removed from Bragi
#auto_check_update = False
#pip3_path = venv/bin/pip
# 'logfile': File to write log messages to. # 'logfile': File to write log messages to.
# 'redirect_stderr': Whether to capture outputs from standard error and write # 'redirect_stderr': Whether to capture outputs from standard error and write
# it into the log file. Useful for capturing an exception message when the # it into the log file. Useful for capturing an exception message when the
@@ -105,7 +104,7 @@ port = 64738
#allow_private_message = True #allow_private_message = True
# 'delete_allowed': Whether to allow admins to delete a file from the library # 'delete_allowed': Whether to allow admins to delete a file from the library
# stored on disk. Works for both command and web interfaces. # stored on disk.
#delete_allowed = True #delete_allowed = True
# 'save_music_library': Whether to save music metadata to the database. # 'save_music_library': Whether to save music metadata to the database.
@@ -153,8 +152,6 @@ port = 64738
# query youtube", you should provide a value here. # query youtube", you should provide a value here.
#youtube_query_cookie = {"CONSENT": "paste your CONSENT cookie value here"} #youtube_query_cookie = {"CONSENT": "paste your CONSENT cookie value here"}
# Web interface has been removed from Bragi
# The [debug] section contains settings to enable debugging messages. # The [debug] section contains settings to enable debugging messages.
[debug] [debug]
# 'ffmpeg': Whether to display debug messages from ffmpeg. # 'ffmpeg': Whether to display debug messages from ffmpeg.
+106 -18
View File
@@ -1,10 +1,12 @@
# #
# Bragi - A Mumble music bot # Bragi - A Mumble music bot
# Forked from botamusique by azlux (https://github.com/azlux/botamusique) # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
# #
import logging import logging
import os import os
import multiprocessing
from concurrent.futures import ProcessPoolExecutor, as_completed
import json import json
import threading import threading
@@ -23,6 +25,29 @@ class ItemNotCachedError(Exception):
pass pass
def _process_file_for_cache(file_path):
"""Worker function to process a single file for the cache.
This must be a module-level function for multiprocessing to work.
Args:
file_path: Relative path to the audio file
Returns:
dict: Music item dictionary ready for database insertion, or None on error
"""
try:
# Import inside function to avoid pickling issues
import variables as var
from media.item import item_builders
item = item_builders['file'](path=file_path)
return item.to_dict()
except Exception as e:
# Log errors but don't fail the whole process
logging.getLogger("bot").warning(f"library: failed to process file {file_path}: {e}")
return None
class MusicCache(dict): class MusicCache(dict):
def __init__(self, db: MusicDatabase): def __init__(self, db: MusicDatabase):
super().__init__() super().__init__()
@@ -115,27 +140,90 @@ class MusicCache(dict):
def build_dir_cache(self): def build_dir_cache(self):
self.dir_lock.acquire() self.dir_lock.acquire()
self.log.info("library: rebuild directory cache") try:
files = util.get_recursive_file_list_sorted(var.music_folder) self.log.info("library: rebuild directory cache")
files_list = util.get_recursive_file_list_sorted(var.music_folder)
files_on_disk = set(files_list) # Convert to set for O(1) lookup
# remove deleted files self.log.info(f"library: found {len(files_on_disk)} audio files on disk")
results = self.db.query_music(Condition().or_equal('type', 'file'))
for result in results: # Get all existing file paths from database as a set
if result['path'] not in files: db_paths = set(self.db.query_all_paths())
self.log.debug("library: music file missed: %s, delete from library." % result['path']) self.log.info(f"library: found {len(db_paths)} files in database")
self.db.delete_music(Condition().and_equal('id', result['id']))
# Find files to delete (in DB but not on disk)
files_to_delete = db_paths - files_on_disk
if files_to_delete:
self.log.info(f"library: removing {len(files_to_delete)} deleted files from database")
for path in files_to_delete:
self.log.debug(f"library: music file missed: {path}, delete from library.")
self.db.delete_music(Condition().and_equal('path', path))
# Find new files to add (on disk but not in DB)
new_files = files_on_disk - db_paths
if not new_files:
self.log.info("library: no new files to add")
self.db.manage_special_tags()
return
self.log.info(f"library: processing {len(new_files)} new files with parallel workers")
# Determine number of worker processes from config
# 0 = auto (cpu_count - 1), N = use N workers
configured_workers = var.config.getint('bot', 'rebuild_workers', fallback=0)
if configured_workers == 0:
# Auto mode: use all cores minus one (leave one free for audio/system)
num_workers = max(1, multiprocessing.cpu_count() - 1)
self.log.info(f"library: auto-detected {multiprocessing.cpu_count()} cores, using {num_workers} workers")
else: else:
files.remove(result['path']) # User specified: validate minimum of 1
num_workers = max(1, configured_workers)
if num_workers == 1:
self.log.info("library: using 1 worker (sequential processing)")
else:
self.log.info(f"library: using {num_workers} workers (configured)")
for file in files:
results = self.db.query_music(Condition().and_equal('path', file))
if not results:
item = item_builders['file'](path=file)
self.log.debug("library: music save into database: %s" % item.format_debug_string())
self.db.insert_music(item.to_dict())
self.db.manage_special_tags() # Process files in parallel
self.dir_lock.release() processed_items = []
with ProcessPoolExecutor(max_workers=num_workers) as executor:
# Submit all files for processing
future_to_file = {executor.submit(_process_file_for_cache, file_path): file_path
for file_path in new_files}
# Collect results as they complete
completed = 0
for future in as_completed(future_to_file):
file_path = future_to_file[future]
try:
result = future.result()
if result:
processed_items.append(result)
completed += 1
if completed % 100 == 0:
self.log.info(f"library: processed {completed}/{len(new_files)} files")
except Exception as e:
self.log.warning(f"library: failed to process {file_path}: {e}")
self.log.info(f"library: successfully processed {len(processed_items)} files")
# Batch insert all new items into database
if processed_items:
self.log.info(f"library: inserting {len(processed_items)} items into database")
import sqlite3
conn = sqlite3.connect(self.db.db_path)
try:
for item in processed_items:
self.db.insert_music(item, _conn=conn)
conn.commit()
self.log.info("library: database batch insert completed")
finally:
conn.close()
self.db.manage_special_tags()
self.log.info("library: directory cache rebuild complete")
finally:
self.dir_lock.release()
class CachedItemWrapper: class CachedItemWrapper:
+2 -68
View File
@@ -1,15 +1,12 @@
# #
# Bragi - A Mumble music bot # Bragi - A Mumble music bot
# Forked from botamusique by azlux (https://github.com/azlux/botamusique) # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
# #
import os import os
import re import re
from io import BytesIO
import base64
import hashlib import hashlib
import mutagen import mutagen
from PIL import Image
import util import util
import variables as var import variables as var
@@ -23,7 +20,6 @@ type : file
title title
artist artist
duration duration
thumbnail
user user
''' '''
@@ -52,7 +48,6 @@ class FileItem(BaseItem):
self.path = path self.path = path
self.title = "" self.title = ""
self.artist = "" self.artist = ""
self.thumbnail = None
self.id = hashlib.md5(path.encode()).hexdigest() self.id = hashlib.md5(path.encode()).hexdigest()
if os.path.exists(self.uri()): if os.path.exists(self.uri()):
self._get_info_from_tag() self._get_info_from_tag()
@@ -62,7 +57,6 @@ class FileItem(BaseItem):
else: else:
super().__init__(from_dict) super().__init__(from_dict)
self.artist = from_dict['artist'] self.artist = from_dict['artist']
self.thumbnail = from_dict['thumbnail']
try: try:
self.validate() self.validate()
except ValidationFailedError: except ValidationFailedError:
@@ -95,112 +89,58 @@ class FileItem(BaseItem):
assert path is not None and file_name is not None assert path is not None and file_name is not None
try: try:
im = None
path_thumbnail = os.path.join(path, file_name + ".jpg")
if os.path.isfile(path_thumbnail):
im = Image.open(path_thumbnail)
else:
path_thumbnail = os.path.join(path, "cover.jpg")
if os.path.isfile(path_thumbnail):
im = Image.open(path_thumbnail)
if ext == ".mp3": if ext == ".mp3":
# title: TIT2 # title: TIT2
# artist: TPE1, TPE2 # artist: TPE1, TPE2
# album: TALB # album: TALB
# cover artwork: APIC:
tags = mutagen.File(self.uri()) tags = mutagen.File(self.uri())
if 'TIT2' in tags: if 'TIT2' in tags:
self.title = tags['TIT2'].text[0] self.title = tags['TIT2'].text[0]
if 'TPE1' in tags: # artist if 'TPE1' in tags: # artist
self.artist = tags['TPE1'].text[0] self.artist = tags['TPE1'].text[0]
if im is None:
if "APIC:" in tags:
im = Image.open(BytesIO(tags["APIC:"].data))
elif ext == ".m4a" or ext == ".m4b" or ext == ".mp4" or ext == ".m4p": elif ext == ".m4a" or ext == ".m4b" or ext == ".mp4" or ext == ".m4p":
# title: ©nam (\xa9nam) # title: ©nam (\xa9nam)
# artist: ©ART # artist: ©ART
# album: ©alb # album: ©alb
# cover artwork: covr
tags = mutagen.File(self.uri()) tags = mutagen.File(self.uri())
if '©nam' in tags: if '©nam' in tags:
self.title = tags['©nam'][0] self.title = tags['©nam'][0]
if '©ART' in tags: # artist if '©ART' in tags: # artist
self.artist = tags['©ART'][0] self.artist = tags['©ART'][0]
if im is None:
if "covr" in tags:
im = Image.open(BytesIO(tags["covr"][0]))
elif ext == ".opus": elif ext == ".opus":
# title: 'title' # title: 'title'
# artist: 'artist' # artist: 'artist'
# album: 'album' # album: 'album'
# cover artwork: 'metadata_block_picture', and then:
## |
## |
## v
## Decode string as base64 binary
## |
## v
## Open that binary as a mutagen.flac.Picture
## |
## v
## Extract binary image data
tags = mutagen.File(self.uri()) tags = mutagen.File(self.uri())
if 'title' in tags: if 'title' in tags:
self.title = tags['title'][0] self.title = tags['title'][0]
if 'artist' in tags: if 'artist' in tags:
self.artist = tags['artist'][0] self.artist = tags['artist'][0]
if im is None:
if 'metadata_block_picture' in tags:
pic_as_base64 = tags['metadata_block_picture'][0]
as_flac_picture = mutagen.flac.Picture(base64.b64decode(pic_as_base64))
im = Image.open(BytesIO(as_flac_picture.data))
elif ext == ".flac": elif ext == ".flac":
# title: 'title' # title: 'title'
# artist: 'artist' # artist: 'artist'
# album: 'album' # album: 'album'
# cover artwork: tags.pictures
tags = mutagen.File(self.uri()) tags = mutagen.File(self.uri())
if 'title' in tags: if 'title' in tags:
self.title = tags['title'][0] self.title = tags['title'][0]
if 'artist' in tags: if 'artist' in tags:
self.artist = tags['artist'][0] self.artist = tags['artist'][0]
if im is None:
for flac_picture in tags.pictures:
if flac_picture.type == 3:
im = Image.open(BytesIO(flac_picture.data))
if im:
self.thumbnail = self._prepare_thumbnail(im)
except: except:
pass pass
if not self.title: if not self.title:
self.title = file_name self.title = file_name
@staticmethod
def _prepare_thumbnail(im):
im.thumbnail((100, 100), Image.LANCZOS)
buffer = BytesIO()
im = im.convert('RGB')
im.save(buffer, format="JPEG")
return base64.b64encode(buffer.getvalue()).decode('utf-8')
def to_dict(self): def to_dict(self):
dict = super().to_dict() dict = super().to_dict()
dict['type'] = 'file' dict['type'] = 'file'
dict['path'] = self.path dict['path'] = self.path
dict['title'] = self.title dict['title'] = self.title
dict['artist'] = self.artist dict['artist'] = self.artist
dict['thumbnail'] = self.thumbnail
return dict return dict
def format_debug_string(self): def format_debug_string(self):
@@ -217,13 +157,7 @@ class FileItem(BaseItem):
) )
def format_current_playing(self, user): def format_current_playing(self, user):
display = tr("now_playing", item=self.format_song_string(user)) return tr("now_playing", item=self.format_song_string(user))
if self.thumbnail:
thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
self.thumbnail + '"/>'
display += "<br />" + thumbnail_html
return display
def format_title(self): def format_title(self):
title = self.title if self.title else self.path title = self.title if self.title else self.path
+2 -29
View File
@@ -1,4 +1,4 @@
# #
# Bragi - A Mumble music bot # Bragi - A Mumble music bot
# Forked from botamusique by azlux (https://github.com/azlux/botamusique) # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
# #
@@ -8,11 +8,8 @@ import logging
import os import os
import hashlib import hashlib
import traceback import traceback
from PIL import Image
import yt_dlp as youtube_dl import yt_dlp as youtube_dl
import glob import glob
from io import BytesIO
import base64
import util import util
from constants import tr_cli as tr from constants import tr_cli as tr
@@ -52,7 +49,6 @@ class URLItem(BaseItem):
self.duration = 0 self.duration = 0
self.id = hashlib.md5(url.encode()).hexdigest() self.id = hashlib.md5(url.encode()).hexdigest()
self.path = var.tmp_folder + self.id self.path = var.tmp_folder + self.id
self.thumbnail = ""
self.keywords = "" self.keywords = ""
else: else:
super().__init__(from_dict) super().__init__(from_dict)
@@ -60,7 +56,6 @@ class URLItem(BaseItem):
self.duration = from_dict['duration'] self.duration = from_dict['duration']
self.path = from_dict['path'] self.path = from_dict['path']
self.title = from_dict['title'] self.title = from_dict['title']
self.thumbnail = from_dict['thumbnail']
self.downloading = False self.downloading = False
self.type = "url" self.type = "url"
@@ -194,7 +189,6 @@ class URLItem(BaseItem):
'format': 'bestaudio/best', 'format': 'bestaudio/best',
'outtmpl': base_path, 'outtmpl': base_path,
'noplaylist': True, 'noplaylist': True,
'writethumbnail': True,
'updatetime': False, 'updatetime': False,
'verbose': var.config.getboolean('debug', 'youtube_dl'), 'verbose': var.config.getboolean('debug', 'youtube_dl'),
'postprocessors': [{ 'postprocessors': [{
@@ -232,7 +226,6 @@ class URLItem(BaseItem):
self.log.info( self.log.info(
"bot: finished downloading url (%s) %s, saved to %s." % (self.title, self.url, self.path)) "bot: finished downloading url (%s) %s, saved to %s." % (self.title, self.url, self.path))
self.downloading = False self.downloading = False
self._read_thumbnail_from_file(base_path + ".jpg")
self.version += 1 # notify wrapper to save me self.version += 1 # notify wrapper to save me
return True return True
else: else:
@@ -242,18 +235,6 @@ class URLItem(BaseItem):
self.downloading = False self.downloading = False
raise PreparationFailedError(tr('unable_download', item=self.format_title())) raise PreparationFailedError(tr('unable_download', item=self.format_title()))
def _read_thumbnail_from_file(self, path_thumbnail):
if os.path.isfile(path_thumbnail):
im = Image.open(path_thumbnail)
self.thumbnail = self._prepare_thumbnail(im)
def _prepare_thumbnail(self, im):
im.thumbnail((100, 100), Image.LANCZOS)
buffer = BytesIO()
im = im.convert('RGB')
im.save(buffer, format="JPEG")
return base64.b64encode(buffer.getvalue()).decode('utf-8')
def to_dict(self): def to_dict(self):
dict = super().to_dict() dict = super().to_dict()
dict['type'] = 'url' dict['type'] = 'url'
@@ -261,7 +242,6 @@ class URLItem(BaseItem):
dict['duration'] = self.duration dict['duration'] = self.duration
dict['path'] = self.path dict['path'] = self.path
dict['title'] = self.title dict['title'] = self.title
dict['thumbnail'] = self.thumbnail
return dict return dict
@@ -280,14 +260,7 @@ class URLItem(BaseItem):
return self.url return self.url
def format_current_playing(self, user): def format_current_playing(self, user):
display = tr("now_playing", item=self.format_song_string(user)) return tr("now_playing", item=self.format_song_string(user))
if self.thumbnail:
thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
self.thumbnail + '"/>'
display += "<br />" + thumbnail_html
return display
def format_title(self): def format_title(self):
return self.title if self.title else self.url return self.title if self.title else self.url
+1 -8
View File
@@ -125,14 +125,7 @@ class PlaylistURLItem(URLItem):
user=user) user=user)
def format_current_playing(self, user): def format_current_playing(self, user):
display = tr("now_playing", item=self.format_song_string(user)) return tr("now_playing", item=self.format_song_string(user))
if self.thumbnail:
thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
self.thumbnail + '"/>'
display += "<br />" + thumbnail_html
return display
def display_type(self): def display_type(self):
return tr("url_from_playlist") return tr("url_from_playlist")
+2 -2
View File
@@ -1,6 +1,5 @@
yt-dlp yt-dlp
python-magic python-magic
Pillow
mutagen mutagen
requests requests
packaging packaging
@@ -8,4 +7,5 @@ pyradios
opuslib==3.0.1 opuslib==3.0.1
numpy numpy
protobuf protobuf
pycryptodome pycryptodome
cryptography
+116 -1
View File
@@ -27,6 +27,108 @@ YT_PKG_NAME = 'yt-dlp'
log = logging.getLogger("bot") log = logging.getLogger("bot")
# Default certificate filename for auto-generation
DEFAULT_CERT_NAME = "bragi.pem"
def generate_certificate(cert_path):
    """Generate a self-signed certificate for Mumble authentication.

    A fresh 2048-bit RSA key and a matching self-signed X.509 certificate
    (valid for 10 years) are written, key first, into one PEM file.

    The file contains the *unencrypted* private key, so it is created with
    owner-only permissions (0o600) rather than the process's default umask.
    Note that the mode is only applied when the file is newly created; an
    already existing file keeps its current permissions.

    Args:
        cert_path: Path where the certificate file will be saved

    Returns:
        True if certificate was generated successfully, False otherwise
        (the ``cryptography`` package is missing, or any error occurred
        while generating or writing the certificate)
    """
    try:
        from cryptography import x509
        from cryptography.x509.oid import NameOID
        from cryptography.hazmat.primitives import hashes, serialization
        from cryptography.hazmat.primitives.asymmetric import rsa
        import datetime

        log.info(f"certificate: generating new self-signed certificate at {cert_path}")

        # Generate RSA private key (2048 bits is standard for this use)
        private_key = rsa.generate_private_key(
            public_exponent=65537,
            key_size=2048,
        )

        # Self-signed: subject and issuer are the same name
        subject = issuer = x509.Name([
            x509.NameAttribute(NameOID.COMMON_NAME, "Bragi Music Bot"),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "Bragi"),
        ])

        # Build and sign certificate (valid for 10 years). The timestamp is
        # taken once so both validity bounds derive from the same instant.
        valid_from = datetime.datetime.now(datetime.timezone.utc)
        cert = (
            x509.CertificateBuilder()
            .subject_name(subject)
            .issuer_name(issuer)
            .public_key(private_key.public_key())
            .serial_number(x509.random_serial_number())
            .not_valid_before(valid_from)
            .not_valid_after(valid_from + datetime.timedelta(days=3650))
            .sign(private_key, hashes.SHA256())
        )

        # Serialize everything in memory first, so that an encoding failure
        # cannot leave a half-written file behind.
        # Private key and certificate go into the same PEM file
        # (this is the format Mumble expects).
        pem_data = private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.TraditionalOpenSSL,
            encryption_algorithm=serialization.NoEncryption(),
        ) + cert.public_bytes(serialization.Encoding.PEM)

        # Create the file readable/writable by the owner only, since it
        # holds an unencrypted private key. O_BINARY only exists on Windows.
        open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(cert_path, open_flags, 0o600)
        with os.fdopen(fd, "wb") as f:
            f.write(pem_data)

        log.info("certificate: successfully generated new certificate")
        return True

    except ImportError:
        log.warning("certificate: cryptography library not installed, cannot generate certificate")
        return False
    except Exception as e:
        log.error(f"certificate: failed to generate certificate: {e}")
        return False
def get_or_create_certificate(config_cert_path):
    """Get existing certificate or create a new one if needed.

    Lookup order:
      1. The certificate configured by the user, if that path resolves to
         an existing regular file.
      2. A previously auto-generated certificate (``DEFAULT_CERT_NAME``)
         located next to this module.
      3. A newly generated certificate written to that same default path.

    Args:
        config_cert_path: Certificate path from config (may be empty)

    Returns:
        Path to certificate file, or empty string if none available
    """
    # If user specified a certificate in config, use that
    if config_cert_path:
        resolved = solve_filepath(config_cert_path)
        # isfile (not exists): a directory is not a usable certificate
        if resolved and os.path.isfile(resolved):
            log.debug(f"certificate: using configured certificate: {resolved}")
            return resolved
        log.warning(f"certificate: configured certificate not found: {config_cert_path}")
        # Fall through to auto-generation

    # Check for existing auto-generated certificate
    script_dir = os.path.dirname(os.path.realpath(__file__))
    default_cert_path = os.path.join(script_dir, DEFAULT_CERT_NAME)

    if os.path.isfile(default_cert_path):
        log.debug(f"certificate: using existing auto-generated certificate: {default_cert_path}")
        return default_cert_path

    # Generate new certificate
    if generate_certificate(default_cert_path):
        return default_cert_path

    # No certificate available
    log.warning("certificate: no certificate available, connecting without one")
    return ""
def solve_filepath(path): def solve_filepath(path):
if not path: if not path:
@@ -42,6 +144,12 @@ def solve_filepath(path):
def get_recursive_file_list_sorted(path): def get_recursive_file_list_sorted(path):
# Audio file extensions to include (fast check before expensive magic call)
AUDIO_EXTENSIONS = {
'.mp3', '.flac', '.ogg', '.opus', '.m4a', '.m4b', '.mp4', '.m4p',
'.wav', '.aac', '.wma', '.aiff', '.aif', '.ape', '.mka', '.webm'
}
filelist = [] filelist = []
for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True): for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
relroot = root.replace(path, '', 1) relroot = root.replace(path, '', 1)
@@ -55,9 +163,16 @@ def get_recursive_file_list_sorted(path):
if not os.access(fullpath, os.R_OK): if not os.access(fullpath, os.R_OK):
continue continue
# Fast path: check extension first (covers 99% of cases)
ext = os.path.splitext(file)[1].lower()
if ext in AUDIO_EXTENSIONS:
filelist.append(os.path.join(relroot, file))
continue
# Slow path: use magic for files without recognized extensions
try: try:
mime = magic.from_file(fullpath, mime=True) mime = magic.from_file(fullpath, mime=True)
if 'audio' in mime or 'audio' in magic.from_file(fullpath).lower() or 'video' in mime: if 'audio' in mime or 'video' in mime:
filelist.append(os.path.join(relroot, file)) filelist.append(os.path.join(relroot, file))
except: except:
pass pass