diff --git a/bragi.py b/bragi.py index c4d46f2..0d3d99d 100755 --- a/bragi.py +++ b/bragi.py @@ -101,7 +101,8 @@ class MumbleBot: if args.certificate: certificate = args.certificate else: - certificate = util.solve_filepath(var.config.get("server", "certificate")) + # Get configured cert or auto-generate one if needed + certificate = util.get_or_create_certificate(var.config.get("server", "certificate")) if args.tokens: tokens = args.tokens diff --git a/configuration.default.ini b/configuration.default.ini index af235a7..cdfd4e8 100644 --- a/configuration.default.ini +++ b/configuration.default.ini @@ -54,6 +54,11 @@ music_database_path = music.db music_folder = music_folder/ pip3_path = venv/bin/pip playback_mode = one-shot +# Number of parallel workers for database rebuild (scanning music files) +# 0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system) +# 1 = sequential (no parallelization, slowest but lowest resource usage) +# N = use exactly N worker processes (2 or higher for parallel processing) +rebuild_workers = 0 redirect_stderr = True refresh_cache_on_startup = True save_music_library = True diff --git a/configuration.example.ini b/configuration.example.ini index ff9bb63..318ce4a 100644 --- a/configuration.example.ini +++ b/configuration.example.ini @@ -16,6 +16,10 @@ port = 64738 #password = #channel = #tokens = token1,token2 +# 'certificate': Path to client certificate for Mumble authentication. +# If not specified, a self-signed certificate (bragi.pem) will be +# automatically generated in the bot's directory. This provides the bot +# with a persistent identity on the Mumble server. #certificate = # The [bot] section stores some basic settings for the bot. @@ -27,10 +31,6 @@ port = 64738 #comment = "Hi, I'm here to play radio, local music or youtube/soundcloud music. Have fun!" #avatar = -# 'language': Language to use; available languages can be found inside -# the lang/ folder. -#language=en_US - # 'music_folder': Folder that stores your local songs. #music_folder = music_folder/ @@ -74,8 +74,11 @@ port = 64738 #autoplay_length = 5 #clear_when_stop_in_oneshot = False -# Auto-update system has been removed from Bragi -#target_version = stable +# 'rebuild_workers': Number of parallel workers for database rebuild (scanning music files). +# 0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system) +# 1 = sequential (no parallelization, slowest but lowest resource usage) +# N = use exactly N worker processes (2 or higher for parallel processing) +#rebuild_workers = 0 # 'tmp_folder': Folder that music will be downloaded into. # 'tmp_folder_max_size': Maximum size of tmp_folder in MB, or 0 to not cache @@ -89,10 +92,6 @@ port = 64738 # 'download_attempts': How many times to attempt a download. #download_attempts = 2 -# Auto-update system has been removed from Bragi -#auto_check_update = False -#pip3_path = venv/bin/pip - # 'logfile': File to write log messages to. # 'redirect_stderr': Whether to capture outputs from standard error and write # it into the log file. Useful for capturing an exception message when the @@ -105,7 +104,7 @@ port = 64738 #allow_private_message = True # 'delete_allowed': Whether to allow admins to delete a file from the library -# stored on disk. Works for both command and web interfaces. +# stored on disk. #delete_allowed = True # 'save_music_library': Whether to save music metadata to the database. @@ -153,8 +152,6 @@ port = 64738 # query youtube", you should provide a value here. #youtube_query_cookie = {"CONSENT": "paste your CONSENT cookie value here"} -# Web interface has been removed from Bragi - # The [debug] section contains settings to enable debugging messages. [debug] # 'ffmpeg': Whether to display debug messages from ffmpeg. diff --git a/media/cache.py b/media/cache.py index 7a6cd00..ea1c1f2 100644 --- a/media/cache.py +++ b/media/cache.py @@ -1,10 +1,12 @@ -# +# # Bragi - A Mumble music bot # Forked from botamusique by azlux (https://github.com/azlux/botamusque) # import logging import os +import multiprocessing +from concurrent.futures import ProcessPoolExecutor, as_completed import json import threading @@ -23,6 +25,29 @@ class ItemNotCachedError(Exception): pass +def _process_file_for_cache(file_path): + """Worker function to process a single file for the cache. + This must be a module-level function for multiprocessing to work. + + Args: + file_path: Relative path to the audio file + + Returns: + dict: Music item dictionary ready for database insertion, or None on error + """ + try: + # Import inside function to avoid pickling issues + import variables as var + from media.item import item_builders + + item = item_builders['file'](path=file_path) + return item.to_dict() + except Exception as e: + # Log errors but don't fail the whole process + logging.getLogger("bot").warning(f"library: failed to process file {file_path}: {e}") + return None + + class MusicCache(dict): def __init__(self, db: MusicDatabase): super().__init__() @@ -115,27 +140,90 @@ class MusicCache(dict): def build_dir_cache(self): self.dir_lock.acquire() - self.log.info("library: rebuild directory cache") - files = util.get_recursive_file_list_sorted(var.music_folder) + try: + self.log.info("library: rebuild directory cache") + files_list = util.get_recursive_file_list_sorted(var.music_folder) + files_on_disk = set(files_list) # Convert to set for O(1) lookup - # remove deleted files - results = self.db.query_music(Condition().or_equal('type', 'file')) - for result in results: - if result['path'] not in files: - self.log.debug("library: music file missed: %s, delete from library." % result['path']) - self.db.delete_music(Condition().and_equal('id', result['id'])) + self.log.info(f"library: found {len(files_on_disk)} audio files on disk") + + # Get all existing file paths from database as a set + db_paths = set(self.db.query_all_paths()) + self.log.info(f"library: found {len(db_paths)} files in database") + + # Find files to delete (in DB but not on disk) + files_to_delete = db_paths - files_on_disk + if files_to_delete: + self.log.info(f"library: removing {len(files_to_delete)} deleted files from database") + for path in files_to_delete: + self.log.debug(f"library: music file missed: {path}, delete from library.") + self.db.delete_music(Condition().and_equal('path', path)) + + # Find new files to add (on disk but not in DB) + new_files = files_on_disk - db_paths + if not new_files: + self.log.info("library: no new files to add") + self.db.manage_special_tags() + return + + self.log.info(f"library: processing {len(new_files)} new files with parallel workers") + + # Determine number of worker processes from config + # 0 = auto (cpu_count - 1), N = use N workers + configured_workers = var.config.getint('bot', 'rebuild_workers', fallback=0) + if configured_workers == 0: + # Auto mode: use all cores minus one (leave one free for audio/system) + num_workers = max(1, multiprocessing.cpu_count() - 1) + self.log.info(f"library: auto-detected {multiprocessing.cpu_count()} cores, using {num_workers} workers") else: - files.remove(result['path']) + # User specified: validate minimum of 1 + num_workers = max(1, configured_workers) + if num_workers == 1: + self.log.info("library: using 1 worker (sequential processing)") + else: + self.log.info(f"library: using {num_workers} workers (configured)") - for file in files: - results = self.db.query_music(Condition().and_equal('path', file)) - if not results: - item = item_builders['file'](path=file) - self.log.debug("library: music save into database: %s" % item.format_debug_string()) - self.db.insert_music(item.to_dict()) - self.db.manage_special_tags() - self.dir_lock.release() + # Process files in parallel + processed_items = [] + with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Submit all files for processing + future_to_file = {executor.submit(_process_file_for_cache, file_path): file_path + for file_path in new_files} + + # Collect results as they complete + completed = 0 + for future in as_completed(future_to_file): + file_path = future_to_file[future] + try: + result = future.result() + if result: + processed_items.append(result) + completed += 1 + if completed % 100 == 0: + self.log.info(f"library: processed {completed}/{len(new_files)} files") + except Exception as e: + self.log.warning(f"library: failed to process {file_path}: {e}") + + self.log.info(f"library: successfully processed {len(processed_items)} files") + + # Batch insert all new items into database + if processed_items: + self.log.info(f"library: inserting {len(processed_items)} items into database") + import sqlite3 + conn = sqlite3.connect(self.db.db_path) + try: + for item in processed_items: + self.db.insert_music(item, _conn=conn) + conn.commit() + self.log.info("library: database batch insert completed") + finally: + conn.close() + + self.db.manage_special_tags() + self.log.info("library: directory cache rebuild complete") + finally: + self.dir_lock.release() class CachedItemWrapper: diff --git a/media/file.py b/media/file.py index 54346f1..1d6cdac 100644 --- a/media/file.py +++ b/media/file.py @@ -1,15 +1,12 @@ -# +# # Bragi - A Mumble music bot # Forked from botamusique by azlux (https://github.com/azlux/botamusque) # import os import re -from io import BytesIO -import base64 import hashlib import mutagen -from PIL import Image import util import variables as var @@ -23,7 +20,6 @@ type : file title artist duration - thumbnail user ''' @@ -52,7 +48,6 @@ class FileItem(BaseItem): self.path = path self.title = "" self.artist = "" - self.thumbnail = None self.id = hashlib.md5(path.encode()).hexdigest() if os.path.exists(self.uri()): self._get_info_from_tag() @@ -62,7 +57,6 @@ class FileItem(BaseItem): else: super().__init__(from_dict) self.artist = from_dict['artist'] - self.thumbnail = from_dict['thumbnail'] try: self.validate() except ValidationFailedError: @@ -95,112 +89,58 @@ class FileItem(BaseItem): assert path is not None and file_name is not None try: - im = None - path_thumbnail = os.path.join(path, file_name + ".jpg") - - if os.path.isfile(path_thumbnail): - im = Image.open(path_thumbnail) - else: - path_thumbnail = os.path.join(path, "cover.jpg") - if os.path.isfile(path_thumbnail): - im = Image.open(path_thumbnail) - if ext == ".mp3": # title: TIT2 # artist: TPE1, TPE2 # album: TALB - # cover artwork: APIC: tags = mutagen.File(self.uri()) if 'TIT2' in tags: self.title = tags['TIT2'].text[0] if 'TPE1' in tags: # artist self.artist = tags['TPE1'].text[0] - if im is None: - if "APIC:" in tags: - im = Image.open(BytesIO(tags["APIC:"].data)) - elif ext == ".m4a" or ext == ".m4b" or ext == ".mp4" or ext == ".m4p": # title: ©nam (\xa9nam) # artist: ©ART # album: ©alb - # cover artwork: covr tags = mutagen.File(self.uri()) if '©nam' in tags: self.title = tags['©nam'][0] if '©ART' in tags: # artist self.artist = tags['©ART'][0] - if im is None: - if "covr" in tags: - im = Image.open(BytesIO(tags["covr"][0])) - elif ext == ".opus": # title: 'title' # artist: 'artist' # album: 'album' - # cover artwork: 'metadata_block_picture', and then: - ## | - ## | - ## v - ## Decode string as base64 binary - ## | - ## v - ## Open that binary as a mutagen.flac.Picture - ## | - ## v - ## Extract binary image data tags = mutagen.File(self.uri()) if 'title' in tags: self.title = tags['title'][0] if 'artist' in tags: self.artist = tags['artist'][0] - if im is None: - if 'metadata_block_picture' in tags: - pic_as_base64 = tags['metadata_block_picture'][0] - as_flac_picture = mutagen.flac.Picture(base64.b64decode(pic_as_base64)) - im = Image.open(BytesIO(as_flac_picture.data)) - elif ext == ".flac": # title: 'title' # artist: 'artist' # album: 'album' - # cover artwork: tags.pictures tags = mutagen.File(self.uri()) if 'title' in tags: self.title = tags['title'][0] if 'artist' in tags: self.artist = tags['artist'][0] - if im is None: - for flac_picture in tags.pictures: - if flac_picture.type == 3: - im = Image.open(BytesIO(flac_picture.data)) - - if im: - self.thumbnail = self._prepare_thumbnail(im) except: pass if not self.title: self.title = file_name - @staticmethod - def _prepare_thumbnail(im): - im.thumbnail((100, 100), Image.LANCZOS) - buffer = BytesIO() - im = im.convert('RGB') - im.save(buffer, format="JPEG") - return base64.b64encode(buffer.getvalue()).decode('utf-8') - def to_dict(self): dict = super().to_dict() dict['type'] = 'file' dict['path'] = self.path dict['title'] = self.title dict['artist'] = self.artist - dict['thumbnail'] = self.thumbnail return dict def format_debug_string(self): @@ -217,13 +157,7 @@ class FileItem(BaseItem): ) def format_current_playing(self, user): - display = tr("now_playing", item=self.format_song_string(user)) - if self.thumbnail: - thumbnail_html = '' - display += "
" + thumbnail_html - - return display + return tr("now_playing", item=self.format_song_string(user)) def format_title(self): title = self.title if self.title else self.path diff --git a/media/url.py b/media/url.py index 384b554..15ba911 100644 --- a/media/url.py +++ b/media/url.py @@ -1,4 +1,4 @@ -# +# # Bragi - A Mumble music bot # Forked from botamusique by azlux (https://github.com/azlux/botamusque) # @@ -8,11 +8,8 @@ import logging import os import hashlib import traceback -from PIL import Image import yt_dlp as youtube_dl import glob -from io import BytesIO -import base64 import util from constants import tr_cli as tr @@ -52,7 +49,6 @@ class URLItem(BaseItem): self.duration = 0 self.id = hashlib.md5(url.encode()).hexdigest() self.path = var.tmp_folder + self.id - self.thumbnail = "" self.keywords = "" else: super().__init__(from_dict) @@ -60,7 +56,6 @@ class URLItem(BaseItem): self.duration = from_dict['duration'] self.path = from_dict['path'] self.title = from_dict['title'] - self.thumbnail = from_dict['thumbnail'] self.downloading = False self.type = "url" @@ -194,7 +189,6 @@ class URLItem(BaseItem): 'format': 'bestaudio/best', 'outtmpl': base_path, 'noplaylist': True, - 'writethumbnail': True, 'updatetime': False, 'verbose': var.config.getboolean('debug', 'youtube_dl'), 'postprocessors': [{ @@ -232,7 +226,6 @@ class URLItem(BaseItem): self.log.info( "bot: finished downloading url (%s) %s, saved to %s." % (self.title, self.url, self.path)) self.downloading = False - self._read_thumbnail_from_file(base_path + ".jpg") self.version += 1 # notify wrapper to save me return True else: @@ -242,18 +235,6 @@ class URLItem(BaseItem): self.downloading = False raise PreparationFailedError(tr('unable_download', item=self.format_title())) - def _read_thumbnail_from_file(self, path_thumbnail): - if os.path.isfile(path_thumbnail): - im = Image.open(path_thumbnail) - self.thumbnail = self._prepare_thumbnail(im) - - def _prepare_thumbnail(self, im): - im.thumbnail((100, 100), Image.LANCZOS) - buffer = BytesIO() - im = im.convert('RGB') - im.save(buffer, format="JPEG") - return base64.b64encode(buffer.getvalue()).decode('utf-8') - def to_dict(self): dict = super().to_dict() dict['type'] = 'url' @@ -261,7 +242,6 @@ class URLItem(BaseItem): dict['duration'] = self.duration dict['path'] = self.path dict['title'] = self.title - dict['thumbnail'] = self.thumbnail return dict @@ -280,14 +260,7 @@ class URLItem(BaseItem): return self.url def format_current_playing(self, user): - display = tr("now_playing", item=self.format_song_string(user)) - - if self.thumbnail: - thumbnail_html = '' - display += "
" + thumbnail_html - - return display + return tr("now_playing", item=self.format_song_string(user)) def format_title(self): return self.title if self.title else self.url diff --git a/media/url_from_playlist.py b/media/url_from_playlist.py index 77a8405..0b5355f 100644 --- a/media/url_from_playlist.py +++ b/media/url_from_playlist.py @@ -125,14 +125,7 @@ class PlaylistURLItem(URLItem): user=user) def format_current_playing(self, user): - display = tr("now_playing", item=self.format_song_string(user)) - - if self.thumbnail: - thumbnail_html = '' - display += "
" + thumbnail_html - - return display + return tr("now_playing", item=self.format_song_string(user)) def display_type(self): return tr("url_from_playlist") diff --git a/requirements.txt b/requirements.txt index 501edf7..64a9cdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ yt-dlp python-magic -Pillow mutagen requests packaging @@ -8,4 +7,5 @@ pyradios opuslib==3.0.1 numpy protobuf -pycryptodome \ No newline at end of file +pycryptodome +cryptography \ No newline at end of file diff --git a/util.py b/util.py index d7139d1..33903c3 100644 --- a/util.py +++ b/util.py @@ -27,6 +27,108 @@ YT_PKG_NAME = 'yt-dlp' log = logging.getLogger("bot") +# Default certificate filename for auto-generation +DEFAULT_CERT_NAME = "bragi.pem" + + +def generate_certificate(cert_path): + """Generate a self-signed certificate for Mumble authentication. + + Args: + cert_path: Path where the certificate file will be saved + + Returns: + True if certificate was generated successfully, False otherwise + """ + try: + from cryptography import x509 + from cryptography.x509.oid import NameOID + from cryptography.hazmat.primitives import hashes, serialization + from cryptography.hazmat.primitives.asymmetric import rsa + import datetime + + log.info(f"certificate: generating new self-signed certificate at {cert_path}") + + # Generate RSA private key (2048 bits is standard for this use) + privateKey = rsa.generate_private_key( + public_exponent=65537, + key_size=2048, + ) + + # Create certificate subject/issuer + subject = issuer = x509.Name([ + x509.NameAttribute(NameOID.COMMON_NAME, "Bragi Music Bot"), + x509.NameAttribute(NameOID.ORGANIZATION_NAME, "Bragi"), + ]) + + # Build and sign certificate (valid for 10 years) + cert = ( + x509.CertificateBuilder() + .subject_name(subject) + .issuer_name(issuer) + .public_key(privateKey.public_key()) + .serial_number(x509.random_serial_number()) + .not_valid_before(datetime.datetime.now(datetime.timezone.utc)) + .not_valid_after(datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=3650)) + .sign(privateKey, hashes.SHA256()) + ) + + # Write both private key and certificate to same PEM file + # (this is the format Mumble expects) + with open(cert_path, "wb") as f: + f.write(privateKey.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption(), + )) + f.write(cert.public_bytes(serialization.Encoding.PEM)) + + log.info("certificate: successfully generated new certificate") + return True + + except ImportError: + log.warning("certificate: cryptography library not installed, cannot generate certificate") + return False + except Exception as e: + log.error(f"certificate: failed to generate certificate: {e}") + return False + + +def get_or_create_certificate(config_cert_path): + """Get existing certificate or create a new one if needed. + + Args: + config_cert_path: Certificate path from config (may be empty) + + Returns: + Path to certificate file, or empty string if none available + """ + # If user specified a certificate in config, use that + if config_cert_path: + resolved = solve_filepath(config_cert_path) + if resolved and os.path.exists(resolved): + log.debug(f"certificate: using configured certificate: {resolved}") + return resolved + elif config_cert_path: + log.warning(f"certificate: configured certificate not found: {config_cert_path}") + # Fall through to auto-generation + + # Check for existing auto-generated certificate + scriptDir = os.path.dirname(os.path.realpath(__file__)) + defaultCertPath = os.path.join(scriptDir, DEFAULT_CERT_NAME) + + if os.path.exists(defaultCertPath): + log.debug(f"certificate: using existing auto-generated certificate: {defaultCertPath}") + return defaultCertPath + + # Generate new certificate + if generate_certificate(defaultCertPath): + return defaultCertPath + + # No certificate available + log.warning("certificate: no certificate available, connecting without one") + return "" + def solve_filepath(path): if not path: @@ -42,6 +144,12 @@ def solve_filepath(path): def get_recursive_file_list_sorted(path): + # Audio file extensions to include (fast check before expensive magic call) + AUDIO_EXTENSIONS = { + '.mp3', '.flac', '.ogg', '.opus', '.m4a', '.m4b', '.mp4', '.m4p', + '.wav', '.aac', '.wma', '.aiff', '.aif', '.ape', '.mka', '.webm' + } + filelist = [] for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True): relroot = root.replace(path, '', 1) @@ -55,9 +163,16 @@ def get_recursive_file_list_sorted(path): if not os.access(fullpath, os.R_OK): continue + # Fast path: check extension first (covers 99% of cases) + ext = os.path.splitext(file)[1].lower() + if ext in AUDIO_EXTENSIONS: + filelist.append(os.path.join(relroot, file)) + continue + + # Slow path: use magic for files without recognized extensions try: mime = magic.from_file(fullpath, mime=True) - if 'audio' in mime or 'audio' in magic.from_file(fullpath).lower() or 'video' in mime: + if 'audio' in mime or 'video' in mime: filelist.append(os.path.join(relroot, file)) except: pass