Speed up database generation. Generate and use a certificate by default.

2025-12-12 22:34:11 -05:00
parent 8ededf6408
commit c310a1c318
9 changed files with 246 additions and 140 deletions
@@ -101,7 +101,8 @@ class MumbleBot:
        if args.certificate:
            certificate = args.certificate
        else:
-            certificate = util.solve_filepath(var.config.get("server", "certificate"))
+            # Get configured cert or auto-generate one if needed
            certificate = util.get_or_create_certificate(var.config.get("server", "certificate"))
        if args.tokens:
            tokens = args.tokens
@@ -54,6 +54,11 @@ music_database_path = music.db
 music_folder = music_folder/
 pip3_path = venv/bin/pip
 playback_mode = one-shot
 # Number of parallel workers for database rebuild (scanning music files)
 # 0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system)
 # 1 = sequential (no parallelization, slowest but lowest resource usage)
 # N = use exactly N worker processes (2 or higher for parallel processing)
 rebuild_workers = 0
 redirect_stderr = True
 refresh_cache_on_startup = True
 save_music_library = True
@@ -16,6 +16,10 @@ port = 64738
 #password =
 #channel =
 #tokens = token1,token2
 # 'certificate': Path to client certificate for Mumble authentication.
 #    If not specified, a self-signed certificate (bragi.pem) will be
 #    automatically generated in the bot's directory. This provides the bot
 #    with a persistent identity on the Mumble server.
 #certificate =
 # The [bot] section stores some basic settings for the bot.
@@ -27,10 +31,6 @@ port = 64738
 #comment = "Hi, I'm here to play radio, local music or youtube/soundcloud music. Have fun!"
 #avatar =
 # 'language': Language to use; available languages can be found inside
 #    the lang/ folder.
 #language=en_US
 # 'music_folder': Folder that stores your local songs.
 #music_folder = music_folder/
@@ -74,8 +74,11 @@ port = 64738
 #autoplay_length = 5
 #clear_when_stop_in_oneshot = False
-# Auto-update system has been removed from Bragi
+# 'rebuild_workers': Number of parallel workers for database rebuild (scanning music files).
-#target_version = stable
+#     0 = auto (recommended: uses all CPU cores minus 1 to leave one free for audio/system)
 #     1 = sequential (no parallelization, slowest but lowest resource usage)
 #     N = use exactly N worker processes (2 or higher for parallel processing)
 #rebuild_workers = 0
 # 'tmp_folder': Folder that music will be downloaded into.
 # 'tmp_folder_max_size': Maximum size of tmp_folder in MB, or 0 to not cache
@@ -89,10 +92,6 @@ port = 64738
 # 'download_attempts': How many times to attempt a download.
 #download_attempts = 2
 # Auto-update system has been removed from Bragi
 #auto_check_update = False
 #pip3_path = venv/bin/pip
 # 'logfile': File to write log messages to.
 # 'redirect_stderr': Whether to capture outputs from standard error and write
 #    it into the log file. Useful for capturing an exception message when the
@@ -105,7 +104,7 @@ port = 64738
 #allow_private_message = True
 # 'delete_allowed': Whether to allow admins to delete a file from the library
-#    stored on disk. Works for both command and web interfaces.
+#    stored on disk.
 #delete_allowed = True
 # 'save_music_library': Whether to save music metadata to the database.
@@ -153,8 +152,6 @@ port = 64738
 #    query youtube", you should provide a value here.
 #youtube_query_cookie = {"CONSENT": "paste your CONSENT cookie value here"}
 # Web interface has been removed from Bragi
 # The [debug] section contains settings to enable debugging messages.
 [debug]
 # 'ffmpeg': Whether to display debug messages from ffmpeg.
@@ -1,10 +1,12 @@
-# 
+#
 # Bragi - A Mumble music bot
 # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
 #
 import logging
 import os
 import multiprocessing
 from concurrent.futures import ProcessPoolExecutor, as_completed
 import json
 import threading
@@ -23,6 +25,29 @@ class ItemNotCachedError(Exception):
    pass
 def _process_file_for_cache(file_path):
    """Worker function to process a single file for the cache.
    This must be a module-level function for multiprocessing to work.
    Args:
        file_path: Relative path to the audio file
    Returns:
        dict: Music item dictionary ready for database insertion, or None on error
    """
    try:
        # Import inside function to avoid pickling issues
        import variables as var
        from media.item import item_builders
        item = item_builders['file'](path=file_path)
        return item.to_dict()
    except Exception as e:
        # Log errors but don't fail the whole process
        logging.getLogger("bot").warning(f"library: failed to process file {file_path}: {e}")
        return None
 class MusicCache(dict):
    def __init__(self, db: MusicDatabase):
        super().__init__()
@@ -115,27 +140,90 @@ class MusicCache(dict):
    def build_dir_cache(self):
        self.dir_lock.acquire()
-        self.log.info("library: rebuild directory cache")
+        try:
-        files = util.get_recursive_file_list_sorted(var.music_folder)
+            self.log.info("library: rebuild directory cache")
            files_list = util.get_recursive_file_list_sorted(var.music_folder)
            files_on_disk = set(files_list)  # Convert to set for O(1) lookup
-        # remove deleted files
+            self.log.info(f"library: found {len(files_on_disk)} audio files on disk")
-        results = self.db.query_music(Condition().or_equal('type', 'file'))
+
-        for result in results:
+            # Get all existing file paths from database as a set
-            if result['path'] not in files:
+            db_paths = set(self.db.query_all_paths())
-                self.log.debug("library: music file missed: %s, delete from library." % result['path'])
+            self.log.info(f"library: found {len(db_paths)} files in database")
-                self.db.delete_music(Condition().and_equal('id', result['id']))
+
            # Find files to delete (in DB but not on disk)
            files_to_delete = db_paths - files_on_disk
            if files_to_delete:
                self.log.info(f"library: removing {len(files_to_delete)} deleted files from database")
                for path in files_to_delete:
                    self.log.debug(f"library: music file missed: {path}, delete from library.")
                    self.db.delete_music(Condition().and_equal('path', path))
            # Find new files to add (on disk but not in DB)
            new_files = files_on_disk - db_paths
            if not new_files:
                self.log.info("library: no new files to add")
                self.db.manage_special_tags()
                return
            self.log.info(f"library: processing {len(new_files)} new files with parallel workers")
            # Determine number of worker processes from config
            # 0 = auto (cpu_count - 1), N = use N workers
            configured_workers = var.config.getint('bot', 'rebuild_workers', fallback=0)
            if configured_workers == 0:
                # Auto mode: use all cores minus one (leave one free for audio/system)
                num_workers = max(1, multiprocessing.cpu_count() - 1)
                self.log.info(f"library: auto-detected {multiprocessing.cpu_count()} cores, using {num_workers} workers")
            else:
-                files.remove(result['path'])
+                # User specified: validate minimum of 1
                num_workers = max(1, configured_workers)
                if num_workers == 1:
                    self.log.info("library: using 1 worker (sequential processing)")
                else:
                    self.log.info(f"library: using {num_workers} workers (configured)")
        for file in files:
            results = self.db.query_music(Condition().and_equal('path', file))
            if not results:
                item = item_builders['file'](path=file)
                self.log.debug("library: music save into database: %s" % item.format_debug_string())
                self.db.insert_music(item.to_dict())
-        self.db.manage_special_tags()
+            # Process files in parallel
-        self.dir_lock.release()
+            processed_items = []
            with ProcessPoolExecutor(max_workers=num_workers) as executor:
                # Submit all files for processing
                future_to_file = {executor.submit(_process_file_for_cache, file_path): file_path
                                for file_path in new_files}
                # Collect results as they complete
                completed = 0
                for future in as_completed(future_to_file):
                    file_path = future_to_file[future]
                    try:
                        result = future.result()
                        if result:
                            processed_items.append(result)
                        completed += 1
                        if completed % 100 == 0:
                            self.log.info(f"library: processed {completed}/{len(new_files)} files")
                    except Exception as e:
                        self.log.warning(f"library: failed to process {file_path}: {e}")
            self.log.info(f"library: successfully processed {len(processed_items)} files")
            # Batch insert all new items into database
            if processed_items:
                self.log.info(f"library: inserting {len(processed_items)} items into database")
                import sqlite3
                conn = sqlite3.connect(self.db.db_path)
                try:
                    for item in processed_items:
                        self.db.insert_music(item, _conn=conn)
                    conn.commit()
                    self.log.info("library: database batch insert completed")
                finally:
                    conn.close()
            self.db.manage_special_tags()
            self.log.info("library: directory cache rebuild complete")
        finally:
            self.dir_lock.release()
 class CachedItemWrapper:
@@ -1,15 +1,12 @@
-# 
+#
 # Bragi - A Mumble music bot
 # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
 #
 import os
 import re
 from io import BytesIO
 import base64
 import hashlib
 import mutagen
 from PIL import Image
 import util
 import variables as var
@@ -23,7 +20,6 @@ type : file
    title
    artist
    duration
    thumbnail
    user
 '''
@@ -52,7 +48,6 @@ class FileItem(BaseItem):
            self.path = path
            self.title = ""
            self.artist = ""
            self.thumbnail = None
            self.id = hashlib.md5(path.encode()).hexdigest()
            if os.path.exists(self.uri()):
                self._get_info_from_tag()
@@ -62,7 +57,6 @@ class FileItem(BaseItem):
        else:
            super().__init__(from_dict)
            self.artist = from_dict['artist']
            self.thumbnail = from_dict['thumbnail']
            try:
                self.validate()
            except ValidationFailedError:
@@ -95,112 +89,58 @@ class FileItem(BaseItem):
        assert path is not None and file_name is not None
        try:
            im = None
            path_thumbnail = os.path.join(path, file_name + ".jpg")
            if os.path.isfile(path_thumbnail):
                im = Image.open(path_thumbnail)
            else:
                path_thumbnail = os.path.join(path, "cover.jpg")
                if os.path.isfile(path_thumbnail):
                    im = Image.open(path_thumbnail)
            if ext == ".mp3":
                # title: TIT2
                # artist: TPE1, TPE2
                # album: TALB
                # cover artwork: APIC:
                tags = mutagen.File(self.uri())
                if 'TIT2' in tags:
                    self.title = tags['TIT2'].text[0]
                if 'TPE1' in tags:  # artist
                    self.artist = tags['TPE1'].text[0]
                if im is None:
                    if "APIC:" in tags:
                        im = Image.open(BytesIO(tags["APIC:"].data))
            elif ext == ".m4a" or ext == ".m4b" or ext == ".mp4" or ext == ".m4p":
                # title: ©nam (\xa9nam)
                # artist: ©ART
                # album: ©alb
                # cover artwork: covr
                tags = mutagen.File(self.uri())
                if '©nam' in tags:
                    self.title = tags['©nam'][0]
                if '©ART' in tags:  # artist
                    self.artist = tags['©ART'][0]
                if im is None:
                    if "covr" in tags:
                        im = Image.open(BytesIO(tags["covr"][0]))
            elif ext == ".opus":
                # title: 'title'
                # artist: 'artist'
                # album: 'album'
                # cover artwork: 'metadata_block_picture', and then:
                ##                          |
                ##                          |
                ##                          v
                ##            Decode string as base64 binary
                ##                          |
                ##                          v
                ##      Open that binary as a mutagen.flac.Picture
                ##                          |
                ##                          v
                ##              Extract binary image data
                tags = mutagen.File(self.uri())
                if 'title' in tags:
                    self.title = tags['title'][0]
                if 'artist' in tags:
                    self.artist = tags['artist'][0]
                if im is None:
                    if 'metadata_block_picture' in tags:
                        pic_as_base64 = tags['metadata_block_picture'][0]
                        as_flac_picture = mutagen.flac.Picture(base64.b64decode(pic_as_base64))
                        im = Image.open(BytesIO(as_flac_picture.data))
            elif ext == ".flac":
                # title: 'title'
                # artist: 'artist'
                # album: 'album'
                # cover artwork: tags.pictures
                tags = mutagen.File(self.uri())
                if 'title' in tags:
                    self.title = tags['title'][0]
                if 'artist' in tags:
                    self.artist = tags['artist'][0]
                if im is None:
                    for flac_picture in tags.pictures:
                        if flac_picture.type == 3:
                            im = Image.open(BytesIO(flac_picture.data))
            if im:
                self.thumbnail = self._prepare_thumbnail(im)
        except:
            pass
        if not self.title:
            self.title = file_name
    @staticmethod
    def _prepare_thumbnail(im):
        """Turn an image into a base64-encoded JPEG thumbnail string.

        Args:
            im: Image object; ``thumbnail((100, 100), Image.LANCZOS)`` is
                called on it before conversion.

        Returns:
            The JPEG bytes of the RGB-converted image, base64-encoded and
            decoded to a UTF-8 ``str``.
        """
        im.thumbnail((100, 100), Image.LANCZOS)
        # JPEG output is written from an RGB copy of the image.
        rgb_image = im.convert('RGB')
        jpeg_stream = BytesIO()
        rgb_image.save(jpeg_stream, format="JPEG")
        encoded = base64.b64encode(jpeg_stream.getvalue())
        return encoded.decode('utf-8')
    def to_dict(self):
        """Serialize this file item into a dictionary.

        Starts from the parent class's ``to_dict()`` result and adds the
        file-specific fields.

        Returns:
            The parent's dictionary extended with ``type`` (always
            ``'file'``), ``path``, ``title``, ``artist`` and ``thumbnail``.
        """
        # Named `data` rather than `dict` so the builtin is not shadowed.
        data = super().to_dict()
        data['type'] = 'file'
        data['path'] = self.path
        data['title'] = self.title
        data['artist'] = self.artist
        data['thumbnail'] = self.thumbnail
        return data
    def format_debug_string(self):
@@ -217,13 +157,7 @@ class FileItem(BaseItem):
                  )
    def format_current_playing(self, user):
-        display = tr("now_playing", item=self.format_song_string(user))
+        return tr("now_playing", item=self.format_song_string(user))
        if self.thumbnail:
            thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
                             self.thumbnail + '"/>'
            display += "<br />" + thumbnail_html
        return display
    def format_title(self):
        title = self.title if self.title else self.path
@@ -1,4 +1,4 @@
-# 
+#
 # Bragi - A Mumble music bot
 # Forked from botamusique by azlux (https://github.com/azlux/botamusique)
 #
@@ -8,11 +8,8 @@ import logging
 import os
 import hashlib
 import traceback
 from PIL import Image
 import yt_dlp as youtube_dl
 import glob
 from io import BytesIO
 import base64
 import util
 from constants import tr_cli as tr
@@ -52,7 +49,6 @@ class URLItem(BaseItem):
            self.duration = 0
            self.id = hashlib.md5(url.encode()).hexdigest()
            self.path = var.tmp_folder + self.id
            self.thumbnail = ""
            self.keywords = ""
        else:
            super().__init__(from_dict)
@@ -60,7 +56,6 @@ class URLItem(BaseItem):
            self.duration = from_dict['duration']
            self.path = from_dict['path']
            self.title = from_dict['title']
            self.thumbnail = from_dict['thumbnail']
        self.downloading = False
        self.type = "url"
@@ -194,7 +189,6 @@ class URLItem(BaseItem):
            'format': 'bestaudio/best',
            'outtmpl': base_path,
            'noplaylist': True,
            'writethumbnail': True,
            'updatetime': False,
            'verbose': var.config.getboolean('debug', 'youtube_dl'),
            'postprocessors': [{
@@ -232,7 +226,6 @@ class URLItem(BaseItem):
                self.log.info(
                    "bot: finished downloading url (%s) %s, saved to %s." % (self.title, self.url, self.path))
                self.downloading = False
                self._read_thumbnail_from_file(base_path + ".jpg")
                self.version += 1  # notify wrapper to save me
                return True
            else:
@@ -242,18 +235,6 @@ class URLItem(BaseItem):
                self.downloading = False
                raise PreparationFailedError(tr('unable_download', item=self.format_title()))
    def _read_thumbnail_from_file(self, path_thumbnail):
        if os.path.isfile(path_thumbnail):
            im = Image.open(path_thumbnail)
            self.thumbnail = self._prepare_thumbnail(im)
    def _prepare_thumbnail(self, im):
        """Turn an image into a base64-encoded JPEG thumbnail string.

        Args:
            im: Image object; ``thumbnail((100, 100), Image.LANCZOS)`` is
                called on it before conversion.

        Returns:
            The JPEG bytes of the RGB-converted image, base64-encoded and
            decoded to a UTF-8 ``str``.
        """
        im.thumbnail((100, 100), Image.LANCZOS)
        # JPEG output is written from an RGB copy of the image.
        rgb_image = im.convert('RGB')
        jpeg_stream = BytesIO()
        rgb_image.save(jpeg_stream, format="JPEG")
        encoded = base64.b64encode(jpeg_stream.getvalue())
        return encoded.decode('utf-8')
    def to_dict(self):
        dict = super().to_dict()
        dict['type'] = 'url'
@@ -261,7 +242,6 @@ class URLItem(BaseItem):
        dict['duration'] = self.duration
        dict['path'] = self.path
        dict['title'] = self.title
        dict['thumbnail'] = self.thumbnail
        return dict
@@ -280,14 +260,7 @@ class URLItem(BaseItem):
        return self.url
    def format_current_playing(self, user):
-        display = tr("now_playing", item=self.format_song_string(user))
+        return tr("now_playing", item=self.format_song_string(user))
        if self.thumbnail:
            thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
                             self.thumbnail + '"/>'
            display += "<br />" + thumbnail_html
        return display
    def format_title(self):
        return self.title if self.title else self.url
@@ -125,14 +125,7 @@ class PlaylistURLItem(URLItem):
                  user=user)
    def format_current_playing(self, user):
-        display = tr("now_playing", item=self.format_song_string(user))
+        return tr("now_playing", item=self.format_song_string(user))
        if self.thumbnail:
            thumbnail_html = '<img width="80" src="data:image/jpge;base64,' + \
                             self.thumbnail + '"/>'
            display += "<br />" + thumbnail_html
        return display
    def display_type(self):
        """Return the text that ``tr`` yields for the ``url_from_playlist`` key."""
        # NOTE(review): `tr` is defined outside this view; presumably it looks
        # up a localized label for this item type - confirm.
        return tr("url_from_playlist")
@@ -1,6 +1,5 @@
 yt-dlp
 python-magic
 Pillow
 mutagen
 requests
 packaging
@@ -8,4 +7,5 @@ pyradios
 opuslib==3.0.1
 numpy
 protobuf
-pycryptodome
+pycryptodome
 cryptography
@@ -27,6 +27,108 @@ YT_PKG_NAME = 'yt-dlp'
 log = logging.getLogger("bot")
 # Default certificate filename for auto-generation
 DEFAULT_CERT_NAME = "bragi.pem"
 def generate_certificate(cert_path):
    """Generate a self-signed certificate for Mumble authentication.

    Creates a 2048-bit RSA key and a self-signed X.509 certificate valid for
    10 years, then writes both, PEM-encoded, into a single file (private key
    first, certificate second).

    The PEM data is built completely in memory before the file is opened, so
    a failure while generating or serializing does not leave a truncated file
    at ``cert_path``. The file is created with owner-only permissions (0600)
    because it holds an unencrypted private key.

    Args:
        cert_path: Path where the certificate file will be saved

    Returns:
        True if certificate was generated successfully, False otherwise
        (including when the ``cryptography`` package is not installed).
    """
    try:
        from cryptography import x509
        from cryptography.x509.oid import NameOID
        from cryptography.hazmat.primitives import hashes, serialization
        from cryptography.hazmat.primitives.asymmetric import rsa
        import datetime
        import os

        log.info(f"certificate: generating new self-signed certificate at {cert_path}")

        # Generate RSA private key (2048 bits is standard for this use)
        private_key = rsa.generate_private_key(
            public_exponent=65537,
            key_size=2048,
        )

        # Self-signed: subject and issuer are the same name.
        subject = issuer = x509.Name([
            x509.NameAttribute(NameOID.COMMON_NAME, "Bragi Music Bot"),
            x509.NameAttribute(NameOID.ORGANIZATION_NAME, "Bragi"),
        ])

        # Take one timestamp so both validity bounds derive from the same instant.
        now = datetime.datetime.now(datetime.timezone.utc)

        # Build and sign certificate (valid for 10 years)
        cert = (
            x509.CertificateBuilder()
            .subject_name(subject)
            .issuer_name(issuer)
            .public_key(private_key.public_key())
            .serial_number(x509.random_serial_number())
            .not_valid_before(now)
            .not_valid_after(now + datetime.timedelta(days=3650))
            .sign(private_key, hashes.SHA256())
        )

        # Private key and certificate share one PEM file, key first.
        pem_data = private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.TraditionalOpenSSL,
            encryption_algorithm=serialization.NoEncryption(),
        ) + cert.public_bytes(serialization.Encoding.PEM)

        # Create the file readable/writable by the owner only. The mode is
        # applied only when the file is newly created. O_BINARY exists only
        # on Windows, where it prevents newline translation on the descriptor.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(cert_path, flags, 0o600)
        with os.fdopen(fd, "wb") as f:
            f.write(pem_data)

        log.info("certificate: successfully generated new certificate")
        return True
    except ImportError:
        log.warning("certificate: cryptography library not installed, cannot generate certificate")
        return False
    except Exception as e:
        # Best effort: the caller decides what to do without a certificate.
        log.error(f"certificate: failed to generate certificate: {e}")
        return False
 def get_or_create_certificate(config_cert_path):
    """Get existing certificate or create a new one if needed.

    Resolution order:
      1. The certificate configured by the user, if that file exists.
      2. A previously auto-generated certificate (``DEFAULT_CERT_NAME``)
         located in the same directory as this module.
      3. A freshly generated certificate written to that same location.

    Args:
        config_cert_path: Certificate path from config (may be empty)

    Returns:
        Path to certificate file, or empty string if none available
    """
    # If user specified a certificate in config, use that
    if config_cert_path:
        resolved = solve_filepath(config_cert_path)
        # isfile (not exists): a directory at that path is not a usable certificate.
        if resolved and os.path.isfile(resolved):
            log.debug(f"certificate: using configured certificate: {resolved}")
            return resolved
        # Configured but missing: warn, then fall through to auto-generation.
        log.warning(f"certificate: configured certificate not found: {config_cert_path}")

    # Check for existing auto-generated certificate
    script_dir = os.path.dirname(os.path.realpath(__file__))
    default_cert_path = os.path.join(script_dir, DEFAULT_CERT_NAME)
    if os.path.isfile(default_cert_path):
        log.debug(f"certificate: using existing auto-generated certificate: {default_cert_path}")
        return default_cert_path

    # Generate new certificate
    if generate_certificate(default_cert_path):
        return default_cert_path

    # No certificate available
    log.warning("certificate: no certificate available, connecting without one")
    return ""
 def solve_filepath(path):
    if not path:
@@ -42,6 +144,12 @@ def solve_filepath(path):
 def get_recursive_file_list_sorted(path):
    # Audio file extensions to include (fast check before expensive magic call)
    AUDIO_EXTENSIONS = {
        '.mp3', '.flac', '.ogg', '.opus', '.m4a', '.m4b', '.mp4', '.m4p',
        '.wav', '.aac', '.wma', '.aiff', '.aif', '.ape', '.mka', '.webm'
    }
    filelist = []
    for root, dirs, files in os.walk(path, topdown=True, onerror=None, followlinks=True):
        relroot = root.replace(path, '', 1)
@@ -55,9 +163,16 @@ def get_recursive_file_list_sorted(path):
            if not os.access(fullpath, os.R_OK):
                continue
            # Fast path: check extension first (covers 99% of cases)
            ext = os.path.splitext(file)[1].lower()
            if ext in AUDIO_EXTENSIONS:
                filelist.append(os.path.join(relroot, file))
                continue
            # Slow path: use magic for files without recognized extensions
            try:
                mime = magic.from_file(fullpath, mime=True)
-                if 'audio' in mime or 'audio' in magic.from_file(fullpath).lower() or 'video' in mime:
+                if 'audio' in mime or 'video' in mime:
                    filelist.append(os.path.join(relroot, file))
            except:
                pass