Files
bragi/media/cache.py

354 lines
12 KiB
Python

#
# Bragi - A Mumble music bot
# Forked from botamusique by azlux (https://github.com/azlux/botamusique)
#
import logging
import os
import multiprocessing
from concurrent.futures import ProcessPoolExecutor, as_completed
import json
import threading
from media.item import item_builders, item_id_generators, dict_to_item
import media.file
import media.url
import media.url_from_playlist
import media.radio
from database import MusicDatabase, Condition
import variables as var
import util
class ItemNotCachedError(Exception):
    """Raised when a wrapper's item id is no longer present in the in-memory cache."""
    pass
def _process_file_for_cache(file_path):
"""Worker function to process a single file for the cache.
This must be a module-level function for multiprocessing to work.
Args:
file_path: Relative path to the audio file
Returns:
dict: Music item dictionary ready for database insertion, or None on error
"""
try:
# Import inside function to avoid pickling issues
import variables as var
from media.item import item_builders
item = item_builders['file'](path=file_path)
return item.to_dict()
except Exception as e:
# Log errors but don't fail the whole process
logging.getLogger("bot").warning(f"library: failed to process file {file_path}: {e}")
return None
class MusicCache(dict):
    """In-memory store of music items keyed by item id, backed by a MusicDatabase.

    The dict maps id -> item object. The database is the persistent source of
    truth and is consulted whenever an id is not cached; brand-new items are
    only written back via save().
    """

    def __init__(self, db: MusicDatabase):
        super().__init__()
        self.db = db
        self.log = logging.getLogger("bot")
        # Serializes build_dir_cache() so concurrent rebuild requests don't interleave.
        self.dir_lock = threading.Lock()

    def get_item_by_id(self, id):
        """Return the item for *id* from the cache or the database; None if unknown."""
        if id in self:
            return self[id]
        # Not cached: query the database (fetch() caches the item on success).
        item = self.fetch(id)
        if item is not None:
            self.log.debug("library: music found in database: %s" % item.format_debug_string())
        return item

    def get_item(self, **kwargs):
        """Return an item, building it when neither the cache nor the database knows it.

        kwargs must provide 'type' and either 'id' or enough parameters for the
        type's id generator and builder.
        """
        id = kwargs['id'] if 'id' in kwargs else item_id_generators[kwargs['type']](**kwargs)
        if id in self:
            return self[id]
        # Not cached: query the database.
        item = self.fetch(id)
        if item is not None:
            self.log.debug("library: music found in database: %s" % item.format_debug_string())
            return item
        # Not in the database either: build one. A newly built item is NOT
        # persisted immediately; save(id) must be called explicitly.
        self[id] = item_builders[kwargs['type']](**kwargs)
        return self[id]

    def get_items_by_tags(self, tags):
        """Return (and cache) every database item whose tags match *tags*."""
        music_dicts = self.db.query_music_by_tags(tags)
        items = []
        if music_dicts:
            for music_dict in music_dicts:
                id = music_dict['id']
                self[id] = dict_to_item(music_dict)
                items.append(self[id])
        return items

    def fetch(self, id):
        """Load one item from the database into the cache; None if absent."""
        music_dict = self.db.query_music_by_id(id)
        if music_dict:
            self[id] = dict_to_item(music_dict)
            return self[id]
        return None

    def save(self, id):
        """Persist the cached item *id* into the database."""
        self.log.debug("library: music save into database: %s" % self[id].format_debug_string())
        self.db.insert_music(self[id].to_dict())
        self.db.manage_special_tags()

    def free_and_delete(self, id):
        """Remove an item from cache and database, deleting its downloaded file if any."""
        item = self.get_item_by_id(id)
        if item:
            self.log.debug("library: DELETE item from the database: %s" % item.format_debug_string())
            # Only 'url' items own a downloaded file on disk.
            if item.type == 'url' and os.path.exists(item.path):
                os.remove(item.path)
            if item.id in self:
                del self[item.id]
            self.db.delete_music(Condition().and_equal("id", item.id))

    def free(self, id):
        """Drop the item from the in-memory cache (the database copy is kept)."""
        if id in self:
            self.log.debug("library: cache freed for item: %s" % self[id].format_debug_string())
            del self[id]

    def free_all(self):
        """Empty the entire in-memory cache."""
        self.log.debug("library: all cache freed")
        self.clear()

    def build_dir_cache(self):
        """Synchronize the database with the audio files found on disk.

        Deletes database rows whose file vanished, processes new files in
        parallel worker processes, and batch-inserts the results. Thread-safe:
        the lock ensures only one rebuild runs at a time.
        """
        with self.dir_lock:  # released automatically, even on error
            self.log.info("library: rebuild directory cache")
            # Set for O(1) membership tests and set arithmetic below.
            files_on_disk = set(util.get_recursive_file_list_sorted(var.music_folder))
            self.log.info(f"library: found {len(files_on_disk)} audio files on disk")

            db_paths = set(self.db.query_all_paths())
            self.log.info(f"library: found {len(db_paths)} files in database")

            # Files to delete: in DB but no longer on disk.
            files_to_delete = db_paths - files_on_disk
            if files_to_delete:
                self.log.info(f"library: removing {len(files_to_delete)} deleted files from database")
                for path in files_to_delete:
                    self.log.debug(f"library: music file missed: {path}, delete from library.")
                    self.db.delete_music(Condition().and_equal('path', path))

            # New files to add: on disk but not in the DB.
            new_files = files_on_disk - db_paths
            if not new_files:
                self.log.info("library: no new files to add")
                self.db.manage_special_tags()
                return

            self.log.info(f"library: processing {len(new_files)} new files with parallel workers")
            num_workers = self._worker_count()
            processed_items = self._process_new_files(new_files, num_workers)
            self.log.info(f"library: successfully processed {len(processed_items)} files")

            if processed_items:
                self._batch_insert(processed_items)
            self.db.manage_special_tags()
            self.log.info("library: directory cache rebuild complete")

    def _worker_count(self):
        """Resolve the worker-process count from config (0 = auto: cores - 1, min 1)."""
        configured_workers = var.config.getint('bot', 'rebuild_workers', fallback=0)
        if configured_workers == 0:
            # Auto mode: leave one core free for audio/system work.
            num_workers = max(1, multiprocessing.cpu_count() - 1)
            self.log.info(f"library: auto-detected {multiprocessing.cpu_count()} cores, using {num_workers} workers")
        else:
            # User specified: clamp to a minimum of 1.
            num_workers = max(1, configured_workers)
            if num_workers == 1:
                self.log.info("library: using 1 worker (sequential processing)")
            else:
                self.log.info(f"library: using {num_workers} workers (configured)")
        return num_workers

    def _process_new_files(self, new_files, num_workers):
        """Run _process_file_for_cache over *new_files* in a process pool; return item dicts."""
        processed_items = []
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            future_to_file = {executor.submit(_process_file_for_cache, file_path): file_path
                              for file_path in new_files}
            completed = 0
            for future in as_completed(future_to_file):
                file_path = future_to_file[future]
                try:
                    result = future.result()
                    if result:
                        processed_items.append(result)
                    completed += 1
                    # Periodic progress log for long rebuilds.
                    if completed % 100 == 0:
                        self.log.info(f"library: processed {completed}/{len(new_files)} files")
                except Exception as e:
                    self.log.warning(f"library: failed to process {file_path}: {e}")
        return processed_items

    def _batch_insert(self, processed_items):
        """Insert item dicts into the database over a single shared connection."""
        self.log.info(f"library: inserting {len(processed_items)} items into database")
        import sqlite3  # local import: only needed on the rebuild path
        conn = sqlite3.connect(self.db.db_path)
        try:
            for item in processed_items:
                self.db.insert_music(item, _conn=conn)
            conn.commit()
            self.log.info("library: database batch insert completed")
        finally:
            conn.close()
class CachedItemWrapper:
    """Per-request handle to an item stored in a MusicCache.

    Tracks the requesting user and the last item version persisted, so any
    operation that bumps the item's version is written back to the database.
    """

    def __init__(self, lib, id, type, user, silent=False):
        self.lib = lib
        self.id = id
        self.user = user
        self.type = type
        self.silent = silent  # If True, don't announce when playing (for join sounds)
        self.log = logging.getLogger("bot")
        # Highest item version already persisted through self.lib.save().
        self.version = 0

    def item(self):
        """Return the underlying item, or raise if it fell out of the cache."""
        if self.id in self.lib:
            return self.lib[self.id]
        raise ItemNotCachedError(f"Uncached item of id {self.id}, type {self.type}.")

    def to_dict(self):
        """Return the item's dict representation, annotated with the requesting user."""
        result = self.item().to_dict()  # renamed local: don't shadow the builtin 'dict'
        result['user'] = self.user
        return result

    def _save_if_updated(self):
        """Persist the item when its version advanced past the last saved one."""
        if self.item().version > self.version:
            self.version = self.item().version
            self.lib.save(self.id)

    def validate(self):
        """Validate the item; persist it if validation bumped its version."""
        ret = self.item().validate()
        if ret:
            self._save_if_updated()
        return ret

    def prepare(self):
        """Prepare (e.g. download) the item; persist it if preparation bumped its version."""
        ret = self.item().prepare()
        if ret:
            self._save_if_updated()
        return ret

    def uri(self):
        return self.item().uri()

    def add_tags(self, tags):
        """Add tags to the item, persisting it if it changed."""
        self.item().add_tags(tags)
        self._save_if_updated()

    def remove_tags(self, tags):
        """Remove tags from the item, persisting it if it changed."""
        self.item().remove_tags(tags)
        self._save_if_updated()

    def clear_tags(self):
        """Clear all tags on the item, persisting it if it changed."""
        self.item().clear_tags()
        self._save_if_updated()

    def is_ready(self):
        return self.item().is_ready()

    def is_failed(self):
        return self.item().is_failed()

    def format_current_playing(self):
        return self.item().format_current_playing(self.user)

    def format_song_string(self):
        return self.item().format_song_string(self.user)

    def format_title(self):
        return self.item().format_title()

    def format_debug_string(self):
        return self.item().format_debug_string()

    def display_type(self):
        return self.item().display_type()
# Remember!!! Get wrapper functions will automatically add items into the cache!
def get_cached_wrapper(item, user, silent=False):
    """Insert *item* into the global cache and return a wrapper for it.

    Returns None when *item* is falsy.
    """
    if not item:
        return None
    var.cache[item.id] = item
    return CachedItemWrapper(var.cache, item.id, item.type, user, silent=silent)
def get_cached_wrappers(items, user):
    """Wrap each truthy item in *items* into the cache; falsy entries are skipped."""
    return [get_cached_wrapper(item, user) for item in items if item]
def get_cached_wrapper_from_scrap(**kwargs):
    """Build or fetch an item from scraped kwargs and wrap it.

    kwargs must include 'type' and 'user' plus the builder parameters for the
    type; optional 'silent' suppresses play announcements.

    Raises:
        KeyError: if 'user' is missing.
    """
    # Validate required metadata BEFORE the (possibly expensive) item
    # lookup/build — the original checked afterwards, caching an item that
    # was about to be discarded by the raise.
    if 'user' not in kwargs:
        raise KeyError("Which user added this song?")
    item = var.cache.get_item(**kwargs)
    silent = kwargs.get('silent', False)
    return CachedItemWrapper(var.cache, item.id, kwargs['type'], kwargs['user'], silent=silent)
def get_cached_wrapper_from_dict(dict_from_db, user, silent=False):
    """Convert a database row dict into an item and wrap it.

    Returns None when *dict_from_db* is falsy.
    """
    if not dict_from_db:
        return None
    return get_cached_wrapper(dict_to_item(dict_from_db), user, silent=silent)
def get_cached_wrappers_from_dicts(dicts_from_db, user):
    """Wrap every truthy database row dict in *dicts_from_db*; falsy rows are skipped."""
    return [get_cached_wrapper_from_dict(row, user) for row in dicts_from_db if row]
def get_cached_wrapper_by_id(id, user, silent=False):
    """Wrap the cached/database item with this id; returns None when the id is unknown."""
    item = var.cache.get_item_by_id(id)
    if not item:
        return None
    return CachedItemWrapper(var.cache, item.id, item.type, user, silent=silent)
def get_cached_wrappers_by_tags(tags, user):
    """Wrap every cached item whose tags match *tags* for the given user."""
    matched = var.cache.get_items_by_tags(tags)
    return [CachedItemWrapper(var.cache, item.id, item.type, user) for item in matched]