update update

2026-04-28 21:06:26 +02:00
parent 7b2135bfed
commit b4254c9e06
28 changed files with 2032 additions and 547 deletions
@@ -31,6 +31,7 @@ import subprocess
 import pty
 from typing import List, Optional
 from dataclasses import dataclass
+from datetime import timedelta

 console = Console()

@@ -378,10 +379,10 @@ class RecordTwitch:
        os.makedirs(output_directory, exist_ok=True)
        os.makedirs(in_record_directory, exist_ok=True)

+        # On encode l'heure de démarrage dans le nom pour pouvoir reconstruire une timeline stable
+        # et limiter le "drift" causé par le temps de transcription.
        timestamp_complet = datetime.now().strftime("%Y%m%d-%H%M%S")
-        timestamp = datetime.now().strftime("%Y-%m-%d")
-
-        output_file_path = os.path.join(in_record_directory, f"{timestamp}_%03d.mp3")
+        output_file_path = os.path.join(in_record_directory, f"{timestamp_complet}_%03d.mp3")

        sprint(self.script_name,"green", f"start record")
        thread_compteur = Thread(target=self.compteur, daemon=True)
@@ -565,13 +566,24 @@ class TwitchChatBot:


 class Subtitle_translation:
-    def __init__(self, pathjson):
+    def __init__(
+        self,
+        pathjson,
+        *,
+        storage_key: str = "subtitle_data",
+        on_new_subtitle: Optional[callable] = None,
+        segment_seconds: int = 30,
+        max_backlog_files: int = 3,
+    ):
        self.script_name = "translation"
        self.type_debug = "Debug"
        self.pathjson = pathjson
        self.reload_json()
        self.dir_whisperX = "whisperX"
-        self.filename_memory = "subtitle_data" 
+        self.filename_memory = storage_key
+        self._on_new_subtitle = on_new_subtitle
+        self.segment_seconds = int(segment_seconds) if segment_seconds else 30
+        self.max_backlog_files = int(max_backlog_files) if max_backlog_files else 3
        self.subtitle = {}
        self.all_subtitle = ""
        self.is_running = True
@@ -594,14 +606,37 @@ class Subtitle_translation:


    def verif_file_transcribe(self):
-        for file in os.listdir(self.dir_record):
-            file_path = os.path.join(self.dir_record, file)
-            if os.path.isfile(file_path):
-                # sprint(self.script_name,"blue",f"File find -> transcribe.")
-                debug_print("d", "Creation subtitle : "+file_path, self.type_debug, self.script_name)
-                # Exécuter une fonction sur le fichier
-                self.auto_create_subtitle(file)
-                del_pathfile(file_path)
+        # Transcribe at most one pending .mp3 per call, oldest first; drop stale segments when a backlog builds up (otherwise the lag keeps growing).
+        files = [f for f in os.listdir(self.dir_record) if os.path.isfile(os.path.join(self.dir_record, f)) and f.endswith(".mp3")]
+        if not files:
+            return
+
+        # Order by recording-start prefix first (the index restarts at 000 on every new recording), then numeric segment index; unparsable index sorts last.
+        def _seg_index(name: str) -> int:
+            try:
+                return int(name.split("_")[-1].split(".")[0])
+            except ValueError:
+                return 10**9
+
+        files = sorted(files, key=lambda n: (n.rsplit("_", 1)[0], _seg_index(n), n))
+
+        # If transcription cannot keep up, discard the oldest files so the output catches up with the live stream.
+        if self.max_backlog_files > 0 and len(files) > self.max_backlog_files:
+            to_drop = files[: max(0, len(files) - self.max_backlog_files)]
+            for f in to_drop:
+                try:
+                    del_pathfile(os.path.join(self.dir_record, f))
+                    sprint(self.script_name, "yellow", f"Backlog trop grand -> drop segment: {f}")
+                except Exception as e:
+                    sprint(self.script_name, "red", f"drop segment error ({f}): {e}")
+            files = files[len(to_drop) :]
+
+        # Handle a single file per call (whisper on GPU) to avoid concurrent transcriptions
+        file = files[0]
+        file_path = os.path.join(self.dir_record, file)
+        debug_print("d", "Creation subtitle : "+file_path, self.type_debug, self.script_name)
+        self.auto_create_subtitle(file)
+        del_pathfile(file_path)


    def remove_repetitions(self, message):
@@ -659,12 +694,30 @@ class Subtitle_translation:
        # num_ext = parties[-1]  # Prend la dernière partie qui contient "007.mp3" 
        speak_found =  self.remove_repetitions(speak_found)
        self.all_subtitle += speak_found+"\n"
+        # Utiliser un timestamp stable dérivé du nom de fichier + index segment,
+        # plutôt que l'heure de FIN de transcription (qui dérive si whisper est lent).
        current_time = get_current_time()
+        try:
+            import re
+            m = re.match(r"^(?P<base>\d{8}-\d{6})_(?P<idx>\d+)\.mp3$", str(file))
+            if m:
+                base_dt = datetime.strptime(m.group("base"), "%Y%m%d-%H%M%S")
+                idx = int(m.group("idx"))
+                dt = base_dt + timedelta(seconds=idx * int(self.segment_seconds))
+                current_time = dt.strftime("%H:%M:%S")
+        except Exception:
+            pass
        # if self.subtitle:
        self.subtitle[str(current_time)] = speak_found
        storage.write(self.filename_memory, str(current_time), speak_found)
        sprint(self.script_name,"yellow", "parole du streamer : \n" +speak_found)

+        try:
+            if self._on_new_subtitle:
+                self._on_new_subtitle(str(current_time), speak_found)
+        except Exception as e:
+            sprint(self.script_name, "red", f"on_new_subtitle error: {e}")
+
        sprint(self.script_name,"green","finish create_subtitle")

    def main_loop(self):
@@ -839,12 +892,42 @@ class IA_generator:
            sprint(self.script_name,"magenta",f"pas encore de sous titre on quitte génération")
            return

-        prompt_gpt = ['tgpt','-q','-w', '"'+self.list_prompt[self.index_prompt]+' "'+ str(self.streamer_word)+'"']
+        def _tlog(s: str, n: int = 220) -> str:
+            s = str(s or "").replace("\n", " ").strip()
+            if len(s) <= n:
+                return s
+            return s[: n - 3] + "..."
+
+        chosen_prompt = ""
+        try:
+            chosen_prompt = str(self.list_prompt[self.index_prompt] or "")
+        except Exception:
+            chosen_prompt = ""
+
+        debug_print(
+            "i",
+            f"IA input ({len(str(self.streamer_word))} chars): {_tlog(self.streamer_word)}",
+            self.type_debug,
+            self.script_name,
+        )
+        debug_print(
+            "i",
+            f"Prompt index={self.index_prompt}/{max(0, len(self.list_prompt) - 1)}: {_tlog(chosen_prompt)}",
+            self.type_debug,
+            self.script_name,
+        )
+
+        prompt_gpt = ['tgpt','-q','-w', '"'+chosen_prompt+' "'+ str(self.streamer_word)+'"']
+        debug_print("d", f"TGPT cmd: {prompt_gpt}", self.type_debug, self.script_name)
        
        # sprint(self.script_name,"DARKCYAN","index preprompt : "+str(self.index_prompt)+"\n")
        # sprint(self.script_name,"DARKCYAN","\nPrompt demandé  : "+str(prompt_gpt)+"\n")
        process = subprocess.run(prompt_gpt, capture_output=True, text=True, check=False)
+        debug_print("d", f"TGPT returncode={process.returncode}", self.type_debug, self.script_name)
+        if process.stderr:
+            debug_print("w", f"TGPT stderr: {_tlog(process.stderr, 300)}", self.type_debug, self.script_name)
        text_generation = process.stdout.replace("'", "").replace('"', "").replace("\n", "")
+        debug_print("d", f"TGPT raw: {_tlog(text_generation, 300)}", self.type_debug, self.script_name)
        # sprint(self.script_name,"CYAN","réponse imaginé sans formatage : \n"+text_generation+"\n\n") #debug
        textfinal = self.clear_response(text_generation)
        nombre_de_mots = len(textfinal.split())
@@ -859,7 +942,7 @@ class IA_generator:

        self.last_streamer_word = self.streamer_word
        # sprint(self.script_name,"CYAN","réponse imaginé : \n"+textfinal+"\n\n")
-        debug_print("d", "Réponse imaginé : \n"+textfinal, self.type_debug,self.script_name)
+        debug_print("g", f"Réponse imaginée ({nombre_de_mots} mots): {_tlog(textfinal, 400)}", self.type_debug,self.script_name)
        self.change_prompt()
        
        key = get_current_time()
@@ -875,7 +958,7 @@ class IA_generator:

    def main_ask(self, streamer_word_text):
        # sprint(self.script_name,"blue", "imagine_response start")
-        debug_print("i", "PRINCIPAL START imagine_response", self.type_debug,self.script_name)
+        debug_print("i", f"PRINCIPAL START imagine_response (manual) len={len(str(streamer_word_text or ''))}", self.type_debug,self.script_name)
        self.streamer_word = streamer_word_text
        imagine_response_thread = Thread(target=self.imagine_response)
        imagine_response_thread.start()
@@ -981,9 +1064,9 @@ class messageTwitch:
        self.channel_name = channel_name
        self.indexuser = 0
        self.type_debug = "Debug"
-        with open(self.path_file_config_user, 'r') as file:
-            self.userjson = json.load(file)
-        self.totaluser = len(self.userjson)
+        self.userjson = []
+        self.totaluser = 0
+        self._reload_users()

        # Se positionner sur le 1er utilisateur activé
        self.indexuser = self._first_enabled_index()
@@ -1033,7 +1116,38 @@ class messageTwitch:
        self.tw_acc_token = get_value_json_list(self.indexuser, "tw_acc_token", self.userjson)
        self.charactere = get_value_json_list(self.indexuser, "charactere", self.userjson)

+    def _reload_users(self):
+        """
+        Reload config/user.json from disk.
+        Important: lets new tokens be picked up without restarting the bots.
+        """
+        try:
+            with open(self.path_file_config_user, 'r', encoding='utf-8') as file:
+                data = json.load(file) or []  # falsy JSON content (null, empty list, ...) becomes []
+            if not isinstance(data, list):  # any non-list top-level value is replaced by an empty list
+                data = []
+            self.userjson = data
+            self.totaluser = len(self.userjson)
+        except Exception:
+            # Keep the previous state if the file cannot be read or parsed
+            self.userjson = self.userjson or []
+            self.totaluser = len(self.userjson)
+
    def set_user(self, index_user):
+        # Toujours recharger la config pour récupérer les tokens à jour
+        self._reload_users()
+        # Si le fichier a changé et que l'index courant n'existe plus, fallback
+        if self.totaluser <= 0:
+            raise ValueError("Aucun utilisateur configuré (config/user.json)")
+        if index_user is None:
+            index_user = 0
+        try:
+            index_user = int(index_user)
+        except Exception:
+            index_user = 0
+        if index_user < 0 or index_user >= self.totaluser:
+            index_user = 0
+
        # Si l'utilisateur demandé est désactivé, on l'autorise seulement si bypass anti-boucle est actif
        if not self._is_enabled(index_user) and not self._bypass_antiloop(index_user):
            index_user = self._next_enabled_index(index_user)