update update

This commit is contained in:
gpatruno
2026-04-28 21:06:26 +02:00
parent 7b2135bfed
commit b4254c9e06
28 changed files with 2032 additions and 547 deletions
+133 -19
View File
@@ -31,6 +31,7 @@ import subprocess
import pty
from typing import List, Optional
from dataclasses import dataclass
from datetime import timedelta
console = Console()
@@ -378,10 +379,10 @@ class RecordTwitch:
os.makedirs(output_directory, exist_ok=True)
os.makedirs(in_record_directory, exist_ok=True)
# On encode l'heure de démarrage dans le nom pour pouvoir reconstruire une timeline stable
# et limiter le "drift" causé par le temps de transcription.
timestamp_complet = datetime.now().strftime("%Y%m%d-%H%M%S")
timestamp = datetime.now().strftime("%Y-%m-%d")
output_file_path = os.path.join(in_record_directory, f"{timestamp}_%03d.mp3")
output_file_path = os.path.join(in_record_directory, f"{timestamp_complet}_%03d.mp3")
sprint(self.script_name,"green", f"start record")
thread_compteur = Thread(target=self.compteur, daemon=True)
@@ -565,13 +566,24 @@ class TwitchChatBot:
class Subtitle_translation:
def __init__(self, pathjson):
def __init__(
self,
pathjson,
*,
storage_key: str = "subtitle_data",
on_new_subtitle: Optional[callable] = None,
segment_seconds: int = 30,
max_backlog_files: int = 3,
):
self.script_name = "translation"
self.type_debug = "Debug"
self.pathjson = pathjson
self.reload_json()
self.dir_whisperX = "whisperX"
self.filename_memory = "subtitle_data"
self.filename_memory = storage_key
self._on_new_subtitle = on_new_subtitle
self.segment_seconds = int(segment_seconds) if segment_seconds else 30
self.max_backlog_files = int(max_backlog_files) if max_backlog_files else 3
self.subtitle = {}
self.all_subtitle = ""
self.is_running = True
@@ -594,14 +606,37 @@ class Subtitle_translation:
def verif_file_transcribe(self):
    """Transcribe the oldest pending ``.mp3`` segment found in ``self.dir_record``.

    At most one segment is handled per call so that only one transcription
    runs at a time. Segments are processed in recording order: first by the
    part of the file name before the last ``_`` (the session prefix), then
    by the numeric segment index that follows it. Names without a numeric
    index are processed last, in lexicographic order.

    When more than ``self.max_backlog_files`` segments are waiting, the
    oldest ones are deleted without being transcribed so the subtitles
    catch up with the live stream. A value <= 0 disables this trimming.

    Returns:
        None. Returns early when no ``.mp3`` file is waiting.
    """
    def _recording_order(name):
        # Same index parsing as before (text between the last "_" and the
        # next "."), but the session prefix is compared first: otherwise
        # leftovers from an earlier session would interleave with the
        # current one and the backlog trimming would drop the wrong files.
        prefix, _, tail = name.rpartition("_")
        try:
            return (0, prefix, int(tail.split(".")[0]), name)
        except ValueError:
            return (1, "", 0, name)

    pending = sorted(
        (
            entry
            for entry in os.listdir(self.dir_record)
            if entry.endswith(".mp3")
            and os.path.isfile(os.path.join(self.dir_record, entry))
        ),
        key=_recording_order,
    )
    if not pending:
        return

    # If transcription cannot keep up, discard the oldest segments so the
    # delay does not keep growing.
    if self.max_backlog_files > 0 and len(pending) > self.max_backlog_files:
        to_drop = pending[: len(pending) - self.max_backlog_files]
        for f in to_drop:
            try:
                del_pathfile(os.path.join(self.dir_record, f))
                sprint(self.script_name, "yellow", f"Backlog trop grand -> drop segment: {f}")
            except Exception as e:
                # Best effort: a segment that cannot be removed must not
                # stop the transcription loop, but the failure is reported.
                sprint(self.script_name, "red", f"drop segment error ({f}): {e}")
        pending = pending[len(to_drop):]

    # One file per call: avoids concurrent transcriptions.
    file = pending[0]
    file_path = os.path.join(self.dir_record, file)
    debug_print("d", "Creation subtitle : "+file_path, self.type_debug, self.script_name)
    self.auto_create_subtitle(file)
    del_pathfile(file_path)
def remove_repetitions(self, message):
@@ -659,12 +694,30 @@ class Subtitle_translation:
# num_ext = parties[-1] # Prend la dernière partie qui contient "007.mp3"
speak_found = self.remove_repetitions(speak_found)
self.all_subtitle += speak_found+"\n"
# Utiliser un timestamp stable dérivé du nom de fichier + index segment,
# plutôt que l'heure de FIN de transcription (qui dérive si whisper est lent).
current_time = get_current_time()
try:
import re
m = re.match(r"^(?P<base>\d{8}-\d{6})_(?P<idx>\d+)\.mp3$", str(file))
if m:
base_dt = datetime.strptime(m.group("base"), "%Y%m%d-%H%M%S")
idx = int(m.group("idx"))
dt = base_dt + timedelta(seconds=idx * int(self.segment_seconds))
current_time = dt.strftime("%H:%M:%S")
except Exception:
pass
# if self.subtitle:
self.subtitle[str(current_time)] = speak_found
storage.write(self.filename_memory, str(current_time), speak_found)
sprint(self.script_name,"yellow", "parole du streamer : \n" +speak_found)
try:
if self._on_new_subtitle:
self._on_new_subtitle(str(current_time), speak_found)
except Exception as e:
sprint(self.script_name, "red", f"on_new_subtitle error: {e}")
sprint(self.script_name,"green","finish create_subtitle")
def main_loop(self):
@@ -839,12 +892,42 @@ class IA_generator:
sprint(self.script_name,"magenta",f"pas encore de sous titre on quitte génération")
return
prompt_gpt = ['tgpt','-q','-w', '"'+self.list_prompt[self.index_prompt]+' "'+ str(self.streamer_word)+'"']
def _tlog(s: str, n: int = 220) -> str:
s = str(s or "").replace("\n", " ").strip()
if len(s) <= n:
return s
return s[: n - 3] + "..."
chosen_prompt = ""
try:
chosen_prompt = str(self.list_prompt[self.index_prompt] or "")
except Exception:
chosen_prompt = ""
debug_print(
"i",
f"IA input ({len(str(self.streamer_word))} chars): {_tlog(self.streamer_word)}",
self.type_debug,
self.script_name,
)
debug_print(
"i",
f"Prompt index={self.index_prompt}/{max(0, len(self.list_prompt) - 1)}: {_tlog(chosen_prompt)}",
self.type_debug,
self.script_name,
)
prompt_gpt = ['tgpt','-q','-w', '"'+chosen_prompt+' "'+ str(self.streamer_word)+'"']
debug_print("d", f"TGPT cmd: {prompt_gpt}", self.type_debug, self.script_name)
# sprint(self.script_name,"DARKCYAN","index preprompt : "+str(self.index_prompt)+"\n")
# sprint(self.script_name,"DARKCYAN","\nPrompt demandé : "+str(prompt_gpt)+"\n")
process = subprocess.run(prompt_gpt, capture_output=True, text=True, check=False)
debug_print("d", f"TGPT returncode={process.returncode}", self.type_debug, self.script_name)
if process.stderr:
debug_print("w", f"TGPT stderr: {_tlog(process.stderr, 300)}", self.type_debug, self.script_name)
text_generation = process.stdout.replace("'", "").replace('"', "").replace("\n", "")
debug_print("d", f"TGPT raw: {_tlog(text_generation, 300)}", self.type_debug, self.script_name)
# sprint(self.script_name,"CYAN","réponse imaginé sans formatage : \n"+text_generation+"\n\n") #debug
textfinal = self.clear_response(text_generation)
nombre_de_mots = len(textfinal.split())
@@ -859,7 +942,7 @@ class IA_generator:
self.last_streamer_word = self.streamer_word
# sprint(self.script_name,"CYAN","réponse imaginé : \n"+textfinal+"\n\n")
debug_print("d", "Réponse imaginé : \n"+textfinal, self.type_debug,self.script_name)
debug_print("g", f"Réponse imaginée ({nombre_de_mots} mots): {_tlog(textfinal, 400)}", self.type_debug,self.script_name)
self.change_prompt()
key = get_current_time()
@@ -875,7 +958,7 @@ class IA_generator:
def main_ask(self, streamer_word_text):
# sprint(self.script_name,"blue", "imagine_response start")
debug_print("i", "PRINCIPAL START imagine_response", self.type_debug,self.script_name)
debug_print("i", f"PRINCIPAL START imagine_response (manual) len={len(str(streamer_word_text or ''))}", self.type_debug,self.script_name)
self.streamer_word = streamer_word_text
imagine_response_thread = Thread(target=self.imagine_response)
imagine_response_thread.start()
@@ -981,9 +1064,9 @@ class messageTwitch:
self.channel_name = channel_name
self.indexuser = 0
self.type_debug = "Debug"
with open(self.path_file_config_user, 'r') as file:
self.userjson = json.load(file)
self.totaluser = len(self.userjson)
self.userjson = []
self.totaluser = 0
self._reload_users()
# Se positionner sur le 1er utilisateur activé
self.indexuser = self._first_enabled_index()
@@ -1033,7 +1116,38 @@ class messageTwitch:
self.tw_acc_token = get_value_json_list(self.indexuser, "tw_acc_token", self.userjson)
self.charactere = get_value_json_list(self.indexuser, "charactere", self.userjson)
def _reload_users(self):
    """Reload the user list from ``self.path_file_config_user``.

    Re-reading the file on demand lets updated tokens be picked up without
    restarting the bots.

    The file is read as UTF-8 JSON. A falsy top-level value (``null``, empty
    container, ...) or any value that is not a list is stored as an empty
    list. If the file cannot be read or parsed, the previously loaded value
    is kept; when nothing has been loaded yet, an empty list is used.

    Side effects:
        Sets ``self.userjson`` and ``self.totaluser`` (its length).
    """
    try:
        with open(self.path_file_config_user, 'r', encoding='utf-8') as file:
            data = json.load(file) or []
    except Exception:
        # Deliberate best effort: keep the previous state when the file is
        # unreadable. getattr() guards the case where the attribute was
        # never set, which would otherwise raise inside this handler.
        data = getattr(self, "userjson", None) or []
    else:
        if not isinstance(data, list):
            data = []
    self.userjson = data
    self.totaluser = len(data)
def set_user(self, index_user):
# Toujours recharger la config pour récupérer les tokens à jour
self._reload_users()
# Si le fichier a changé et que l'index courant n'existe plus, fallback
if self.totaluser <= 0:
raise ValueError("Aucun utilisateur configuré (config/user.json)")
if index_user is None:
index_user = 0
try:
index_user = int(index_user)
except Exception:
index_user = 0
if index_user < 0 or index_user >= self.totaluser:
index_user = 0
# Si l'utilisateur demandé est désactivé, on l'autorise seulement si bypass anti-boucle est actif
if not self._is_enabled(index_user) and not self._bypass_antiloop(index_user):
index_user = self._next_enabled_index(index_user)