Tenho o arquivo legenda.psv, que segue essa lógica...
Criado em: 2 de maio de 2025
Criado em: 2 de maio de 2025
Tenho o arquivo legenda.psv, que segue essa lógica de formato:
...
Q|12.818|12.919
u|12.919|12.960
a|12.960|12.980
n|12.980|13.041
d|13.041|13.102
o|13.102|13.183
|13.183|13.426
e|13.426|13.446
u|13.446|13.649
|13.649|13.669
o|13.669|13.953
l|13.953|13.974
h|13.974|14.014
o|14.014|14.034
|14.034|14.197
p|14.197|14.217
a|14.217|14.420
r|14.420|14.460
a|14.460|14.480
|14.480|14.663
a|14.663|14.683
|14.683|14.703
n|14.703|14.724
a|14.724|14.926
t|14.926|14.947
u|14.947|14.967
r|14.967|14.987
e|14.987|15.007
z|15.007|15.028
a.|15.028|15.068
...
Com base nessa explicação, crie um script Python que gere um vídeo de karaokê a partir do arquivo legenda.psv, no qual um gradiente vá passando sobre X frases no vídeo; o resultado deverá ser extremamente fluido e preciso.
import cupy as cp
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import time
import threading
import math
import os
import queue
import subprocess
from tqdm import tqdm
import psutil
import traceback
from cupyx.scipy import ndimage as cupy_ndimage
-- Mande o código completo, e pronto para uso sem erros.
pythonimport cupy as cp, math, numpy as np, PIL, sys, os, textwrap
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GPU Gradient Karaoke Renderer
=============================

Render a karaoke video (60 fps, 1280 x 720 px with the default settings)
from a ``legenda.psv`` file whose rows have the form::

    char|start_timestamp|end_timestamp

A row whose first field is a space (or is empty) is a word separator.
A phrase ends at the first row whose text ends with ".".

Requirements
------------
* Python >= 3.9 and FFmpeg available on the PATH
* pip install cupy-cuda11x pillow tqdm psutil

Usage
-----
Put ``legenda.psv`` and the ``Roboto-Bold.ttf`` font next to this script
and run::

    python karaoke_gpu.py

The configuration constants below can be edited to taste.
"""

##############################################################################
# Imports                                                                    #
##############################################################################
import math
import os
import queue
import subprocess
import threading
import time
import traceback
from itertools import accumulate

import cupy as cp
import numpy as np
import psutil
from cupyx.scipy import ndimage as cupy_ndimage
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

##############################################################################
# Configuration                                                              #
##############################################################################
LEGENDA_PATH = "legenda.psv"      # input file
OUTPUT_VIDEO = "karaoke.mp4"      # final video
FPS = 60                          # frames per second
RESOLUTION = (1280, 720)          # (width, height)
FONT_PATH = "Roboto-Bold.ttf"     # font file
FONT_SIZE = 60                    # px
PHRASES_ON_SCREEN = 3             # number of simultaneous lines
# os.cpu_count() may return None, so fall back to 1 before subtracting.
THREADS = max(1, (os.cpu_count() or 1) - 1)
GRAD_BLUR_SIGMA = 6.0             # gradient smoothing (horizontal sigma, px)


##############################################################################
# Utilities                                                                  #
##############################################################################
def parse_legenda(path: str):
    """Read the .psv file and return the list of phrases.

    Each phrase is a dict with the keys ``text``, ``start``, ``end`` and
    ``chars`` (a list of ``{"char", "start", "end"}`` dicts).

    Blank lines and lines that cannot be parsed as ``char|start|end`` are
    skipped. An empty first field is read as a space so that word
    separators survive even when the space itself was lost from the file.
    Characters left over after the last "." are kept as a final phrase.
    """
    phrases, current = [], []

    def close_phrase():
        # Emit the pending characters as a phrase unless they are blank.
        text = "".join(c["char"] for c in current)
        if text.strip():
            phrases.append({
                "text": text,
                "start": current[0]["start"],
                "end": current[-1]["end"],
                "chars": list(current),
            })
        current.clear()

    with open(path, encoding="utf-8") as f:
        for raw in f:
            # Only drop the line terminator: a full strip() would delete
            # the leading space of a word-separator row.
            raw = raw.rstrip("\r\n")
            if not raw.strip():
                continue
            try:
                # rsplit keeps a literal "|" character in the first field.
                char, start, end = raw.rsplit("|", 2)
                start, end = float(start), float(end)
            except ValueError:
                continue                      # malformed line
            if not char:
                char = " "                    # empty field == word separator
            if not current and not char.strip():
                continue                      # no leading blanks in a phrase
            current.append({"char": char, "start": start, "end": end})
            if char.endswith("."):            # end of phrase
                close_phrase()

    close_phrase()                            # trailing phrase without "."
    return phrases


def load_font():
    """Load the configured TrueType font, with a scalable fallback.

    When ``FONT_PATH`` cannot be opened a warning is printed. The fallback
    first asks Pillow for its default font at ``FONT_SIZE`` (supported by
    newer Pillow releases) and only then uses the fixed-size default.
    """
    try:
        return ImageFont.truetype(FONT_PATH, FONT_SIZE)
    except OSError:
        print(f"[WARN] Fonte «{FONT_PATH}» não encontrada; usando fallback.")
    try:
        return ImageFont.load_default(size=FONT_SIZE)
    except TypeError:                          # older Pillow: no size argument
        return ImageFont.load_default()


def measure_phrases(phrases, font):
    """Pre-compute the cumulative pixel widths of every phrase.

    Adds ``cum_widths`` (``len(chars) + 1`` floats, starting at 0) and
    ``width`` (an int) to each phrase. Widths are measured on growing
    prefixes of the text, so they follow the same layout used when the
    whole phrase is drawn instead of a sum of isolated glyph widths.
    """
    draw = ImageDraw.Draw(Image.new("RGB", (8, 8)))
    for ph in phrases:
        cumul = [0.0]
        for prefix in accumulate(c["char"] for c in ph["chars"]):
            # max() keeps the sequence non-decreasing.
            cumul.append(max(cumul[-1], draw.textlength(prefix, font=font)))
        ph["cum_widths"] = cumul
        # Image.new / Image.paste need integer sizes and coordinates.
        ph["width"] = math.ceil(cumul[-1])


def make_gradient(width: int, height: int) -> np.ndarray:
    """Return a horizontal RGB gradient as a (height, width, 3) uint8 array.

    Red is constant at 255, green follows half a sine wave (0 -> 255 -> 0)
    and blue is 0. The smoothing runs on the GPU along the horizontal axis
    only; blurring across the channel axis would mix R, G and B.
    """
    if width <= 0 or height <= 0:
        return np.zeros((max(height, 0), max(width, 0), 3), dtype=np.uint8)

    u = cp.linspace(0, 1, width, dtype=cp.float32)               # (w,)
    row = cp.stack(
        (cp.full_like(u, 255), cp.sin(u * math.pi) * 255, cp.zeros_like(u)),
        axis=1,
    )                                                            # (w, 3)
    if GRAD_BLUR_SIGMA:
        row = cupy_ndimage.gaussian_filter1d(row, sigma=GRAD_BLUR_SIGMA, axis=0)
    row = cp.asnumpy(cp.clip(row, 0, 255).astype(cp.uint8))
    # Every image row is identical, so one row is transferred and tiled.
    return np.tile(row[None, :, :], (height, 1, 1))


def sung_width(ph, t_now: float) -> int:
    """Return how many pixels of phrase ``ph`` are already sung at ``t_now``.

    Characters that have ended count in full; the character being sung is
    interpolated linearly between its start and end timestamps.
    """
    cum = ph["cum_widths"]
    width = 0.0
    for i, ch in enumerate(ph["chars"]):
        if t_now >= ch["end"]:
            width = cum[i + 1]
        elif t_now >= ch["start"]:
            # Here start <= t_now < end, so the divisor is positive.
            frac = (t_now - ch["start"]) / (ch["end"] - ch["start"])
            width = cum[i] + (cum[i + 1] - cum[i]) * frac
            break
    return int(min(max(width, 0), ph["width"]))


##############################################################################
# Frame generator                                                            #
##############################################################################
class FrameWorker(threading.Thread):
    """Worker thread that renders queued ``(frame_index, time)`` jobs to PNG."""

    def __init__(self, tasks_q: queue.Queue, phrases, font):
        super().__init__(daemon=True)
        self.q = tasks_q
        self.phrases = phrases
        self.font = font

    def run(self):
        """Consume jobs until a ``None`` poison pill arrives.

        Any rendering error prints the traceback and terminates the whole
        process, so the producer never blocks on a dead worker.
        """
        try:
            while True:
                item = self.q.get()
                if item is None:               # poison pill
                    break
                idx, t_now = item
                self.render_frame(idx, t_now)
                self.q.task_done()
        except Exception:
            traceback.print_exc()
            os._exit(1)

    # ---------------------------------------------------------------------- #
    def render_frame(self, idx: int, t_now: float):
        """Draw frame ``idx`` for time ``t_now`` and save it under ``frames/``."""
        img = Image.new("RGB", RESOLUTION, (0, 0, 0))
        draw = ImageDraw.Draw(img)

        line_h = FONT_SIZE + 12
        base_y = RESOLUTION[1] - (PHRASES_ON_SCREEN * line_h) - 30

        started = [p for p in self.phrases if p["start"] <= t_now]
        for ln, ph in enumerate(started[-PHRASES_ON_SCREEN:]):
            y = base_y + ln * line_h
            x = (RESOLUTION[0] - ph["width"]) // 2     # centred horizontally

            # Text not sung yet (grey).
            draw.text((x, y), ph["text"], font=self.font, fill=(90, 90, 90))

            sung_w = sung_width(ph, t_now)
            if sung_w == 0:
                continue

            # Phrase mask. It is a full line high so that descenders are
            # not cut off (FONT_SIZE plus a few pixels can be too short).
            mask = Image.new("L", (ph["width"], line_h), 0)
            ImageDraw.Draw(mask).text((0, 0), ph["text"], font=self.font, fill=255)

            # Gradient over the sung part only.
            grad = Image.fromarray(make_gradient(sung_w, mask.height))
            img.paste(grad, (x, y), mask.crop((0, 0, sung_w, mask.height)))

        # Optional: report memory usage every 3000 frames.
        if idx % 3000 == 0:
            mem = psutil.Process(os.getpid()).memory_info().rss / 2 ** 20
            print(f"[DEBUG] frame {idx} – RAM {mem:.1f} MiB")

        img.save(f"frames/{idx:08d}.png")


##############################################################################
# Main pipeline                                                              #
##############################################################################
def main():
    """Parse the subtitles, render every frame and encode the video."""
    # -------------------------------------------------- Preparation
    phrases = parse_legenda(LEGENDA_PATH)
    if not phrases:
        raise SystemExit(f"[ERRO] Nenhuma frase encontrada em «{LEGENDA_PATH}».")
    font = load_font()
    measure_phrases(phrases, font)

    total_dur = phrases[-1]["end"]
    total_frames = int(total_dur * FPS) + 1

    os.makedirs("frames", exist_ok=True)

    # -------------------------------------------------- Queue & threads
    task_q = queue.Queue(maxsize=THREADS * 4)
    workers = [FrameWorker(task_q, phrases, font) for _ in range(THREADS)]
    for w in workers:
        w.start()

    # -------------------------------------------------- Enqueue work
    for f in tqdm(range(total_frames), desc="Gerando frames"):
        task_q.put((f, f / FPS))

    # Wait for all frames, then stop the workers with poison pills.
    task_q.join()
    for _ in workers:
        task_q.put(None)
    for w in workers:
        w.join()

    # -------------------------------------------------- FFmpeg
    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", "frames/%08d.png",
        # Stop after this run's frames so that stale PNGs from an earlier,
        # longer run are not appended to the video.
        "-frames:v", str(total_frames),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        OUTPUT_VIDEO,
    ]
    print("\n[FFmpeg] Renderizando vídeo final…")
    subprocess.run(cmd, check=True)
    print("✅ Karaokê gerado com sucesso ➜", OUTPUT_VIDEO)


if __name__ == "__main__":
    tic = time.time()
    main()
    print(f"⏱️ Tempo total: {time.time() - tic:.1f}s")
aspalavrasestãojuntasnovideo
Corrija isso e também corrija o tamanho minúsculo da letra no vídeo; além disso, o script deve usar esta fonte: "C:/Users/lucas/LilitaOne-Regular.ttf"
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GPU Karaoke Renderer v2
=======================

Render a karaoke video from a ``legenda.psv`` file whose rows have the
form ``char|start_timestamp|end_timestamp``.

* Word separators are preserved: a row whose first field is a space (or
  is empty) becomes a space in the phrase text.
* The font size is large enough to be read comfortably.
* The «Lilita One» font at C:/Users/lucas/LilitaOne-Regular.ttf is used.

Requirements
------------
Python >= 3.9, FFmpeg on the PATH and::

    pip install cupy-cuda11x pillow tqdm psutil

Put ``legenda.psv`` next to this script and run::

    python karaoke_gpu.py
"""

##############################################################################
# Imports                                                                    #
##############################################################################
import math
import os
import queue
import subprocess
import threading
import time
import traceback
from itertools import accumulate

import cupy as cp
import numpy as np
import psutil
from cupyx.scipy import ndimage as cupy_ndimage
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

##############################################################################
# Configuration                                                              #
##############################################################################
LEGENDA_PATH = "legenda.psv"      # input file
OUTPUT_VIDEO = "karaoke.mp4"      # final video
FPS = 60                          # frames per second
RESOLUTION = (1280, 720)          # (width, height)
FONT_PATH = r"C:/Users/lucas/LilitaOne-Regular.ttf"   # requested font
FONT_SIZE = 96                    # px
PHRASES_ON_SCREEN = 3             # number of simultaneous lines
THREADS = max(1, (os.cpu_count() or 4) - 1)
GRAD_BLUR_SIGMA = 6.0             # gradient smoothing (horizontal sigma, px)


##############################################################################
# Utilities                                                                  #
##############################################################################
def parse_legenda(path: str):
    """Read the .psv file and return the list of phrases.

    Each phrase is a dict with the keys ``text``, ``start``, ``end`` and
    ``chars`` (a list of ``{"char", "start", "end"}`` dicts).

    Blank lines and lines that cannot be parsed as ``char|start|end`` are
    skipped. An empty first field is read as a space so that word
    separators survive even when the space itself was lost from the file.
    Characters left over after the last "." are kept as a final phrase.
    """
    phrases, cur = [], []

    def close_phrase():
        # Emit the pending characters as a phrase unless they are blank.
        text = "".join(c["char"] for c in cur)
        if text.strip():
            phrases.append({
                "text": text,
                "start": cur[0]["start"],
                "end": cur[-1]["end"],
                "chars": list(cur),
            })
        cur.clear()

    with open(path, encoding="utf-8") as f:
        for raw in f:
            # Only drop the line terminator: a full strip() would delete
            # the leading space of a word-separator row.
            raw = raw.rstrip("\r\n")
            if not raw.strip():
                continue
            try:
                # rsplit keeps a literal "|" character in the first field.
                char, start, end = raw.rsplit("|", 2)
                start, end = float(start), float(end)
            except ValueError:
                continue                      # ignore malformed lines
            if not char:
                char = " "                    # empty field == word separator
            if not cur and not char.strip():
                continue                      # no leading blanks in a phrase
            cur.append({"char": char, "start": start, "end": end})
            if char.endswith("."):            # end of phrase
                close_phrase()

    close_phrase()                            # trailing phrase without "."
    return phrases


def load_font():
    """Load the configured TrueType font, with a scalable fallback.

    When ``FONT_PATH`` cannot be opened a warning is printed. The fallback
    first asks Pillow for its default font at ``FONT_SIZE`` (supported by
    newer Pillow releases) and only then uses the fixed-size default.
    """
    try:
        return ImageFont.truetype(FONT_PATH, FONT_SIZE)
    except OSError:
        print(f"[WARN] Fonte «{FONT_PATH}» não encontrada; usando fallback.")
    try:
        return ImageFont.load_default(size=FONT_SIZE)
    except TypeError:                          # older Pillow: no size argument
        return ImageFont.load_default()


def measure_phrases(phrases, font):
    """Pre-compute the cumulative pixel widths of every phrase.

    Adds ``cum_widths`` (``len(chars) + 1`` floats, starting at 0) and
    ``width`` (an int) to each phrase. Widths are measured on growing
    prefixes of the text, so they follow the same layout used when the
    whole phrase is drawn instead of a sum of isolated glyph widths.
    """
    d = ImageDraw.Draw(Image.new("RGB", (8, 8)))
    for ph in phrases:
        cumul = [0.0]
        for prefix in accumulate(c["char"] for c in ph["chars"]):
            # max() keeps the sequence non-decreasing.
            cumul.append(max(cumul[-1], d.textlength(prefix, font=font)))
        ph["cum_widths"] = cumul
        # Image.new / Image.paste need integer sizes and coordinates.
        ph["width"] = math.ceil(cumul[-1])


def make_gradient(width: int, height: int) -> np.ndarray:
    """Return a horizontal RGB gradient as a (height, width, 3) uint8 array.

    Red is constant at 255, green peaks at the centre (half a sine wave)
    and blue is 0. The smoothing runs on the GPU along the horizontal axis
    only; blurring across the channel axis would mix R, G and B.
    """
    if width <= 0 or height <= 0:
        return np.zeros((max(height, 0), max(width, 0), 3), dtype=np.uint8)

    lin = cp.linspace(0, 1, width, dtype=cp.float32)             # (w,)
    row = cp.stack(
        (cp.full_like(lin, 255), cp.sin(lin * math.pi) * 255, cp.zeros_like(lin)),
        axis=1,
    )                                                            # (w, 3)
    if GRAD_BLUR_SIGMA:
        row = cupy_ndimage.gaussian_filter1d(row, sigma=GRAD_BLUR_SIGMA, axis=0)
    row = cp.asnumpy(cp.clip(row, 0, 255).astype(cp.uint8))
    # Every image row is identical, so one row is transferred and tiled.
    return np.tile(row[None, :, :], (height, 1, 1))


def sung_width(ph, t_now: float) -> int:
    """Return how many pixels of phrase ``ph`` are already sung at ``t_now``.

    Characters that have ended count in full; the character being sung is
    interpolated linearly between its start and end timestamps.
    """
    cum = ph["cum_widths"]
    width = 0.0
    for i, ch in enumerate(ph["chars"]):
        if t_now >= ch["end"]:
            width = cum[i + 1]
        elif t_now >= ch["start"]:
            # Here start <= t_now < end, so the divisor is positive.
            frac = (t_now - ch["start"]) / (ch["end"] - ch["start"])
            width = cum[i] + (cum[i + 1] - cum[i]) * frac
            break
    return int(min(max(width, 0), ph["width"]))


##############################################################################
# Frame generator                                                            #
##############################################################################
class FrameWorker(threading.Thread):
    """Worker thread that renders queued ``(frame_index, time)`` jobs to PNG."""

    def __init__(self, q: queue.Queue, phrases, font):
        super().__init__(daemon=True)
        self.q, self.phrases, self.font = q, phrases, font

    def run(self):
        """Consume jobs until a ``None`` poison pill arrives.

        Any rendering error prints the traceback and terminates the whole
        process, so the producer never blocks on a dead worker.
        """
        try:
            while True:
                job = self.q.get()
                if job is None:
                    break
                idx, t_now = job
                self.render(idx, t_now)
                self.q.task_done()
        except Exception:
            traceback.print_exc()
            os._exit(1)

    # ------------------------------------------------------------------ #
    def render(self, idx: int, t_now: float):
        """Draw frame ``idx`` for time ``t_now`` and save it under ``frames/``."""
        img = Image.new("RGB", RESOLUTION, (0, 0, 0))
        draw = ImageDraw.Draw(img)

        line_h = int(FONT_SIZE * 1.25)
        base_y = RESOLUTION[1] - (PHRASES_ON_SCREEN * line_h) - 40

        started = [p for p in self.phrases if p["start"] <= t_now]
        for ln, ph in enumerate(started[-PHRASES_ON_SCREEN:]):
            y = base_y + ln * line_h
            x = (RESOLUTION[0] - ph["width"]) // 2     # centred horizontally

            # Text not sung yet (grey).
            draw.text((x, y), ph["text"], font=self.font, fill=(120, 120, 120))

            sung_w = sung_width(ph, t_now)
            if sung_w == 0:
                continue

            # Phrase mask. It is a full line high so that descenders are
            # not cut off (FONT_SIZE plus a few pixels can be too short).
            mask = Image.new("L", (ph["width"], line_h), 0)
            ImageDraw.Draw(mask).text((0, 0), ph["text"], font=self.font, fill=255)

            # Coloured gradient over the sung part only.
            grad = Image.fromarray(make_gradient(sung_w, mask.height))
            img.paste(grad, (x, y), mask.crop((0, 0, sung_w, mask.height)))

        # Report memory usage every 3000 frames.
        if idx % 3000 == 0:
            mem = psutil.Process(os.getpid()).memory_info().rss / 2**20
            print(f"[DEBUG] frame {idx} – {mem:.0f} MiB")

        img.save(f"frames/{idx:08d}.png")


##############################################################################
# Main pipeline                                                              #
##############################################################################
def main():
    """Parse the subtitles, render every frame and encode the video."""
    phrases = parse_legenda(LEGENDA_PATH)
    if not phrases:
        raise SystemExit(f"[ERRO] Nenhuma frase encontrada em «{LEGENDA_PATH}».")
    font = load_font()
    measure_phrases(phrases, font)

    total_frames = int(phrases[-1]["end"] * FPS) + 1
    os.makedirs("frames", exist_ok=True)

    task_q = queue.Queue(maxsize=THREADS * 5)
    workers = [FrameWorker(task_q, phrases, font) for _ in range(THREADS)]
    for w in workers:
        w.start()

    for f in tqdm(range(total_frames), desc="Gerando frames"):
        task_q.put((f, f / FPS))

    # Wait for all frames, then stop the workers with poison pills.
    task_q.join()
    for _ in workers:
        task_q.put(None)
    for w in workers:
        w.join()

    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", "frames/%08d.png",
        # Stop after this run's frames so that stale PNGs from an earlier,
        # longer run are not appended to the video.
        "-frames:v", str(total_frames),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        OUTPUT_VIDEO,
    ]
    print("\n[FFmpeg] Renderizando vídeo final…")
    subprocess.run(cmd, check=True)
    print(f"✅ Pronto → {OUTPUT_VIDEO}")


if __name__ == "__main__":
    t0 = time.time()
    main()
    print(f"⏱️ Concluído em {time.time() - t0:.1f}s")