por gentileza, corrija isso: mesmo usando 1920x108...

Question

por gentileza, corrija isso: mesmo usando 1920x1080, a qualidade do texto está baixa, com borrões e pixelização.

import os
import shutil
import traceback
import re
import subprocess
import time
import multiprocessing

# Force the 'spawn' start method for multiprocessing when running on Windows.
if os.name == 'nt':
    multiprocessing.set_start_method('spawn', force=True)
from datetime import timedelta
from functools import lru_cache
from PIL import Image, ImageDraw, ImageFont, ImageFilter
from tqdm import tqdm

# --- VIDEO AND IMAGE SETTINGS ---

IMAGE_SIZE = (1920, 1080)  # Base size of the images/frames
FPS = 60  # Frames per second of the video

# --- TEXT LAYOUT AND POSITIONING SETTINGS ---

AJUSTE_HORIZONTAL = 0  # Fine horizontal adjustment for the text
AJUSTE_VERTICAL = -40  # Fine vertical adjustment for the text
CENTRALIZAR = True  # Whether the main text is centred horizontally
POSICAO_VERTICAL = 2  # 1: top, 2: middle, 3: bottom, 4: alternating (top/bottom)
MAX_WIDTH = IMAGE_SIZE[0] - 40  # Maximum width of a subtitle line
ESPACO_ENTRE_FRASES = 25  # Vertical spacing between lines of the same group

# --- FONT AND TEXT SETTINGS ---

FONT_SIZE_COMPLETE = 130  # Font size used for the subtitles
UPPER_CASE = False  # Convert all subtitle text to upper case

# Look for the font in the current directory; otherwise fall back to a
# system font that is likely to exist.
FONT_PATH = os.path.join(os.getcwd(), "FiraSansCompressed-Bold.otf")
if not os.path.exists(FONT_PATH):
    if os.name == 'nt':  # Windows
        FONT_PATH = r"C:\Windows\Fonts\Arial.ttf"
    else:  # Linux/Mac
        FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"

# --- COLOUR SETTINGS ---

FONT_COLOR = "#8B0000"  # LK: ffffff
HIGHLIGHT_COLOR = "#FF0000"  # LK: FFFF00
PROGRESS_BAR_COLOR = "#FF0000"  # LK: FFFF00
BORDER_COLOR = None  # LK: 000000
BORDER_GROSSURA = 2  # Border thickness (original note: "current size 3")

# --- PROGRESS BAR AND EXTRA TEXT SETTINGS ---

PROGRESS_BAR_SIZE = (1000, 53)  # Progress bar size (width, height)
SUBSCRIBE_TEXT_SIZE = 23  # Font size of the "subscribe" text

# --- TRANSITION AND TIMING SETTINGS ---

TRANSITION_DURATION_GAPS = 0.1  # Transition duration for text shown during gaps
FADE_OUT_DURATION_LINES = 2.0  # Fade-out duration of subtitle lines on group change
FADE_IN_DURATION_NEXT_LINES = 2.0  # Fade-in duration of the next group's lines
MIN_GAP_DURATION = 4.0  # Minimum gap length (seconds) to show the progress bar and extra text

# --- SUBTITLE GROUPING SETTINGS ---

# Maximum number of words per subtitle line (the original note says this is
# no longer used by the new grouping logic).
MAX_PALAVRAS_POR_LINHA = 5

NUM_FRASES_GRUPO = 4  # Number of lines (capitalised segments) shown together on screen

# --- PATHS AND DIRECTORIES ---

BACKGROUND_SOURCE = r"C:\Users\lucas\Downloads\Scripts\Karaoke-Creator#shorts\capa-fs.mp4"  # Background image/video path
BACKGROUND_FRAMES_DIR = "background_frames"  # Directory for frames extracted from the background video

def hex_to_rgb(hex_color):
    """Convert a hexadecimal colour string into an ``(r, g, b)`` tuple.

    Leading ``#`` characters are stripped, then the first three pairs of
    hex digits are parsed as integers.

    Raises:
        ValueError: if any of the three pairs is missing or is not valid hex.
    """
    digits = hex_color.lstrip("#")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

def parse_time(time_str):
    """Convert a ``M:SS:D#`` timestamp string into a ``timedelta``.

    The three numeric fields are read as minutes, seconds and tenths of a
    second. Strings that do not match the pattern yield ``timedelta(0)``.

    The function keeps state in ``parse_time.last_minutes``: once a non-zero
    minute value has been stored, a later timestamp whose minute field is 0
    reuses the stored value instead.
    """
    match = re.match(r"(\d+):(\d{2}):(\d{1,2})#", time_str)
    if not match:
        return timedelta(0)

    minutes, seconds, deciseconds = map(int, match.groups())

    # A zero minute field inherits the last non-zero minute that was seen.
    previous_minutes = getattr(parse_time, "last_minutes", 0)
    if minutes == 0 and previous_minutes > 0:
        minutes = previous_minutes
    parse_time.last_minutes = minutes

    return timedelta(seconds=minutes * 60 + seconds + deciseconds / 10.0)


parse_time.last_minutes = 0

def get_audio_duration(audio_file):
    """Return the duration of an audio file, in seconds, using ffprobe.

    Args:
        audio_file: path handed to ``ffprobe`` as the input file.

    Returns:
        The duration reported by ffprobe, as a ``float``.

    Raises:
        ValueError: if ffprobe exits with an error, prints nothing, or prints
            something that is not a number.
        FileNotFoundError: if the ``ffprobe`` executable cannot be found.
    """
    command = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_file,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    output = result.stdout.strip()

    # Fail with ffprobe's own message instead of an opaque float('') error.
    if result.returncode != 0 or not output:
        raise ValueError(
            f"ffprobe could not read the duration of {audio_file!r}: "
            f"{result.stderr.strip()}"
        )
    return float(output)

# Helper function based on the reference code supplied by the user.

def _add_sentence_chunks(sentences_list_to_fill,
                         words_of_one_sentence,
                         indices_in_raw_for_those_words,
                         raw_subtitle_entries_full_list):
    """Split one sentence into chunks and append them as subtitle lines.

    The words are cut into consecutive chunks of at most
    ``MAX_PALAVRAS_POR_LINHA`` words. Each chunk is appended to
    ``sentences_list_to_fill`` as a dict with ``start_time``, ``end_time``
    (always ``None`` here), ``words``, ``start_index`` and ``end_index``.

    Args:
        sentences_list_to_fill: list that receives the chunk dicts (mutated).
        words_of_one_sentence: the words of the sentence being split.
        indices_in_raw_for_those_words: for each word, its index in
            ``raw_subtitle_entries_full_list``.
        raw_subtitle_entries_full_list: the raw entries; element ``[0]`` of
            an entry is used as the chunk's start time.
    """
    for offset in range(0, len(words_of_one_sentence), MAX_PALAVRAS_POR_LINHA):
        chunk_end = offset + MAX_PALAVRAS_POR_LINHA
        chunk_words = list(words_of_one_sentence[offset:chunk_end])
        chunk_indices = indices_in_raw_for_those_words[offset:chunk_end]

        first_raw_index = chunk_indices[0]
        last_raw_index = chunk_indices[-1]

        # Every chunk is terminated with a full stop.
        if not chunk_words[-1].endswith('.'):
            chunk_words[-1] += '.'

        sentences_list_to_fill.append({
            "start_time": raw_subtitle_entries_full_list[first_raw_index][0],
            "end_time": None,
            "words": chunk_words,
            "start_index": first_raw_index,
            "end_index": last_raw_index + 1,  # exclusive upper bound
        })

# Updated _extract_sentences function.

def _extract_sentences(raw_subtitle_entries):
"""
Extrai "sentenças" (segmentos de palavras) das legendas brutas.
Cada segmento começa com uma palavra capitalizada (do arquivo original .srt)
e continua até a palavra anterior à próxima palavra capitalizada.
"""
resulting_sentences = []
if not raw_subtitle_entries:
return resulting_sentences

text
current_segment_words_display = []
current_segment_start_time = None
current_segment_start_raw_index = -1
collecting_segment = False # True quando estamos ativamente coletando palavras para um segmento

for i, entry_tuple in enumerate(raw_subtitle_entries):
    time_val, word_for_display, original_word_from_file = entry_tuple
    
    if not original_word_from_file: # Pula se a palavra original for vazia
        continue
    
    # Verifica se a palavra original (do .srt) começa com maiúscula
    is_capitalized = original_word_from_file[0].isupper()

    if is_capitalized:
        if collecting_segment:
            # Finaliza o segmento anterior, pois encontramos uma nova palavra capitalizada
            resulting_sentences.append({
                "start_time": current_segment_start_time,
                "end_time": None, # Será definido por _set_sentence_end_times
                "words": list(current_segment_words_display),
                "start_index": current_segment_start_raw_index,
                "end_index": i, # O segmento termina ANTES da palavra atual (i)
            })
        
        # Inicia um novo segmento com a palavra capitalizada atual
        current_segment_words_display = [word_for_display]
        current_segment_start_time = time_val
        current_segment_start_raw_index = i
        collecting_segment = True
    elif collecting_segment:
        # Palavra não é capitalizada, mas estamos dentro de um segmento, então adiciona
        current_segment_words_display.append(word_for_display)
    # Se não for capitalizada e não estivermos coletando (collecting_segment = False),
    # a palavra é ignorada (ex: palavras minúsculas antes da primeira maiúscula).

# Adiciona o último segmento coletado, se houver
if collecting_segment and current_segment_words_display:
    resulting_sentences.append({
        "start_time": current_segment_start_time,
        "end_time": None, # Será definido por _set_sentence_end_times
        "words": list(current_segment_words_display),
        "start_index": current_segment_start_raw_index,
        "end_index": len(raw_subtitle_entries), # O segmento vai até o final das legendas brutas
    })
                         
return resulting_sentences

def read_subtitles(filename, audio_duration):
    """Read a subtitle file and build the structures used for rendering.

    Lines of the form ``M:SS:D#word`` are parsed; other lines are ignored.
    A final line ending in ``#P`` is dropped before parsing.

    Args:
        filename: path of the UTF-8 subtitle file.
        audio_duration: audio length in seconds, used as the final timestamp.

    Returns:
        A tuple ``(words, paired_sentences, gaps)`` where ``words`` is a list
        of ``(time, display_word)`` pairs, ``paired_sentences`` is the result
        of ``_group_sentences`` and ``gaps`` the result of ``_identify_gaps``.

    Raises:
        ValueError: if no subtitle line could be parsed.
    """
    # parse_time remembers the last minute it saw; start every file from a
    # clean state so a previous file cannot shift this one's timestamps.
    parse_time.last_minutes = 0

    with open(filename, "r", encoding="utf-8") as file:
        lines = file.read().strip().split("\n")

    if lines[-1].endswith('#P'):
        lines = lines[:-1]

    line_pattern = re.compile(r"(\d+:\d{2}:\d{1,2})#(.+)")
    raw_subtitle_entries = []
    for line in lines:
        match = line_pattern.match(line.strip())
        if not match:
            continue
        time_str, word_from_file = match.groups()
        word_for_display = word_from_file.upper() if UPPER_CASE else word_from_file
        raw_subtitle_entries.append(
            (parse_time(time_str + "#"), word_for_display, word_from_file)
        )

    if not raw_subtitle_entries:
        raise ValueError("Nenhuma legenda foi encontrada no arquivo.")

    sentences = _extract_sentences(raw_subtitle_entries)

    final_timestamp = timedelta(seconds=audio_duration)
    _set_sentence_end_times(sentences, final_timestamp)

    # The word list keeps one (time, display_word) pair per raw entry.
    processed_subtitle_words = [(entry[0], entry[1]) for entry in raw_subtitle_entries]

    gaps = _identify_gaps(sentences, processed_subtitle_words, final_timestamp)
    paired_sentences = _group_sentences(sentences, final_timestamp)

    return processed_subtitle_words, paired_sentences, gaps

def _set_sentence_end_times(sentences, final_timestamp):
"""Define os tempos finais para cada sentença."""
for i in range(len(sentences) - 1):
sentences[i]["end_time"] = sentences[i + 1]["start_time"]

text
if sentences:
    sentences[-1]["end_time"] = final_timestamp

def _identify_gaps(sentences, subtitles, final_timestamp):
    """Collect the silent intervals between sentences and between words.

    Candidate gaps are: the time before the first sentence, any time between
    one sentence's end and the next one's start, the time after the last
    sentence, and the word-level gaps from ``_identify_word_gaps``. The
    candidates are merged with ``_merge_intervals`` and only those lasting at
    least ``MIN_GAP_DURATION`` seconds are returned.

    Args:
        sentences: sentence dicts with ``start_time`` and ``end_time`` set.
        subtitles: word list passed through to ``_identify_word_gaps``.
        final_timestamp: end of the audio as a ``timedelta``.

    Returns:
        A list of ``{"start_time", "end_time"}`` dicts.
    """
    gaps = []

    if sentences and sentences[0]["start_time"] > timedelta(0):
        gaps.append({"start_time": timedelta(0), "end_time": sentences[0]["start_time"]})

    for current, following in zip(sentences, sentences[1:]):
        gap_start = current["end_time"]
        gap_end = following["start_time"]
        if gap_end > gap_start:
            gaps.append({"start_time": gap_start, "end_time": gap_end})

    if sentences and sentences[-1]["end_time"] < final_timestamp:
        gaps.append({"start_time": sentences[-1]["end_time"], "end_time": final_timestamp})

    merged_gaps = _merge_intervals(gaps + _identify_word_gaps(subtitles, final_timestamp))

    return [
        gap for gap in merged_gaps
        if (gap['end_time'] - gap['start_time']).total_seconds() >= MIN_GAP_DURATION
    ]

def _identify_word_gaps(subtitles, final_timestamp):
    """Find long silences between consecutive words.

    A word is treated as lasting until the next word starts, capped at
    3 seconds. Whatever remains until the next word (or until
    ``final_timestamp`` for the last word) is a gap, kept only when it lasts
    at least ``MIN_GAP_DURATION`` seconds.

    Args:
        subtitles: sequence whose items have the word's start time at ``[0]``.
        final_timestamp: end of the audio as a ``timedelta``.

    Returns:
        A list of ``{'start_time', 'end_time'}`` dicts.
    """
    max_word_seconds = 3.0  # cap on how long a single word is considered sung
    word_gaps = []

    def add_gap_after(word_time, limit_time):
        # The word occupies min(time until limit, cap); the rest is the gap.
        spoken = min((limit_time - word_time).total_seconds(), max_word_seconds)
        word_end = word_time + timedelta(seconds=spoken)
        if (limit_time - word_end).total_seconds() >= MIN_GAP_DURATION:
            word_gaps.append({'start_time': word_end, 'end_time': limit_time})

    for current, following in zip(subtitles, subtitles[1:]):
        add_gap_after(current[0], following[0])

    if subtitles:
        add_gap_after(subtitles[-1][0], final_timestamp)

    return word_gaps

def _merge_intervals(intervals):
"""Combina intervalos que se sobrepõem."""
if not intervals:
return []

text
intervals.sort(key=lambda x: x['start_time'])

merged = [intervals[0]]
for current in intervals[1:]:
    last = merged[-1]

    if current['start_time'] <= last['end_time']:

        last['end_time'] = max(last['end_time'], current['end_time'])
    else:
        merged.append(current)

return merged

def _group_sentences(sentences, final_timestamp):
    """Group sentences into blocks of ``NUM_FRASES_GRUPO`` for display.

    Each group starts at its first sentence's start time and ends where the
    next group starts (or at ``final_timestamp`` for the last group). The
    last group is padded with empty placeholder sentences so that every
    group holds exactly ``NUM_FRASES_GRUPO`` entries.

    Returns:
        A list of dicts with ``start_time``, ``end_time`` and ``sentences``.
    """
    paired_sentences = []
    total = len(sentences)

    for first in range(0, total, NUM_FRASES_GRUPO):
        next_first = first + NUM_FRASES_GRUPO
        members = sentences[first:next_first]

        if next_first < total:
            end_time = sentences[next_first]["start_time"]
        else:
            end_time = final_timestamp

        paired_sentences.append({
            "start_time": members[0]["start_time"],
            "end_time": end_time,
            "sentences": members,
        })

    # Pad the last group with empty entries up to the group size.
    if paired_sentences:
        last_group = paired_sentences[-1]["sentences"]
        while len(last_group) < NUM_FRASES_GRUPO:
            last_group.append({
                "start_time": final_timestamp,
                "end_time": final_timestamp,
                "words": [],
                "start_index": 0,
                "end_index": 0,
            })

    return paired_sentences

def adjust_font_size_to_fit(words, max_width, max_font_size):
    """Find the largest font size at which the words fit in ``max_width``.

    Starting at ``max_font_size``, the size is lowered one point at a time
    until the line (words joined by single spaces) fits, or until size 10 is
    reached.

    Returns:
        A ``(font_size, font)`` tuple, with ``font`` loaded from ``FONT_PATH``.
    """
    space_count = max(len(words) - 1, 0)  # an empty line has no spaces

    def line_width(candidate):
        return (sum(candidate.getlength(word) for word in words)
                + candidate.getlength(' ') * space_count)

    font_size = max_font_size
    font = ImageFont.truetype(FONT_PATH, font_size)

    while line_width(font) > max_width and font_size > 10:
        font_size -= 1
        font = ImageFont.truetype(FONT_PATH, font_size)

    return font_size, font

def calcular_posicao_texto(largura_texto, altura_texto, posicao_idx=0):
    """Compute the top-left position of a text block from the global layout.

    Args:
        largura_texto: width of the text block in pixels.
        altura_texto: height of the text block in pixels.
        posicao_idx: index used only when ``POSICAO_VERTICAL`` is 4
            (alternating): even indices go to the top, odd ones to the bottom.

    Returns:
        An ``(x, y)`` tuple with ``AJUSTE_HORIZONTAL`` and ``AJUSTE_VERTICAL``
        already applied.
    """
    if CENTRALIZAR:
        x = (IMAGE_SIZE[0] - largura_texto) // 2
    else:
        x = 20
    x += AJUSTE_HORIZONTAL

    posicao_v = POSICAO_VERTICAL
    if posicao_v == 4:
        posicao_v = 1 if posicao_idx % 2 == 0 else 3

    if posicao_v == 1:
        y = IMAGE_SIZE[1] // 8
    elif posicao_v == 2:
        y = (IMAGE_SIZE[1] - altura_texto) // 2
    else:
        y = (IMAGE_SIZE[1] * 7) // 8 - altura_texto
    y += AJUSTE_VERTICAL

    return x, y

def calculate_sentence_positions(paired_sentences):
    """Compute where every subtitle line of every group is drawn.

    For each group, every line gets a font sized by
    ``adjust_font_size_to_fit``; the block of lines is placed with
    ``calcular_posicao_texto`` and the lines are stacked top to bottom with
    ``ESPACO_ENTRE_FRASES`` between them. If the last line of a group would
    cross the bottom safety margin, its font is shrunk in 2-point steps down
    to a minimum of ``max(10, FONT_SIZE_COMPLETE // 2)``.

    Returns:
        One list per group. Each group list holds, per line, a one-element
        list containing the tuple
        ``(x, y, (font_path, font_size), line_width, words, word_indices)``.
        Groups without sentences produce an empty list.
    """
    positions = []
    bottom_margin = 15  # keeps the text from touching the bottom edge
    bottom_limit = IMAGE_SIZE[1] - bottom_margin
    min_font_size = max(10, FONT_SIZE_COMPLETE // 2)

    for group_index, group in enumerate(paired_sentences):
        if not group["sentences"]:
            positions.append([])
            continue

        # Step 1: pick a font and measure the visual height of every line.
        lines = []
        for sentence in group["sentences"]:
            _, font = adjust_font_size_to_fit(sentence["words"], MAX_WIDTH, FONT_SIZE_COMPLETE)
            ascent, descent = font.getmetrics()
            lines.append((sentence, font, ascent + descent))

        # Step 2: total block height decides where the block starts.
        block_height = (sum(height for _, _, height in lines)
                        + ESPACO_ENTRE_FRASES * (len(lines) - 1))
        x_base, y_top = calcular_posicao_texto(MAX_WIDTH, block_height, group_index)

        # Step 3: place each line; only the last one may be shrunk to fit.
        group_positions = []
        last_index = len(lines) - 1
        for line_index, (sentence, font, line_height) in enumerate(lines):
            if line_index == last_index:
                font_size = font.size
                while y_top + line_height > bottom_limit and font_size > min_font_size:
                    font_size = max(min_font_size, font_size - 2)
                    font = ImageFont.truetype(FONT_PATH, font_size)
                    ascent, descent = font.getmetrics()
                    line_height = ascent + descent

            words = sentence["words"]
            line_width = (sum(font.getlength(word) for word in words)
                          + font.getlength(' ') * max(len(words) - 1, 0))

            if CENTRALIZAR:
                x_line = (IMAGE_SIZE[0] - line_width) // 2 + AJUSTE_HORIZONTAL
            else:
                # x_base comes from calcular_posicao_texto, which has already
                # added AJUSTE_HORIZONTAL; adding it again would double it.
                x_line = x_base

            # The font is stored as (path, size) rather than as a font object.
            group_positions.append([(
                x_line,
                y_top,
                (FONT_PATH, font.size),
                line_width,
                words,
                list(range(sentence["start_index"], sentence["end_index"])),
            )])

            y_top += line_height
            if line_index < last_index:
                y_top += ESPACO_ENTRE_FRASES

        positions.append(group_positions)

    return positions

@lru_cache(maxsize=1000)
def create_gradient_word(word, width, start_color, end_color, progress, font_size):
    """Render one word as an RGBA image with a left-to-right colour sweep.

    Columns left of the sweep point use ``end_color``, columns right of it
    use ``start_color``, and a band around the sweep point blends the two.
    A drop shadow, and an optional border when ``BORDER_COLOR`` is set, are
    drawn underneath. Results are memoised with ``lru_cache``.

    Args:
        word: text to render.
        width: text width in pixels; the image is this wide plus padding.
        start_color: hex colour of the part not yet reached by the sweep.
        end_color: hex colour of the part already swept.
        progress: sweep position as a fraction of the image width.
        font_size: size used to load ``FONT_PATH``.

    Returns:
        An RGBA ``Image`` padded by 20 px horizontally and 30 px vertically.
    """
    font = ImageFont.truetype(FONT_PATH, font_size)

    padding_x = 20
    padding_y = 30

    ascent, descent = font.getmetrics()
    text_height = ascent + descent
    image_height = text_height + (padding_y * 2)
    image_width = int(width) + (padding_x * 2)

    base_image = Image.new("RGBA", (image_width, image_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(base_image)

    x = padding_x
    y = padding_y + (image_height - text_height) // 2 - descent

    # Border: only when a colour is configured and the thickness is positive.
    if BORDER_COLOR and BORDER_GROSSURA > 0:
        try:
            border_rgb = hex_to_rgb(BORDER_COLOR)
        except ValueError:
            # Deliberate best effort: an invalid colour just disables the border.
            border_rgb = None
        if border_rgb is not None:
            reach = range(-BORDER_GROSSURA, BORDER_GROSSURA + 1)
            for offset_x in reach:
                for offset_y in reach:
                    if offset_x != 0 or offset_y != 0:
                        draw.text((x + offset_x, y + offset_y), word, font=font,
                                  fill=(*border_rgb, 255), anchor="la")

    shadow_offset = 2
    shadow_color = (0, 0, 0, 180)
    draw.text((x + shadow_offset, y + shadow_offset), word, font=font,
              fill=shadow_color, anchor="la")

    # No opaque white glyph is drawn under the coloured one: it would show
    # through the half-transparent edge pixels and leave a pale, jagged fringe
    # around the text. The coloured glyph is composited straight over the
    # shadow/border instead.
    mask = Image.new("L", base_image.size, 0)
    ImageDraw.Draw(mask).text((x, y), word, font=font, fill=255, anchor="la")

    gradient = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
    gradient_draw = ImageDraw.Draw(gradient)
    start_rgb = hex_to_rgb(start_color)
    end_rgb = hex_to_rgb(end_color)

    cut_point = int(image_width * progress)
    transition_width = 35

    for column in range(image_width):
        if column <= cut_point - transition_width:
            color = end_rgb
        elif column >= cut_point + transition_width:
            color = start_rgb
        else:
            blend = (column - (cut_point - transition_width)) / (2 * transition_width)
            color = tuple(
                int(end_rgb[channel] + (start_rgb[channel] - end_rgb[channel]) * blend)
                for channel in range(3)
            )
        gradient_draw.line([(column, 0), (column, image_height)], fill=(*color, 255))

    # The glyph mask becomes the gradient's alpha, so only the text is coloured.
    gradient.putalpha(mask)

    return Image.alpha_composite(base_image, gradient)

def draw_text_with_alpha(image, text, x, y, font_params, color, alpha=255):
    """Draw text in one solid colour with the given opacity.

    Args:
        image: target image, modified in place.
        text: text to draw; trailing full stops are not rendered.
        x, y: position; the word image's 20 px left padding is compensated.
        font_params: ``(font_path, font_size)`` used to measure the text.
        color: hex colour of the text.
        alpha: opacity from 0 to 255; 255 draws the text unchanged.
    """
    font = ImageFont.truetype(font_params[0], font_params[1])
    display_text = text.rstrip(".")
    word_width = font.getlength(display_text)
    word_image = create_gradient_word(display_text, word_width, color, color, 1.0, font.size)

    if alpha != 255:
        # Work on a copy so the cached image is never altered, and scale the
        # alpha channel through a lookup table instead of a per-pixel loop.
        word_image = word_image.copy()
        faded = word_image.getchannel("A").point(lambda a: int(a * alpha / 255))
        word_image.putalpha(faded)

    padding_x_compensation = 20  # left padding added by create_gradient_word
    image.paste(word_image, (int(x) - padding_x_compensation, int(y)), word_image)

def draw_current_line_text(image, sentence, x, y, font_params, current_time, subtitles, line_words, line_word_indices, alpha=255):
    """Draw one subtitle line with a per-word highlight sweep.

    Each word is highlighted progressively from its own start time until the
    next word starts, capped at 3 seconds. The last word of ``subtitles`` is
    given 2 seconds.

    Args:
        image: target image, modified in place.
        sentence: not used by this function.
        x, y: position of the start of the line.
        font_params: ``(font_path, font_size)`` used to measure the words.
        current_time: time of the frame being drawn, as a ``timedelta``.
        subtitles: word list; item ``[0]`` of an entry is the start time.
        line_words: words of this line; trailing full stops are not rendered.
        line_word_indices: index into ``subtitles`` for each word.
        alpha: opacity from 0 to 255; 255 draws the words unchanged.
    """
    font = ImageFont.truetype(font_params[0], font_params[1])
    space_width = font.getlength(" ")
    padding_x_compensation = 20  # left padding added by create_gradient_word

    # First pass: horizontal position and width of every word.
    word_positions = []
    cursor_x = x
    for word in line_words:
        word_width = font.getlength(word.rstrip("."))
        word_positions.append((cursor_x, word_width))
        cursor_x += word_width + space_width

    # Second pass: render each word with its current highlight progress.
    for (word_x, word_width), word, word_index in zip(word_positions, line_words, line_word_indices):
        display_word = word.rstrip(".")
        word_time = subtitles[word_index][0]

        if word_index + 1 < len(subtitles):
            next_word_time = subtitles[word_index + 1][0]
        else:
            next_word_time = word_time + timedelta(seconds=2)

        effective_duration = min((next_word_time - word_time).total_seconds(), 3.0)
        elapsed = (current_time - word_time).total_seconds()

        if elapsed >= effective_duration:
            progress = 1.0
        elif elapsed >= 0:
            progress = min(max(elapsed / effective_duration, 0.0), 1.0)
        else:
            progress = 0.0

        word_image = create_gradient_word(
            display_word,
            word_width,
            FONT_COLOR,
            HIGHLIGHT_COLOR,
            progress,
            font.size,
        )

        if alpha != 255:
            # Copy first so the cached image stays intact, then scale the
            # alpha channel with a lookup table instead of a per-pixel loop.
            word_image = word_image.copy()
            faded = word_image.getchannel("A").point(lambda a: int(a * alpha / 255))
            word_image.putalpha(faded)

        image.paste(word_image, (int(word_x) - padding_x_compensation, int(y)), word_image)

def draw_progress_bar(image, progress, bar_position=None, bar_size=None, number_to_display=None, alpha=255):
    """Draw a progress bar, optionally with a number on top, onto ``image``.

    Args:
        image: RGBA target image, modified in place via ``alpha_composite``.
        progress: filled fraction; values outside 0..1 are clamped.
        bar_position: ``(x, y)`` centre of the bar. Defaults to horizontally
            centred, 200 px above the bottom edge.
        bar_size: ``(width, height)``. Defaults to ``PROGRESS_BAR_SIZE``.
        number_to_display: value drawn centred over the bar, if not ``None``.
        alpha: opacity (0-255) applied to the bar and the number.
    """
    if bar_position is None:
        bar_position = (IMAGE_SIZE[0] // 2, IMAGE_SIZE[1] - 200)
    if bar_size is None:
        bar_size = PROGRESS_BAR_SIZE

    x_center, y_center = bar_position
    bar_width, bar_height = bar_size
    x_start = x_center - bar_width // 2
    y_start = y_center - bar_height // 2

    overlay = Image.new("RGBA", image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    # Grey track first, then the filled part on top of it.
    draw.rectangle([x_start, y_start, x_start + bar_width, y_start + bar_height],
                   fill=(200, 200, 200, alpha))

    progress = min(max(progress, 0.0), 1.0)  # never draw outside the track
    progress_color = hex_to_rgb(PROGRESS_BAR_COLOR)
    progress_width = int(bar_width * progress)
    draw.rectangle(
        [x_start, y_start, x_start + progress_width, y_start + bar_height],
        fill=(*progress_color, alpha),
    )

    if number_to_display is not None:
        font = ImageFont.truetype(FONT_PATH, int(bar_height * 2.2))
        label = str(number_to_display)
        # ImageDraw.textsize no longer exists in recent Pillow; the right and
        # bottom edges of the bounding box at the origin stand in for the
        # width/height it used to return.
        _, _, text_width, text_height = draw.textbbox((0, 0), label, font=font)
        text_x = x_center - text_width // 2
        text_y = y_center - text_height // 2 - int(bar_height * 0.3)
        draw.text((text_x, text_y), label, font=font, fill=(255, 255, 255, alpha))

    image.alpha_composite(overlay)

def draw_animated_text(image, current_time, gap, text, font_path, alpha=255):
    """Draw a centred caption below the progress bar with a slide effect.

    During the first second of the gap the caption moves down into its
    resting position; during the last second it moves back up. The maximum
    displacement is 20 px.

    Args:
        image: target image, modified in place.
        current_time: time of the frame being drawn, as a ``timedelta``.
        gap: dict with ``start_time`` and ``end_time`` of the current gap.
        text: caption to draw.
        font_path: font file loaded at ``SUBSCRIBE_TEXT_SIZE``.
        alpha: opacity (0-255) of the caption.
    """
    bar_position = (IMAGE_SIZE[0] // 2, IMAGE_SIZE[1] - 200)
    _, bar_height = PROGRESS_BAR_SIZE
    base_text_y = bar_position[1] + bar_height // 2 + 20

    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, SUBSCRIBE_TEXT_SIZE)
    # ImageDraw.textsize no longer exists in recent Pillow; the right and
    # bottom edges of the bounding box at the origin stand in for the
    # width/height it used to return.
    _, _, text_width, text_height = draw.textbbox((0, 0), text, font=font)
    text_x = (IMAGE_SIZE[0] - text_width) // 2

    time_since_gap_start = (current_time - gap["start_time"]).total_seconds()
    time_to_gap_end = (gap["end_time"] - current_time).total_seconds()

    fade_duration = 1
    if time_since_gap_start < fade_duration:
        movement_progress = time_since_gap_start / fade_duration
    elif time_to_gap_end < fade_duration:
        movement_progress = time_to_gap_end / fade_duration
    else:
        movement_progress = 1.0

    max_movement = 20
    text_y = base_text_y - max_movement * (1 - movement_progress)

    # Render on a transparent tile so the caption's alpha is honoured on paste.
    text_image = Image.new('RGBA', (text_width, text_height), (0, 0, 0, 0))
    ImageDraw.Draw(text_image).text((0, 0), text, font=font, fill=(255, 255, 255, alpha))

    image.paste(text_image, (text_x, int(text_y)), text_image)

@lru_cache(maxsize=None)
def load_and_resize_background(image_path, size):
    """Load the background image as RGBA, resized to ``size``.

    The result is memoised per ``(image_path, size)``. If the file is missing
    or cannot be loaded, a message is printed and an opaque black image of
    the requested size is returned instead.
    """
    try:
        # The context manager releases the file handle once the pixels are read.
        with Image.open(image_path) as source:
            return source.convert("RGBA").resize(size, Image.LANCZOS)
    except FileNotFoundError:
        print(f"Erro: Imagem de fundo não encontrada em {image_path}")
        return Image.new("RGBA", size, (0, 0, 0, 255))
    except Exception as e:
        print(f"Erro ao carregar ou redimensionar a imagem de fundo: {e}")
        return Image.new("RGBA", size, (0, 0, 0, 255))

def extract_background_frames(video_path, output_dir, size, fps):
    """Extract resized JPEG frames from the background video with ffmpeg.

    Frames are written to ``output_dir`` as ``bg_frame_0000.jpg``,
    ``bg_frame_0001.jpg`` and so on, sampled at ``fps`` and scaled to ``size``.

    Args:
        video_path: input video file.
        output_dir: directory for the frames; created if missing.
        size: ``(width, height)`` of the extracted frames.
        fps: sampling rate passed to ffmpeg's ``fps`` filter.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with an error.
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    os.makedirs(output_dir, exist_ok=True)
    frame_pattern = os.path.join(output_dir, "bg_frame_%04d.jpg")
    width, height = size
    try:
        full_command = [
            "ffmpeg", "-y", "-hide_banner",
            "-i", video_path,
            "-vf", f"fps={fps},scale={width}:{height}",
            "-q:v", "2",
            "-start_number", "0",
            frame_pattern,
        ]
        print(f"Extraindo frames de {video_path} para {output_dir}...")

        subprocess.run(full_command, check=True, capture_output=True, text=True,
                       encoding='utf-8', errors='replace')

        extracted_files = [
            name for name in os.listdir(output_dir)
            if name.startswith("bg_frame_") and name.endswith(".jpg")
        ]
        if not extracted_files:
            print(f"AVISO CRÍTICO: Nenhum frame (bg_frame_*.jpg) encontrado em {output_dir} após a execução do ffmpeg.")
        else:
            print(f"{len(extracted_files)} frames de fundo foram extraídos para {output_dir}.")

    except subprocess.CalledProcessError as e:
        print(f"Erro CRÍTICO ao extrair frames do vídeo de fundo: {e}")
        print(f"Comando: {' '.join(e.cmd)}")

        if e.stdout:
            print(f"Stdout do FFMPEG (erro): {e.stdout}")
        if e.stderr:
            print(f"Stderr do FFMPEG (erro): {e.stderr}")
        raise
    except FileNotFoundError:
        print("Erro: ffmpeg não encontrado. Certifique-se de que está instalado e no PATH do sistema.")
        raise

def process_frame(
    frame_index,
    subtitles,
    paired_sentences,
    gaps,
    num_frames,
    sentence_positions,
    audio_duration,
    fps,
    is_background_video,
    current_background_frames_dir,
    num_actual_background_frames,
):
    """Render a single video frame: background plus gap overlay or subtitles.

    The frame time is ``frame_index / fps``. If that time falls inside a gap,
    the progress bar and the call-to-action text are drawn and no subtitles
    are shown. Otherwise the sentence group active at that time is drawn slot
    by slot, cross-fading each slot with the matching line of the previous or
    next group. Before the first group starts, its lines are faded in during
    the last ``TRANSITION_DURATION_GAPS`` seconds.

    Args:
        frame_index: Zero-based index of the frame to render.
        subtitles: Indexable by word index; element ``[0]`` of each entry is
            used as that word's start time.
        paired_sentences: List of groups; each is a dict with ``start_time``,
            ``end_time`` and ``sentences`` (dicts with ``start_time``,
            ``start_index`` and ``end_index``).
        gaps: List of dicts with ``start_time`` and ``end_time``.
        num_frames: Unused here; kept for interface compatibility.
        sentence_positions: Per group, per sentence, a list of
            ``(x, y, font_params, width, line_words, line_word_indices)``.
        audio_duration: Unused here; kept for interface compatibility.
        fps: Frames per second of the output video.
        is_background_video: Whether the background comes from extracted
            video frames rather than a static image.
        current_background_frames_dir: Directory holding ``bg_frame_NNNN.jpg``.
        num_actual_background_frames: Number of extracted background frames.

    Returns:
        The rendered frame converted to RGB.
    """
    current_time = timedelta(seconds=frame_index / fps)

    if is_background_video:
        if num_actual_background_frames > 0:
            # Wrap around so a background shorter than the output loops.
            bg_frame_to_load_idx = frame_index % num_actual_background_frames
            bg_frame_path = os.path.join(current_background_frames_dir, f"bg_frame_{bg_frame_to_load_idx:04d}.jpg")
            try:
                background_image = Image.open(bg_frame_path).convert("RGBA")
            except FileNotFoundError:
                print(f"Aviso: Frame de fundo {bg_frame_path} (índice {bg_frame_to_load_idx} de {num_actual_background_frames} frames) não encontrado. Usando fundo preto.")
                background_image = Image.new("RGBA", IMAGE_SIZE, (0, 0, 0, 255))
            except Exception as e:
                print(f"Erro ao carregar frame de fundo {bg_frame_path}: {e}. Usando fundo preto.")
                background_image = Image.new("RGBA", IMAGE_SIZE, (0, 0, 0, 255))
        else:
            background_image = Image.new("RGBA", IMAGE_SIZE, (0, 0, 0, 255))
    else:
        background_image = load_and_resize_background(BACKGROUND_SOURCE, IMAGE_SIZE)

    # Draw on a copy: the static background object is cached and shared.
    image = background_image.copy()

    subtitles_visible = True

    current_pair_index = None
    for i, pair in enumerate(paired_sentences):
        if pair["start_time"] <= current_time < pair["end_time"]:
            current_pair_index = i
            break

    for gap_index, gap in enumerate(gaps):
        if gap["start_time"] <= current_time < gap["end_time"]:
            gap_duration = (gap["end_time"] - gap["start_time"]).total_seconds()
            gap_progress = (current_time - gap["start_time"]).total_seconds() / gap_duration

            # Only the opening gap (first gap, starting at t=0) shows a 3-2-1
            # countdown, and only during its last three seconds.
            countdown_number = None
            is_start_gap = gap_index == 0 and gap["start_time"] == timedelta(0)
            if is_start_gap:
                total_countdown_duration = 3.0
                countdown_start_time = gap["end_time"] - timedelta(seconds=total_countdown_duration)
                if countdown_start_time <= current_time < gap["end_time"]:
                    gap_remaining_time = (gap["end_time"] - current_time).total_seconds()
                    number_to_display = int(gap_remaining_time) + 1
                    if 1 <= number_to_display <= 3:
                        countdown_number = number_to_display

            if countdown_number is not None:
                draw_progress_bar(image, gap_progress, number_to_display=countdown_number, alpha=255)
            else:
                draw_progress_bar(image, gap_progress, alpha=255)
            draw_animated_text(image, current_time, gap, "Curta o vídeo e inscreva-se no canal", FONT_PATH, alpha=255)

            subtitles_visible = False
            break

    if subtitles_visible and current_pair_index is not None:
        pair_data = paired_sentences[current_pair_index]
        current_pair_sentences = pair_data["sentences"]
        current_positions_for_pair = sentence_positions[current_pair_index]

        # Sentences and positions of the PREVIOUS group, needed for the
        # second-half slots where its lines linger.
        prev_pair_sentences_list = None
        prev_pair_positions_list = None
        if current_pair_index > 0:
            prev_pair_sentences_list = paired_sentences[current_pair_index - 1]["sentences"]
            prev_pair_positions_list = sentence_positions[current_pair_index - 1]

        group_alpha = 255  # base alpha of a fully visible sentence

        # Per display slot: alpha/visibility of the current group's line and
        # of the next group's line emerging in the same slot.
        currentGroupLineAlphas = [group_alpha] * NUM_FRASES_GRUPO
        currentGroupLineShouldDraw = [False] * NUM_FRASES_GRUPO
        emergingLineAlphas = [0] * NUM_FRASES_GRUPO
        emergingLineShouldDraw = [False] * NUM_FRASES_GRUPO

        # Lines of the current group that exist are drawn by default.
        for i in range(min(len(current_pair_sentences), NUM_FRASES_GRUPO)):
            currentGroupLineShouldDraw[i] = True

        next_pair_sentences_list = None
        if current_pair_index + 1 < len(paired_sentences):
            next_pair_sentences_list = paired_sentences[current_pair_index + 1]["sentences"]

        # The line in slot k transitions when the sentence at conceptual index
        # k + offset_for_trigger starts (e.g. with 4 lines per group, line 0
        # is triggered by line 2 and line 1 by line 3).
        offset_for_trigger = NUM_FRASES_GRUPO // 2
        if NUM_FRASES_GRUPO == 1 and offset_for_trigger == 0:
            # Prevent a single-line group from triggering itself.
            offset_for_trigger = 1

        for k in range(NUM_FRASES_GRUPO):  # k is the display slot index
            currentLineFadeOutTriggerSentence = None
            emergingLineFadeInTriggerSentence = None
            isEmergingLineTriggerFromCurrentPair = False

            trigger_sentence_conceptual_idx = k + offset_for_trigger

            if trigger_sentence_conceptual_idx < NUM_FRASES_GRUPO:
                # The trigger sentence belongs to the current group.
                if trigger_sentence_conceptual_idx < len(current_pair_sentences):
                    currentLineFadeOutTriggerSentence = current_pair_sentences[trigger_sentence_conceptual_idx]
                    emergingLineFadeInTriggerSentence = current_pair_sentences[trigger_sentence_conceptual_idx]
                    isEmergingLineTriggerFromCurrentPair = True
            else:
                # The trigger sentence belongs to the next group.
                trigger_idx_in_next_group = trigger_sentence_conceptual_idx - NUM_FRASES_GRUPO
                if next_pair_sentences_list and trigger_idx_in_next_group < len(next_pair_sentences_list):
                    currentLineFadeOutTriggerSentence = next_pair_sentences_list[trigger_idx_in_next_group]
                    emergingLineFadeInTriggerSentence = next_pair_sentences_list[trigger_idx_in_next_group]
                    isEmergingLineTriggerFromCurrentPair = False

            # FADE OUT of the current group's line in slot k.
            if currentLineFadeOutTriggerSentence and k < len(current_pair_sentences) and currentGroupLineShouldDraw[k]:
                fadeOutStartTime = currentLineFadeOutTriggerSentence["start_time"]
                fadeOutEndTime = fadeOutStartTime + timedelta(seconds=FADE_OUT_DURATION_LINES)

                if current_time >= fadeOutStartTime:
                    if current_time < fadeOutEndTime and FADE_OUT_DURATION_LINES > 0:
                        timeInFadeOut = (current_time - fadeOutStartTime).total_seconds()
                        fadeOutProgress = min(timeInFadeOut / FADE_OUT_DURATION_LINES, 1.0)
                        currentGroupLineAlphas[k] = int(group_alpha * (1 - fadeOutProgress))
                    else:
                        currentGroupLineShouldDraw[k] = False
                        currentGroupLineAlphas[k] = 0

            # FADE IN of line k of the next group, emerging in slot k.
            if emergingLineFadeInTriggerSentence and next_pair_sentences_list and k < len(next_pair_sentences_list):
                fadeInTriggerActualStartTime = emergingLineFadeInTriggerSentence["start_time"]

                fadeInTriggerEffectiveEndTime = None
                fadeInTriggerDurationSourceSentences = current_pair_sentences if isEmergingLineTriggerFromCurrentPair else next_pair_sentences_list

                fadeInTriggerDurationSourceGroupEndTime = None
                if isEmergingLineTriggerFromCurrentPair:
                    fadeInTriggerDurationSourceGroupEndTime = pair_data["end_time"]
                elif current_pair_index + 1 < len(paired_sentences):
                    fadeInTriggerDurationSourceGroupEndTime = paired_sentences[current_pair_index + 1]["end_time"]

                # Locate the trigger sentence by identity to find where it
                # ends: at the next sentence's start, or at its group's end.
                fadeInTriggerIndexInSource = -1
                if fadeInTriggerDurationSourceSentences:
                    for idx, sentence_obj in enumerate(fadeInTriggerDurationSourceSentences):
                        if sentence_obj is emergingLineFadeInTriggerSentence:
                            fadeInTriggerIndexInSource = idx
                            break

                if fadeInTriggerDurationSourceSentences and fadeInTriggerIndexInSource != -1:
                    if fadeInTriggerIndexInSource + 1 < len(fadeInTriggerDurationSourceSentences):
                        fadeInTriggerEffectiveEndTime = fadeInTriggerDurationSourceSentences[fadeInTriggerIndexInSource + 1]["start_time"]
                    elif fadeInTriggerDurationSourceGroupEndTime:
                        fadeInTriggerEffectiveEndTime = fadeInTriggerDurationSourceGroupEndTime

                if fadeInTriggerEffectiveEndTime is None:
                    # No end could be determined: assume a 2-second sentence.
                    fadeInTriggerEffectiveEndTime = fadeInTriggerActualStartTime + timedelta(seconds=2)

                # Default fade-in start: temporal midpoint of the trigger.
                fadeInStartTimeForEmergingText = fadeInTriggerActualStartTime + (
                    fadeInTriggerEffectiveEndTime - fadeInTriggerActualStartTime
                ) / 2

                # Prefer the start time of the trigger's middle word when that
                # word index is valid.
                mid_word_idx = (
                    emergingLineFadeInTriggerSentence["start_index"]
                    + emergingLineFadeInTriggerSentence["end_index"] - 1
                ) // 2

                if emergingLineFadeInTriggerSentence["start_index"] <= mid_word_idx < emergingLineFadeInTriggerSentence["end_index"] and \
                   mid_word_idx < len(subtitles):
                    fadeInStartTimeForEmergingText = subtitles[mid_word_idx][0]

                actualFadeInStartTime = fadeInStartTimeForEmergingText
                fadeInFullyAppearsTime = actualFadeInStartTime + timedelta(seconds=FADE_IN_DURATION_NEXT_LINES)

                if current_time >= actualFadeInStartTime:
                    emergingLineShouldDraw[k] = True
                    if current_time < fadeInFullyAppearsTime and FADE_IN_DURATION_NEXT_LINES > 0:
                        timeInFadeIn = (current_time - actualFadeInStartTime).total_seconds()
                        fadeInProgress = min(timeInFadeIn / FADE_IN_DURATION_NEXT_LINES, 1.0)
                        emergingLineAlphas[k] = int(group_alpha * fadeInProgress)
                    else:
                        emergingLineAlphas[k] = group_alpha

        # DRAWING loop: one pass per display slot.
        for display_slot_idx in range(NUM_FRASES_GRUPO):
            # PHASE 1: in second-half slots, the previous group's line lingers
            # and fades out while the first-half lines of the current group
            # are spoken (e.g. with 4 lines, prev[2] fades during current[0]
            # and prev[3] during current[1]).
            if prev_pair_sentences_list and display_slot_idx >= (NUM_FRASES_GRUPO // 2):
                lingeringLineIndexInPrevGroup = display_slot_idx
                # Index, in the current group, of the sentence whose start
                # begins the lingering line's fade-out.
                lingeringLineFadeOutTriggerIndexInCurrentGroup = display_slot_idx - (NUM_FRASES_GRUPO // 2)

                if (lingeringLineIndexInPrevGroup < len(prev_pair_sentences_list) and
                        prev_pair_positions_list and lingeringLineIndexInPrevGroup < len(prev_pair_positions_list) and
                        lingeringLineFadeOutTriggerIndexInCurrentGroup < len(current_pair_sentences)):

                    lingeringLineFadeOutTriggerSentence = current_pair_sentences[lingeringLineFadeOutTriggerIndexInCurrentGroup]
                    lingeringLineFadeOut_StartTime = lingeringLineFadeOutTriggerSentence["start_time"]

                    if lingeringLineFadeOutTriggerIndexInCurrentGroup + 1 < len(current_pair_sentences):
                        lingeringLineFadeOutTrigger_EffectiveEndTime = current_pair_sentences[lingeringLineFadeOutTriggerIndexInCurrentGroup + 1]["start_time"]
                    else:
                        lingeringLineFadeOutTrigger_EffectiveEndTime = pair_data["end_time"]

                    # Guard against zero or negative trigger durations.
                    if lingeringLineFadeOutTrigger_EffectiveEndTime <= lingeringLineFadeOut_StartTime:
                        lingeringLineFadeOutTrigger_EffectiveEndTime = lingeringLineFadeOut_StartTime + timedelta(seconds=0.1)

                    # The fade-out ends halfway through the trigger sentence.
                    lingeringLineFadeOut_EndTime = lingeringLineFadeOut_StartTime + (lingeringLineFadeOutTrigger_EffectiveEndTime - lingeringLineFadeOut_StartTime) / 2

                    if current_time < lingeringLineFadeOut_EndTime:
                        lingeringLineFadeOut_Duration = (lingeringLineFadeOut_EndTime - lingeringLineFadeOut_StartTime).total_seconds()
                        lingeringLineFadeOut_Progress = 1.0
                        if lingeringLineFadeOut_Duration > 0:
                            lingeringLineTimeInFadeOut = (current_time - lingeringLineFadeOut_StartTime).total_seconds()
                            lingeringLineFadeOut_Progress = max(0.0, min(1.0, lingeringLineTimeInFadeOut / lingeringLineFadeOut_Duration))

                        lingeringLineAlphaValue = int(group_alpha * (1.0 - lingeringLineFadeOut_Progress))

                        if lingeringLineAlphaValue > 5:
                            lingeringSentenceObject = prev_pair_sentences_list[lingeringLineIndexInPrevGroup]
                            lingeringSentenceLinePositions = prev_pair_positions_list[lingeringLineIndexInPrevGroup]

                            for line_info in lingeringSentenceLinePositions:
                                x_l, y_l, font_params_l, _width_l, line_words_l, line_word_indices_l = line_info
                                draw_current_line_text(
                                    image, lingeringSentenceObject, x_l, y_l, font_params_l,
                                    current_time, subtitles, line_words_l, line_word_indices_l,
                                    alpha=lingeringLineAlphaValue
                                )
                            # Slot resolved by the lingering line.
                            continue

            # PHASE 2: regular drawing (first-half slots, or second-half slots
            # that phase 1 did not resolve).
            finalSentenceToDraw = None
            finalLinePositionsToUse = None
            finalAlphaToUse = 0
            isDrawingEmergingLine = False
            staticColorForEmergingLine = None
            finalFontParamsToUse = None

            # First choice: the EMERGING line of the next group.
            if emergingLineShouldDraw[display_slot_idx] and emergingLineAlphas[display_slot_idx] > 0:
                if next_pair_sentences_list and display_slot_idx < len(next_pair_sentences_list):
                    if current_pair_index + 1 < len(sentence_positions):
                        next_pair_positions_list = sentence_positions[current_pair_index + 1]
                        if display_slot_idx < len(next_pair_positions_list):
                            finalSentenceToDraw = next_pair_sentences_list[display_slot_idx]
                            finalLinePositionsToUse = next_pair_positions_list[display_slot_idx]
                            # Font params are taken from the first wrapped line
                            # and reused for every line of the sentence.
                            if finalLinePositionsToUse:
                                finalFontParamsToUse = finalLinePositionsToUse[0][2]

                            isDrawingEmergingLine = True
                            staticColorForEmergingLine = FONT_COLOR
                            finalAlphaToUse = emergingLineAlphas[display_slot_idx]

            # Otherwise: the CURRENT group's line for this slot.
            if not finalSentenceToDraw and currentGroupLineShouldDraw[display_slot_idx] and currentGroupLineAlphas[display_slot_idx] > 0:
                if display_slot_idx < len(current_pair_sentences) and display_slot_idx < len(current_positions_for_pair):
                    finalSentenceToDraw = current_pair_sentences[display_slot_idx]
                    finalLinePositionsToUse = current_positions_for_pair[display_slot_idx]
                    if finalLinePositionsToUse:
                        finalFontParamsToUse = finalLinePositionsToUse[0][2]

                    finalAlphaToUse = currentGroupLineAlphas[display_slot_idx]
                    isDrawingEmergingLine = False
                    staticColorForEmergingLine = None

            # Nearly transparent lines (alpha <= 5) are not drawn at all.
            if finalSentenceToDraw and finalLinePositionsToUse and finalFontParamsToUse and finalAlphaToUse > 5:
                for line_info in finalLinePositionsToUse:
                    x_f, y_f, _font_params_in_line_info, _width_f, line_words_f, line_word_indices_f = line_info

                    if isDrawingEmergingLine and staticColorForEmergingLine:
                        # Emerging lines are drawn in a single static color.
                        text_to_draw_static = ' '.join(line_words_f)
                        draw_text_with_alpha(image, text_to_draw_static, x_f, y_f, finalFontParamsToUse, staticColorForEmergingLine, alpha=finalAlphaToUse)
                    else:
                        draw_current_line_text(
                            image, finalSentenceToDraw, x_f, y_f, finalFontParamsToUse,
                            current_time, subtitles, line_words_f, line_word_indices_f,
                            alpha=finalAlphaToUse
                        )

    elif subtitles_visible and paired_sentences and current_time < paired_sentences[0]["start_time"]:
        # Before the first group starts: fade its lines in during the last
        # TRANSITION_DURATION_GAPS seconds.
        next_pair_sents = paired_sentences[0]["sentences"]
        next_positions_list = sentence_positions[0]
        next_pair_display_start_time = paired_sentences[0]["start_time"] - timedelta(seconds=TRANSITION_DURATION_GAPS)

        if next_pair_display_start_time <= current_time:
            opacity = 255
            if TRANSITION_DURATION_GAPS > 0:
                time_into_transition = (current_time - next_pair_display_start_time).total_seconds()
                opacity_progress = min(time_into_transition / TRANSITION_DURATION_GAPS, 1.0)
                opacity = int(255 * opacity_progress)

            for idx, (sentence_data, line_positions_for_sentence) in enumerate(zip(next_pair_sents, next_positions_list)):
                if idx >= NUM_FRASES_GRUPO:
                    break  # never draw more lines than a group holds
                for line_info in line_positions_for_sentence:
                    x_n, y_n, font_params_n, _width_n, line_words_n, _line_word_indices_n = line_info
                    text = ' '.join(line_words_n)
                    draw_text_with_alpha(image, text, x_n, y_n, font_params_n, FONT_COLOR, alpha=opacity)

    return image.convert("RGB")

def create_video(frame_folder, output_video, audio_file, *, video_codec="h264_nvenc", preset="fast", video_bitrate="8M"):
    """Encode the rendered frames and the audio track into the final video.

    Reads ``frame_%04d.jpg`` files from ``frame_folder`` at ``FPS`` frames per
    second and muxes them with ``audio_file`` using ffmpeg. Output stops at
    the shorter of the two streams (``-shortest``).

    Args:
        frame_folder: Directory containing the rendered ``frame_NNNN.jpg`` files.
        output_video: Path of the video file to write (overwritten if present).
        audio_file: Path of the audio file to mux in.
        video_codec: ffmpeg video encoder name. The default requires an
            NVENC-capable ffmpeg build and GPU.
        preset: Encoder preset passed to ``-preset``.
        video_bitrate: Target video bitrate passed to ``-b:v``.
            NOTE(review): the discussion accompanying this script attributes
            soft or blocky text partly to the 8M default at 1080p60; consider
            passing a higher value (confirm visually).

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        FileNotFoundError: the ``ffmpeg`` executable could not be started.
    """
    frame_pattern = os.path.join(frame_folder, "frame_%04d.jpg")

    command = [
        "ffmpeg",
        "-y",
        "-framerate", str(FPS),
        "-i", frame_pattern,
        "-i", audio_file,
        "-c:v", video_codec,
        "-preset", preset,
        "-b:v", video_bitrate,
        "-profile:v", "high",
        "-c:a", "aac",
        "-profile:a", "aac_low",
        "-b:a", "448k",
        "-shortest",
        output_video,
    ]
    subprocess.run(command, check=True)
    print("Vídeo criado com sucesso.")

def generate_frames_chunk(args):
    """Render and save a contiguous range of frames (pool worker entry point).

    Args:
        args: A 14-item tuple, unpacked in this order: ``start_frame``,
            ``end_frame`` (exclusive), ``subtitles``, ``paired_sentences``,
            ``gaps``, ``output_dir``, ``num_frames``, ``sentence_positions``,
            ``progress_queue``, ``audio_duration``, ``fps``,
            ``is_background_video``, ``current_background_frames_dir`` and
            ``num_actual_background_frames``.

    Raises:
        ValueError: ``args`` does not contain exactly 14 items.
        Exception: Any error raised while rendering or saving a frame is
            propagated after the progress tick has been emitted.
    """
    (
        start_frame,
        end_frame,
        subtitles,
        paired_sentences,
        gaps,
        output_dir,
        num_frames,
        sentence_positions,
        progress_queue,
        audio_duration,
        fps,
        is_background_video,
        current_background_frames_dir,
        num_actual_background_frames,
    ) = args

    for frame_index in range(start_frame, end_frame):
        try:
            image = process_frame(
                frame_index,
                subtitles,
                paired_sentences,
                gaps,
                num_frames,
                sentence_positions,
                audio_duration,
                fps,
                is_background_video,
                current_background_frames_dir,
                num_actual_background_frames,
            )
            frame_filename = os.path.join(output_dir, f"frame_{frame_index:04d}.jpg")
            image.save(frame_filename, quality=95)
        finally:
            # Emit one tick per attempted frame, successful or not, so the
            # progress bar keeps moving; a failure still propagates to the
            # parent process through the pool result.
            progress_queue.put(1)

def generate_frames_parallel(subtitles, paired_sentences, gaps, output_dir, sentence_positions, audio_duration, is_background_video, current_background_frames_dir, num_actual_background_frames):
    """Render every frame of the video using a pool of worker processes.

    The frame range ``[0, int(audio_duration * FPS)]`` is split into
    contiguous chunks, one per worker (at most 8 workers). Workers report
    progress through a managed queue that feeds a ``tqdm`` bar. Errors raised
    by workers are printed with their traceback but not re-raised; a final
    check compares the number of ``frame_*.jpg`` files found in
    ``output_dir`` with the expected count and prints the outcome.

    Args:
        subtitles, paired_sentences, gaps, sentence_positions: Subtitle data
            forwarded unchanged to ``generate_frames_chunk``.
        output_dir: Directory that receives the frames (created if missing).
        audio_duration: Audio length in seconds; defines the frame count.
        is_background_video, current_background_frames_dir,
        num_actual_background_frames: Background settings forwarded unchanged.
    """
    # Local import keeps the block self-contained: Empty is what
    # get_nowait() raises when the queue has no items.
    from queue import Empty

    def drain_progress(tick_queue, progress_bar):
        """Move every pending tick from the queue onto the progress bar."""
        ticks = 0
        while not tick_queue.empty():
            try:
                tick_queue.get_nowait()
            except Empty:
                # The queue emptied between empty() and get_nowait().
                break
            ticks += 1
        if ticks:
            progress_bar.update(ticks)

    os.makedirs(output_dir, exist_ok=True)

    num_frames = int(audio_duration * FPS) + 1
    num_processes = max(1, min(multiprocessing.cpu_count(), 8))
    # Ceiling division so the chunks cover every frame.
    chunk_size = -(-num_frames // num_processes)

    chunk_bounds = [
        (i * chunk_size, min((i + 1) * chunk_size, num_frames))
        for i in range(num_processes)
        if i * chunk_size < num_frames  # never create empty chunks
    ]
    if not chunk_bounds:
        print("Nenhum frame para gerar.")
        return

    # The context manager shuts the manager process down when rendering ends.
    with multiprocessing.Manager() as manager:
        progress_queue = manager.Queue()
        args_list = [
            (
                start_frame,
                end_frame,
                subtitles,
                paired_sentences,
                gaps,
                output_dir,
                num_frames,
                sentence_positions,
                progress_queue,
                audio_duration,
                FPS,
                is_background_video,
                current_background_frames_dir,
                num_actual_background_frames,
            )
            for start_frame, end_frame in chunk_bounds
        ]

        # No point in starting more workers than there are chunks.
        with multiprocessing.Pool(processes=len(args_list)) as pool:
            with tqdm(total=num_frames, unit="frame", desc="Gerando frames") as pbar:
                results = pool.map_async(generate_frames_chunk, args_list)

                while not results.ready():
                    drain_progress(progress_queue, pbar)
                    time.sleep(0.1)  # avoid busy-waiting on the queue

                # Pick up ticks emitted just before the workers finished.
                drain_progress(progress_queue, pbar)

                # get() re-raises the first exception from any worker; it is
                # reported here and the frame-count check below flags the gap.
                try:
                    results.get()
                except Exception as e:
                    print(f"Erro durante o processamento paralelo dos frames: {e}")
                    traceback.print_exc()

    expected_frames = num_frames
    generated_frames = sum(
        1 for f in os.listdir(output_dir) if f.startswith("frame_") and f.endswith(".jpg")
    )

    if generated_frames < expected_frames:
        print(f"Aviso: Apenas {generated_frames} de {expected_frames} frames foram gerados.")
    else:
        print(f"Todos os {expected_frames} frames foram gerados com sucesso.")

def main():
    """Coordinate the whole pipeline: subtitles, frames, encoding, cleanup.

    Reads ``audio.wav`` and ``legenda.srt`` from the current directory,
    extracts background frames when ``BACKGROUND_SOURCE`` is a video, renders
    all frames into ``frames/``, encodes ``output_video.mp4`` and removes the
    temporary directories. Any exception is printed with its traceback
    instead of being propagated; temporary directories are left in place in
    that case.
    """
    try:
        audio_file = "audio.wav"
        subtitle_file = "legenda.srt"

        abs_frames_dir = os.path.abspath("frames")
        abs_background_frames_dir = os.path.abspath(BACKGROUND_FRAMES_DIR)

        # The background is treated as a video purely by file extension.
        video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.webm')
        is_background_video = BACKGROUND_SOURCE.lower().endswith(video_extensions)
        num_actual_background_frames = 0

        print("Processando áudio e legendas...")
        audio_duration = get_audio_duration(audio_file)
        subtitles, paired_sentences, gaps = read_subtitles(subtitle_file, audio_duration)

        if not subtitles or not paired_sentences:
            raise ValueError("Nenhuma legenda ou sentença foi processada.")

        # Start from an empty frames directory so stale frames from an
        # earlier run cannot end up in the video.
        if os.path.exists(abs_frames_dir):
            shutil.rmtree(abs_frames_dir)
        os.makedirs(abs_frames_dir, exist_ok=True)

        if is_background_video:
            if os.path.exists(abs_background_frames_dir):
                shutil.rmtree(abs_background_frames_dir)

            extract_background_frames(BACKGROUND_SOURCE, abs_background_frames_dir, IMAGE_SIZE, FPS)
            try:
                bg_files = [name for name in os.listdir(abs_background_frames_dir)
                            if os.path.isfile(os.path.join(abs_background_frames_dir, name)) and
                            name.startswith("bg_frame_") and name.endswith(".jpg")]
                num_actual_background_frames = len(bg_files)
                if num_actual_background_frames == 0:
                    print(f"Aviso: Nenhum frame de fundo (bg_frame_*.jpg) foi encontrado em '{abs_background_frames_dir}' após a extração. O vídeo usará um fundo preto se o fundo for um vídeo.")
                else:
                    print(f"Contagem: {num_actual_background_frames} frames de fundo utilizáveis em '{abs_background_frames_dir}'.")
            except FileNotFoundError:
                print(f"Aviso: Diretório de frames de fundo '{abs_background_frames_dir}' não encontrado após tentativa de extração. O vídeo usará um fundo preto se o fundo for um vídeo.")
                num_actual_background_frames = 0

        print("Calculando posições das legendas...")
        sentence_positions = calculate_sentence_positions(paired_sentences)

        print("Gerando frames do vídeo...")
        generate_frames_parallel(
            subtitles,
            paired_sentences,
            gaps,
            abs_frames_dir,
            sentence_positions,
            audio_duration,
            is_background_video,
            abs_background_frames_dir,
            num_actual_background_frames
        )

        print("Criando vídeo final...")
        create_video(abs_frames_dir, "output_video.mp4", audio_file)

        print("Limpando arquivos temporários...")
        shutil.rmtree(abs_frames_dir)
        if is_background_video and os.path.exists(abs_background_frames_dir):
            shutil.rmtree(abs_background_frames_dir)

        print("Processamento concluído com sucesso!")

    except Exception as e:
        # Top-level boundary: report the failure instead of crashing.
        print(f"Erro durante a execução do programa: {e}")
        traceback.print_exc()

# Entry-point guard: run the pipeline only when executed as a script, so
# worker processes that import this module do not start it again.
if __name__ == "__main__":
    main()

Answer 1

O texto fica “borrado” / “pixelado” porque ele sofre duas etapas de compressão com perdas:

- JPEG nos frames individuais (`image.save(..., quality=95)`);
- H-264 4:2:0 a 8 Mb/s no vídeo final.

Para texto e gradientes fortes (vermelho sobre preto) isso é letal: a sub-amostragem de cor 4:2:0 e os blocos JPEG “comem” bordas finas e fazem as letras parecerem desfocadas.

1 – Salve os frames em PNG (lossless)

python
# --- generate_frames_chunk ---------------------------------
frame_filename = os.path.join(output_dir, f"frame_{frame_index:04d}.png")
# PNG não precisa de 'quality'
image.save(frame_filename, format="PNG", compress_level=0)

⚠️ Altere também a extensão em generate_frames_parallel (contagem de arquivos) e em create_video.

2 – Ajuste o `create_video` para usar PNG + H-264 em 4:4:4 (ou bitrate maior)

python
def create_video(frame_folder, output_video, audio_file):
    frame_pattern = os.path.join(frame_folder, "frame_%04d.png")

    command = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", frame_pattern,
        "-i", audio_file,

        # ► CPU (libx264) – qualidade visual excelente
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", "18",              # ~visualmente lossless
        "-pix_fmt", "yuv444p",     # evita borrões de cor no texto

        # Se preferir NVENC:
        # "-c:v", "h264_nvenc", "-preset", "p7",
        # "-rc", "vbr_hq", "-cq", "19", "-b:v", "0",
        # "-pix_fmt", "yuv444p", "-profile:v", "high444p",

        "-c:a", "aac", "-b:a", "192k",
        "-shortest", output_video,
    ]
    subprocess.run(command, check=True)

yuv444p preserva toda a informação de cor (não há subamostragem de croma).
Se o seu player não aceitar 4:4:4, mantenha yuv420p, mas compense com um CRF menor ou um bitrate maior (-crf 16–18 ou bitrate ≥ 16 Mb/s).

3 – Outros toques que ajudam

| Ponto | Ajuste recomendado |
| --- | --- |
| Bitrate NVENC | Para 1080p a 60 fps com muito texto: 15–24 Mb/s ou, em modo de qualidade constante, `-b:v 0 -cq 19` com `vbr_hq`. |
| Subpixel cropping | O `padding_x_compensation = 20` compensa a margem; deixe-o igual ao `padding_x` usado na função `create_gradient_word` para não cortar o antialias. |
| Fonte | Confirme que `FiraSansCompressed-Bold.otf` está realmente sendo carregada (caso contrário, a fonte de fallback Arial pode parecer mais “gorda” e borrada). |

Resumo

Troque JPEG → PNG nos frames: nada de perda antes da codificação.
Encodifique o vídeo com CRF 18 (ou bitrate alto) e, se possível, 4:4:4.
Mantenha o resto do pipeline igual; essas duas mudanças já deixam as letras nítidas mesmo com gradientes vermelhos.

Depois de aplicar as alterações, gere novamente e confira: o texto deve ficar perfeitamente limpo.

Question