import os import time from selenium import webdriv...

Question

import os
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import shutil
import threading
import subprocess
import json
import zipfile
import rarfile
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from selenium.common.exceptions import TimeoutException

# NOTE: `global` statements at module level have no effect in Python; names
# assigned here are module globals anyway. They are kept as written.
global driver
global current_account_index

# Accounts that the script rotates through; each entry carries the "email"
# and "password" that login() types into the sign-in form.
# NOTE(review): credentials are hard-coded in source, and the e-mail value
# looks like an address-obfuscation placeholder — confirm before running.
ACCOUNTS = [
{"email": "[email protected]", "password": "admins123"},
]
# Index into ACCOUNTS of the account currently in use; advanced modulo
# len(ACCOUNTS) by the functions below when an account has to be switched.
current_account_index = 0

def extract_archives(folder_path):
    """Unpack every ZIP/RAR archive located directly inside ``folder_path``.

    Each archive is extracted into a directory named
    ``extracted_<archive name without extension>`` created next to it, and
    the archive file itself is deleted afterwards.  When all archives have
    been handled, every empty directory below ``folder_path`` is removed.

    Only the top level of ``folder_path`` is scanned for archives.  Errors
    raised while opening or extracting an archive are not caught here and
    propagate to the caller.

    Args:
        folder_path: Directory that holds the downloaded files.
    """
    for file in os.listdir(folder_path):
        full_path = os.path.join(folder_path, file)
        if not os.path.isfile(full_path):
            continue

        lowered = file.lower()
        if lowered.endswith('.zip'):
            label = "ZIP"
        elif lowered.endswith('.rar'):
            label = "RAR"
        else:
            continue

        # The conditional expression only evaluates the selected branch, so
        # the third-party ``rarfile`` module is touched only for .rar files.
        opener = zipfile.ZipFile if label == "ZIP" else rarfile.RarFile
        with opener(full_path, 'r') as archive:
            extract_path = os.path.join(
                folder_path, f"extracted_{os.path.splitext(file)[0]}")
            os.makedirs(extract_path, exist_ok=True)
            archive.extractall(extract_path)
            print(f"📦 Распакован {label}: {file} → {extract_path}")
        # The archive is removed only after its handle has been closed.
        os.remove(full_path)

    # Remove empty directories left behind; walking bottom-up lets a parent
    # become empty (and removable) once its empty children are gone.
    for root, dirs, _ in os.walk(folder_path, topdown=False):
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            try:
                if not os.listdir(dir_path):
                    os.rmdir(dir_path)
                    print(f"🗑 Удалена пустая папка: {dir_path}")
            except OSError:
                # Best effort: a directory that cannot be listed or removed
                # is simply left in place.
                pass

def collect_video_files(root_folder):
    """Return the paths of all video files found below ``root_folder``.

    A file counts as a video when its name ends (case-insensitively) with
    ``.mp4``, ``.mov``, ``.mkv`` or ``.qt``.  Any directory whose path
    contains the substring ``FINAL`` is skipped.

    NOTE(review): the ``FINAL`` test is a plain substring match on the whole
    directory path, so a ``root_folder`` that itself contains ``FINAL``
    yields an empty result — confirm this is intended.

    Args:
        root_folder: Directory to scan recursively.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
    """
    video_extensions = ('.mp4', '.mov', '.mkv', '.qt')
    video_files = []
    for dirpath, _, filenames in os.walk(root_folder):
        if "FINAL" in dirpath:
            continue  # skip the FINAL folder
        video_files.extend(
            os.path.join(dirpath, file)
            for file in filenames
            if file.lower().endswith(video_extensions)
        )
    return video_files

def process_videos_in_background(folder_path):
    """Prepare ``folder_path`` and run the external render script on it.

    Steps:
      1. unpack archives via ``extract_archives``;
      2. delete every file below ``folder_path`` that is not a video
         (``.mp4``, ``.mov``, ``.mkv``, ``.qt``);
      3. start the render script as a child process and echo its output
         line by line until it exits or prints the completion marker.

    Any exception is reported with ``print`` and swallowed; the child
    process, if it was started and is still running, is terminated before
    returning.

    Args:
        folder_path: Directory with the downloaded material to render.
    """
    # Bound before the try block so that ``finally`` can tell whether the
    # child was ever started (an early failure used to raise
    # UnboundLocalError from the cleanup code).
    process = None
    try:
        extract_archives(folder_path)

        # Remove everything that is not a video file.
        for root, _, files in os.walk(folder_path):
            for file in files:
                if not file.lower().endswith(('.mp4', '.mov', '.mkv', '.qt')):
                    file_path = os.path.join(root, file)
                    os.remove(file_path)
                    print(f"🗑 Удален лишний файл: {file_path}")

        processor_script = r"C:\GENERATOR\PYTHON_FILES\ENVATO_RENDER.py"

        # ``-u`` makes the child's output unbuffered so lines arrive promptly.
        process = subprocess.Popen(
            ['python', '-u', processor_script, folder_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            encoding='utf-8',
            errors='replace',
            creationflags=subprocess.CREATE_NEW_PROCESS_GROUP
        )

        # Relay the child's output in real time.
        completion_flag = False
        while True:
            output_line = process.stdout.readline()
            if not output_line and process.poll() is not None:
                break
            if output_line:
                print(f"RENDER: {output_line.strip()}")
                if "🎉 Всё готово! Можно проверять папку FINAL" in output_line:
                    completion_flag = True
                    break

        # Report how the render ended.
        if completion_flag:
            print("✅ Рендеринг успешно завершен")
        else:
            print("⚠️ Рендеринг завершился без сигнала успеха")

    except Exception as e:
        print(f"⚠️ Ошибка в обработке видео: {e}")
    finally:
        # Make sure the child does not outlive this call and release its pipe.
        if process is not None:
            if process.poll() is None:
                process.terminate()
                process.wait()
            if process.stdout is not None:
                process.stdout.close()

def _move_completed_downloads(download_folder, batch_name):
    """Move finished video/archive files into ``download_folder/batch_name``.

    Returns True when at least one matching file was found in
    ``download_folder``, otherwise False.
    """
    finished_suffixes = ('.mp4', '.mov', '.mkv', '.zip', '.rar', '.qt')
    # A name ending with one of these suffixes cannot also end with
    # ``.crdownload``, so no separate partial-download check is needed.
    completed_files = [
        f for f in os.listdir(download_folder)
        if f.lower().endswith(finished_suffixes)
    ]
    if not completed_files:
        return False

    batch_folder = os.path.join(download_folder, batch_name)
    os.makedirs(batch_folder, exist_ok=True)
    for file_name in completed_files:
        src = os.path.join(download_folder, file_name)
        dst = os.path.join(batch_folder, file_name)
        if os.path.exists(src):
            shutil.move(src, dst)
            print(f"📁 Перемещён: {file_name} → {batch_folder}")
    return True


def wait_for_downloads_browser_based(driver, batch_name, download_folder, timeout=1800):
    """Wait for Chrome downloads to finish, then file them under a batch folder.

    Opens ``chrome://downloads/`` in a new tab and polls the download list
    every two seconds.  When three consecutive polls show no item with state
    ``0`` (in progress), the tab is closed, focus returns to the original
    window, and all video/archive files found in ``download_folder`` are
    moved into ``download_folder/batch_name``.

    Args:
        driver: Selenium WebDriver controlling Chrome.
        batch_name: Name of the sub-folder that receives the files.
        download_folder: Directory Chrome downloads into.
        timeout: Maximum number of seconds to poll the download list.

    Returns:
        True if at least one finished file was moved; False on timeout or
        when no finished file was found.

    Raises:
        OSError: if listing or moving the downloaded files fails.  These
            errors are raised after the downloads tab has been closed rather
            than being retried against a tab that no longer exists.
    """
    print("🧭 Открываем chrome://downloads/ в новой вкладке")

    original_window = driver.current_window_handle
    driver.execute_script("window.open('');")
    WebDriverWait(driver, 10).until(lambda d: len(d.window_handles) > 1)

    new_tab = [handle for handle in driver.window_handles if handle != original_window][0]
    driver.switch_to.window(new_tab)
    driver.get("chrome://downloads/")
    time.sleep(10)
    print("⏳ Ожидаем завершения загрузок...")

    def ensure_downloads_tab():
        # One-shot check, ten seconds in, that focus is still on the
        # downloads tab.
        # NOTE(review): this touches the driver from a second thread while
        # the polling loop is using it — confirm this is safe for the driver.
        time.sleep(10)
        if driver.current_window_handle != new_tab:
            print("🔁 Chrome открыл другую вкладку — возвращаемся на downloads")
            try:
                driver.switch_to.window(new_tab)
            except Exception as e:
                print(f"⚠️ Не удалось переключиться обратно: {e}")

    threading.Thread(target=ensure_downloads_tab, daemon=True).start()

    state_labels = {
        0: "⬇️ В процессе",
        1: "❌ Отменено",
        2: "✅ Завершено",
        3: "⚠️ Прервано"
    }
    start_time = time.time()
    stable_iterations = 0
    downloads_finished = False

    while time.time() - start_time < timeout:
        try:
            items = driver.execute_script("""
                const manager = document.querySelector('downloads-manager');
                const items = manager.shadowRoot.querySelector('#downloadsList').items;
                return items ? JSON.stringify(items.map(item => ({
                    fileName: item.fileName,
                    state: item.state,
                    percent: item.percent
                }))) : null;
            """)
            if not items:
                print("📭 Список загрузок пуст или не найден.")
                time.sleep(2)
                continue

            downloads = json.loads(items)

            print(f"\n📦 Найдено {len(downloads)} загрузок:")
            for d in downloads:
                state_str = state_labels.get(d["state"], f"❓ Неизвестно ({d['state']})")
                percent = f"{d['percent']}%" if d["state"] == 0 else "—"
                print(f"  ├─ 📄 {d['fileName']} — {state_str} — {percent}")

            if any(d["state"] == 0 for d in downloads):
                stable_iterations = 0
            else:
                stable_iterations += 1
                if stable_iterations >= 3:
                    print("✅ Все загрузки завершены (стабильное состояние)")
                    downloads_finished = True
                    break

        except Exception as e:
            print(f"⚠️ Ошибка при получении данных: {e}")

        time.sleep(2)

    else:
        # Reached only when the loop ran out of time without a ``break``.
        print("⏱ Время ожидания вышло. Некоторые файлы, возможно, не догрузились.")

    # Close the downloads tab exactly once, whatever the outcome.
    try:
        driver.close()
        driver.switch_to.window(original_window)
        if downloads_finished:
            print("🔙 Вернулись на основную вкладку и закрыли downloads")
        else:
            print("🔙 Вернулись на основную вкладку")
    except Exception as e:
        print(f"⚠️ Ошибка при закрытии вкладки или переключении: {e}")

    if not downloads_finished:
        return False

    print("📂 Проверяем загруженные файлы в папке...")
    time.sleep(3)  # give the system a moment to finish writing the files

    if _move_completed_downloads(download_folder, batch_name):
        return True

    print("⚠️ Не удалось найти завершённые файлы после загрузки.")
    return False

def wait_for_downloads(download_folder, batch_name, expected_count=3, max_wait=60, poll_interval=3):
    """Watch ``download_folder`` for new files and move them into a batch folder.

    Entries that were already present when the call started are ignored.
    The folder is polled until ``expected_count`` new entries that do not end
    with ``.crdownload`` have appeared or ``max_wait`` seconds have passed.
    After a fixed three-second pause the tracked entries are moved into
    ``download_folder/batch_name``.

    NOTE(review): the pause is only a delay; file sizes are not actually
    compared to confirm that writing has stopped.

    Args:
        download_folder: Directory the browser downloads into.
        batch_name: Name of the sub-folder that receives the files.
        expected_count: Number of finished entries to wait for.
        max_wait: Maximum number of seconds to wait for new entries.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        A list with the names of the tracked entries; empty when nothing new
        appeared in time.
    """
    print("⏳ Ожидание загрузки файлов...")

    start_time = time.time()
    existing_files = set(os.listdir(download_folder))
    tracked_files = set()

    # 1. Wait for new entries to appear (up to expected_count).
    while time.time() - start_time < max_wait:
        new_files = set(os.listdir(download_folder)) - existing_files

        # Only finished downloads count (Chrome marks partial ones .crdownload).
        completed = {f for f in new_files if not f.endswith('.crdownload')}
        if completed:
            print(f"📥 Завершенные загрузки: {list(completed)}")
        tracked_files.update(completed)

        if len(tracked_files) >= expected_count:
            break

        print(f"🔄 Ожидаем ещё... Сейчас завершено: {len(tracked_files)}")
        time.sleep(poll_interval)

    if not tracked_files:
        print("⚠️ Не удалось отследить завершенные загрузки.")
        return []

    print("⌛ Убеждаемся, что файлы не увеличиваются (стабилизировались)...")
    time.sleep(3)  # short grace period before touching the files

    # 2. Move the finished entries into the batch folder.
    batch_folder = os.path.join(download_folder, batch_name)
    os.makedirs(batch_folder, exist_ok=True)
    for file_name in tracked_files:
        src = os.path.join(download_folder, file_name)
        dst = os.path.join(batch_folder, file_name)
        if os.path.exists(src):
            shutil.move(src, dst)
            print(f"📁 Перемещён: {file_name} → {batch_folder}")

    return list(tracked_files)

def process_bulk_downloads(driver):
    r"""Download every link listed in the ``.txt`` files of ``C:\ENVATO``.

    For each ``.txt`` file the links are submitted to the site's bulk
    download form three at a time.  Downloads are collected by
    ``wait_for_downloads_browser_based`` into
    ``C:\ENVATO_DOWNLOAD\<txt name>``, the processed links are removed from
    the ``.txt`` file, and once the file is exhausted
    ``process_videos_in_background`` is started on that folder.

    Whenever the remaining-limit counter cannot be read or is not positive,
    the browser is closed and ``login`` is called with the next entry of
    ``ACCOUNTS``.

    NOTE(review): with a single account whose limit is exhausted this keeps
    logging in again indefinitely — confirm whether that is acceptable.

    Args:
        driver: A logged-in Selenium WebDriver.

    Returns:
        The WebDriver in use when processing ended.  It differs from the
        argument if an account switch happened; the argument has then
        already been quit.

    Raises:
        Exception: whatever ``login`` raises during an account switch.
    """
    global current_account_index

    envato_url = "https://www.filesta.com/envato"
    envato_folder = r"C:\ENVATO"
    download_folder = r"C:\ENVATO_DOWNLOAD"
    txt_files = [os.path.join(envato_folder, f) for f in os.listdir(envato_folder) if f.endswith('.txt')]

    def open_envato_page(active_driver, timeout_message="Страница не загрузилась за 10 секунд"):
        # A page-load timeout is not fatal: stop loading and work with
        # whatever part of the page has arrived.
        try:
            active_driver.get(envato_url)
        except TimeoutException:
            print(timeout_message)
            active_driver.execute_script("window.stop();")

    def read_remaining_limit(active_driver):
        # The element text is split on "/" and the first part is parsed.
        limit_element = active_driver.find_element(By.ID, "remainingLimit")
        return int(limit_element.text.split('/')[0].strip())

    def switch_account(active_driver):
        # Advance to the next account, close the old browser, log in again.
        global current_account_index
        current_account_index = (current_account_index + 1) % len(ACCOUNTS)
        new_account = ACCOUNTS[current_account_index]
        active_driver.quit()
        return login(new_account)

    # Initial limit check: switch accounts until one has limit left.
    while True:
        try:
            open_envato_page(driver, "Страница не загрузилась за 10 секунд:")

            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, "remainingLimit"))
            )

            remaining_limit = read_remaining_limit(driver)
            print(f"🔄 Текущий лимит: {remaining_limit}")

            if remaining_limit <= 0:
                print("⚠️ Лимит исчерпан! Переключаем аккаунт...")
                driver = switch_account(driver)
                # Re-check the limit with the new account.
                continue

            break

        except Exception as e:
            print(f"⚠️ Ошибка проверки лимита: {e}")
            break  # the per-batch check below will try again

    # Main loop over the .txt files.
    for file_path in txt_files:
        batch_name = os.path.splitext(os.path.basename(file_path))[0]
        print(f"\n📄 Обработка файла: {file_path}")
        with open(file_path, "r", encoding="utf-8") as f:
            links = [line.strip() for line in f if line.strip()]

        batch_folder = os.path.join(download_folder, batch_name)
        os.makedirs(batch_folder, exist_ok=True)

        while links:
            # Check the limit before every batch.
            try:
                open_envato_page(driver)
                if read_remaining_limit(driver) <= 0:
                    raise Exception("Лимит исчерпан во время обработки")
            except Exception as e:
                print(f"⚠️ {e}")
                driver = switch_account(driver)
                continue

            batch = links[:3]

            try:
                print("🔄 Обновление страницы и клик по 'Массовое скачивание'")
                # From here on pages get 20 seconds to load; the timeout
                # message text below is kept as it was.
                driver.set_page_load_timeout(20)
                open_envato_page(driver)

                bulk_download_toggle = WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, "div.card-header[data-target='#bulkDownloadArea']")))

                driver.execute_script("arguments[0].scrollIntoView({ behavior: 'smooth', block: 'center' });",
                                      bulk_download_toggle)
                driver.execute_script("window.scrollBy(0, 200);")
                bulk_download_toggle.click()
                time.sleep(2)

                # Enter the links.
                textarea = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, "bulkLinks")))
                textarea.clear()
                textarea.send_keys("\n".join(batch))
                print(f"➡️ Добавлены ссылки: {batch}")

                # Start the download.
                download_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.ID, "startBulkDownload")))
                download_button.click()
                print("⬇️ Начато скачивание...")
                time.sleep(5)

                # Wait for completion.
                finished = wait_for_downloads_browser_based(driver, batch_name=batch_name,
                                                            download_folder=download_folder)
                if not finished:
                    # The links are still dropped (as before) so that a dead
                    # link cannot block the queue, but no longer silently.
                    print("⚠️ Загрузка пачки не подтверждена — ссылки всё равно удаляются из .txt")

                # Drop the processed links and persist the remainder.
                links = links[3:]
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(links))
                print("🧹 Ссылки удалены из .txt")

            except TimeoutException:
                print("❌ Время загрузки страницы превысило 20 секунд.")
                driver.execute_script("window.stop();")
                continue
            except Exception as e:
                print(f"⚠️ Критическая ошибка: {e}")
                print("🔁 Повтор итерации из-за ошибки...")
                time.sleep(3)  # short pause before retrying the same batch
                continue

        # Start video processing once the whole .txt is done.
        if os.path.exists(batch_folder) and os.listdir(batch_folder):
            print(f"🚀 Все ссылки из {file_path} обработаны. Запуск обработки видео.")
            process_videos_in_background(batch_folder)
        else:
            print(f"⚠️ Папка {batch_folder} пуста, обработка видео пропущена.")

        print("⚡ Переход к следующему .txt файлу")

    print("🔥 Все файлы обработаны!")
    return driver

def login(account):
    """Start a fresh Chrome session and sign in with ``account``.

    Chrome is launched with the extension from ``extension_path`` loaded and
    with downloads directed to ``download_folder`` without prompting.  The
    function waits up to 300 seconds for the reCAPTCHA response field on the
    login page to be filled, then submits the e-mail and password and waits
    for the URL to change.

    Args:
        account: Mapping with ``"email"`` and ``"password"`` keys.

    Returns:
        The logged-in WebDriver instance.

    Raises:
        Exception: any error raised during the login flow is re-raised after
            the browser has been closed and ``current_account_index`` has
            been advanced to the next account.
    """
    global current_account_index
    extension_path = r"C:\GENERATOR\PYTHON_FILES\zencap"
    download_folder = r"C:\ENVATO_DOWNLOAD"
    # NOTE(review): this value is never passed to Chrome — confirm whether a
    # --proxy-server argument was intended.
    proxy = "127.0.0.1:1081"

    chrome_options = Options()
    chrome_options.add_argument(f"--load-extension={extension_path}")
    chrome_options.add_experimental_option("prefs", {
        "download.default_directory": download_folder,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
        "profile.content_settings.exceptions.automatic_downloads.*.setting": 1,
        "profile.default_content_setting_values.automatic_downloads": 1,
        "credentials_enable_service": False,
        "profile.password_manager_enabled": False,
        "profile.default_content_settings.popups": 0
    })

    service = Service()
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.set_page_load_timeout(10)  # pages get at most 10 seconds to load

    try:
        try:
            driver.get("https://www.filesta.com/login")
        except TimeoutException:
            # Not fatal: stop loading and continue with what has arrived.
            print("Страница не загрузилась за 10 секунд")
            driver.execute_script("window.stop();")

        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "iframe[title='reCAPTCHA']"))
        )
        print("Ожидание прохождения капчи...")

        # The captcha counts as solved once its response field is non-empty.
        WebDriverWait(driver, 300).until(
            lambda d: d.execute_script("return document.getElementById('g-recaptcha-response').value") != ""
        )
        print("Капча пройдена!")

        driver.find_element(By.NAME, "InputEmail").send_keys(account['email'])
        time.sleep(0.5)
        driver.find_element(By.NAME, "InputPassword").send_keys(account['password'])
        time.sleep(0.5)
        driver.find_element(By.CSS_SELECTOR, "button.btn.btn-primary.btn-user.btn-block").click()
        WebDriverWait(driver, 10).until(EC.url_changes("https://www.filesta.com/login"))
        print("Вход выполнен!")

        time.sleep(3)
        return driver

    except Exception as e:
        print(f"❌ Ошибка при логине: {e}")
        driver.quit()
        current_account_index = (current_account_index + 1) % len(ACCOUNTS)
        raise

def main_loop():
    """Log in and run the bulk download, restarting after any failure.

    Each attempt logs in with ``ACCOUNTS[current_account_index]`` and calls
    ``process_bulk_downloads``.  On an exception the account index is
    advanced, the loop sleeps five seconds and tries again; on success the
    loop ends.  The browser opened for an attempt is closed when that attempt
    finishes, whether it succeeded or not.

    NOTE(review): ``login`` also advances ``current_account_index`` when it
    fails, so a failed login moves the index forward twice — confirm whether
    skipping an account is intended.
    """
    global current_account_index
    while True:
        # Reset per attempt so the cleanup below never touches a name that
        # was not bound, nor a browser left over from an earlier attempt.
        driver = None
        try:
            print(f"\n🔁 Используем аккаунт: {ACCOUNTS[current_account_index]['email']}")
            driver = login(ACCOUNTS[current_account_index])
            # If process_bulk_downloads hands back a replacement browser
            # (after an account switch), close that one; a None result keeps
            # the browser opened above.
            driver = process_bulk_downloads(driver) or driver
            print("✅ Загрузка завершена")
            break
        except Exception as e:
            print(f"⚠️ Ошибка: {e}\n⏳ Перезапуск через 5 секунд...")
            current_account_index = (current_account_index + 1) % len(ACCOUNTS)
            time.sleep(5)
            continue  # restart
        finally:
            if driver is not None:
                try:
                    driver.quit()
                    print("🧹 Браузер закрыт")
                except Exception as quit_error:
                    # Best effort: the browser may already be gone.
                    print(f"⚠️ Не удалось закрыть браузер: {quit_error}")

# Run the download loop only when the file is executed as a script.
if __name__ == "__main__":
    main_loop()

помоги изменить этот код. мне нужно, чтобы работа с txt и папками была немного по-другому. теперь в каждом txt будут пункты: *general*, *waterfall* и так далее — название пункта по краям отмечено звёздочками.
нужно чтобы код находил этот пункт и скачивал файлы - после успешной скачки и распаковки файл должен перенестись в название папки из .txt и дочерняя папка название пункта
пример: C:\READY\Alberobello,_Italy\general
рендер запускай также после того, как уже весь .txt будет докачан. мой другой код сам отрендерит по папкам, ему нужна только главная папка.
ПИШИ ТОЛЬКО ПОЛНОСТЬЮ ФУНКЦИЮ

Answer 1

python
def _parse_link_sections(txt_path):
    """Split a link file into its ``*section*`` blocks.

    A line that starts and ends with ``*`` and has a non-empty name in
    between opens a section; the following non-blank lines are its links.
    A header that appears more than once continues the existing section
    instead of discarding the links collected so far.

    Returns:
        ``(preamble, sections)`` where ``preamble`` lists the non-blank lines
        found before the first header and ``sections`` maps each section
        name to its list of links, in file order.
    """
    preamble = []
    sections = {}
    current = None
    with open(txt_path, "r", encoding="utf-8") as f_in:
        for raw in f_in:
            line = raw.strip()
            if not line:
                continue
            name = line.strip("*").strip()
            if line.startswith("*") and line.endswith("*") and len(line) > 2 and name:
                current = name
                sections.setdefault(current, [])
            elif current is None:
                preamble.append(line)
            else:
                sections[current].append(line)
    return preamble, sections


def _write_link_sections(txt_path, preamble, sections):
    """Rewrite the link file with the preamble and the links still pending."""
    # Build the content first so the file is not truncated if this fails.
    out_lines = list(preamble)
    for sec, links in sections.items():
        if not links:
            continue
        out_lines.append(f"*{sec}*")
        out_lines.extend(links)
    with open(txt_path, "w", encoding="utf-8") as f_out:
        f_out.write("\n".join(out_lines))


def process_bulk_downloads(driver):
    r"""Download the links of every ``.txt`` file in ``C:\ENVATO`` by section.

    Each file is split into sections marked ``*general*``, ``*waterfall*``
    and so on.  Links are submitted to the bulk download form three at a
    time.  After each successful batch the downloaded files are moved to
    ``C:\READY\<txt name>\<section>``, archives are unpacked there, and every
    non-video file is deleted.  The ``.txt`` file is rewritten after each
    batch with only the pending links, so an interrupted run can resume.
    When all sections of a file are done, ``process_videos_in_background``
    is started on ``C:\READY\<txt name>``.

    Lines before the first section header are not downloaded; they are
    reported and written back unchanged.

    A failed batch is retried after three seconds.
    NOTE(review): there is no retry limit, so a permanently failing link
    keeps the loop busy — confirm whether a cap is wanted.

    Args:
        driver: A logged-in Selenium WebDriver.

    Returns:
        The WebDriver in use when processing ended.  It differs from the
        argument if an account switch happened; the argument has then
        already been quit.

    Raises:
        Exception: whatever ``login`` raises during an account switch.
    """
    global current_account_index

    ENVATO_URL          = "https://www.filesta.com/envato"
    ENVATO_TXT_FOLDER   = r"C:\ENVATO"
    TMP_DOWNLOAD_FOLDER = r"C:\ENVATO_DOWNLOAD"
    READY_ROOT_FOLDER   = r"C:\READY"
    VIDEO_EXTENSIONS    = ('.mp4', '.mov', '.mkv', '.qt')

    txt_files = [os.path.join(ENVATO_TXT_FOLDER, f)
                 for f in os.listdir(ENVATO_TXT_FOLDER) if f.lower().endswith(".txt")]

    def _open_envato_page(active_driver):
        # A page-load timeout is not a reason to switch accounts: stop
        # loading and work with whatever part of the page has arrived.
        try:
            active_driver.get(ENVATO_URL)
        except TimeoutException:
            print("⏱ Страница не загрузилась вовремя — останавливаем загрузку")
            active_driver.execute_script("window.stop();")

    for txt_path in txt_files:
        batch_name = os.path.splitext(os.path.basename(txt_path))[0]

        preamble, sections = _parse_link_sections(txt_path)

        if not sections:
            print(f"⚠️ В {txt_path} нет секций/ссылок – пропуск.")
            continue

        if preamble:
            print(f"⚠️ В {txt_path} есть {len(preamble)} строк(и) до первой секции — они не скачиваются и сохраняются в файле")

        print(f"\n📂 Обработка набора «{batch_name}» ({len(sections)} секций)")

        for section_name, link_pool in list(sections.items()):
            if not link_pool:
                continue

            ready_folder = os.path.join(READY_ROOT_FOLDER, batch_name, section_name)
            os.makedirs(ready_folder, exist_ok=True)
            print(f"\n⭐ Секция *{section_name}* — осталось ссылок: {len(link_pool)}")

            # Batched downloads within the section.
            while link_pool:
                # Check the remaining limit; switch accounts when it cannot
                # be read or is used up.
                try:
                    _open_envato_page(driver)
                    limit_elem = WebDriverWait(driver, 20).until(
                        EC.presence_of_element_located((By.ID, "remainingLimit")))
                    remaining = int(limit_elem.text.split("/")[0].strip())
                    if remaining <= 0:
                        raise Exception("Лимит исчерпан")
                except Exception as e:
                    print(f"⚠️ {e} – переключаю аккаунт")
                    current_account_index = (current_account_index + 1) % len(ACCOUNTS)
                    driver.quit()
                    driver = login(ACCOUNTS[current_account_index])
                    continue  # try again with the new account

                batch_links = link_pool[:3]

                try:
                    # Paste the links into the bulk download form.
                    _open_envato_page(driver)
                    toggle = WebDriverWait(driver, 30).until(
                        EC.element_to_be_clickable(
                            (By.CSS_SELECTOR, "div.card-header[data-target='#bulkDownloadArea']")))
                    driver.execute_script(
                        "arguments[0].scrollIntoView({behavior:'smooth',block:'center'});", toggle)
                    driver.execute_script("window.scrollBy(0, 200);")
                    toggle.click()
                    textarea = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.ID, "bulkLinks")))
                    textarea.clear()
                    textarea.send_keys("\n".join(batch_links))
                    WebDriverWait(driver, 10).until(
                        EC.element_to_be_clickable((By.ID, "startBulkDownload"))).click()

                    print(f"⬇️ Скачивание {len(batch_links)} файл(ов)…")

                    # Wait for completion via chrome://downloads; finished
                    # files are gathered in a temporary per-section folder.
                    tmp_folder_tag = f"{batch_name}_{section_name}_tmp"
                    ok = wait_for_downloads_browser_based(
                        driver,
                        batch_name=tmp_folder_tag,
                        download_folder=TMP_DOWNLOAD_FOLDER
                    )
                    if not ok:
                        raise Exception("Скачивание не подтвердило успех")

                    tmp_folder_path = os.path.join(TMP_DOWNLOAD_FOLDER, tmp_folder_tag)
                    if not os.path.exists(tmp_folder_path):
                        raise Exception("Не найден временный каталог загрузки")

                    # Move everything into the section's ready folder.
                    for fname in os.listdir(tmp_folder_path):
                        shutil.move(
                            os.path.join(tmp_folder_path, fname),
                            os.path.join(ready_folder, fname)
                        )
                    shutil.rmtree(tmp_folder_path, ignore_errors=True)

                    # Unpack archives, then drop everything that is not video.
                    extract_archives(ready_folder)
                    for root, _, files in os.walk(ready_folder):
                        for f in files:
                            if not f.lower().endswith(VIDEO_EXTENSIONS):
                                os.remove(os.path.join(root, f))

                    print("✅ Пакет успешно обработан")
                    # Drop the processed links and persist the remainder.
                    link_pool = link_pool[3:]
                    sections[section_name] = link_pool
                    _write_link_sections(txt_path, preamble, sections)

                except Exception as ex:
                    print(f"⚠️ Ошибка внутри пакета: {ex}\n🔄 Повтор через 3 сек")
                    time.sleep(3)
                    continue  # retry the same batch

        # Render once every section of this .txt has been downloaded.
        parent_ready = os.path.join(READY_ROOT_FOLDER, batch_name)
        if any(sections.values()):
            print(f"⚠️ В {batch_name} остались необработанные ссылки – рендер пропущен")
        elif not os.path.isdir(parent_ready):
            # Headers without links never create the folder; nothing to render.
            print(f"⚠️ Папка {parent_ready} не найдена – рендер пропущен")
        else:
            print(f"\n🚀 Все секции {batch_name} готовы — запускаю рендер.")
            process_videos_in_background(parent_ready)

    print("\n🔥 Все .txt обработаны!")
    return driver

import os import time from selenium import webdriv...

Câu hỏi

Suy nghĩ

Modifying code for sections

Handling folder and render updates

Designing the function for section movement

Building the bulk download function

Trả lời

Chia sẻ Q&A này