import asyncio import aiohttp from bs4 import Beau...
Creado el: 13 de septiembre de 2025
Respondido usando GPT-5 Thinking por Chat01
Creado el: 13 de septiembre de 2025
Respondido usando GPT-5 Thinking por Chat01
import asyncio
import html
import json
import logging
import os
import random
from urllib.parse import urljoin

import aiofiles
import aiohttp
from bs4 import BeautifulSoup
from googletrans import Translator  # Import Google Translate library
# Timestamped, level-tagged log lines for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Credentials are read from the environment so the bot token never lives in
# the source file (or in anything pasted from it): whoever holds the token can
# control the bot. Set TELEGRAM_BOT_TOKEN (and optionally CHAT_ID) before running.
TELEGRAM_BOT_TOKEN = os.getenv('TELEGRAM_BOT_TOKEN', '')
CHAT_ID = os.getenv('CHAT_ID', '5920264584')
if not TELEGRAM_BOT_TOKEN:
    logging.warning("TELEGRAM_BOT_TOKEN is not set; Telegram requests will be rejected")
# Common prefix of every team news page.
_NEWS_ROOT = 'https://as.com/noticias'

# Team page slugs, in the order the pages are scheduled.
_TEAM_SLUGS = (
    'albacete-balompie',
    'ud-almeria',
    'burgos-cf',
    'cadiz-cf',
    'cartagena-fc',
    'cd-castellon',
    'cordoba-cf',
    'deportivo',
    'sd-eibar',
    'elche-cf',
    'club-deportivo-eldense',
    'granada-cf',
    'sd-huesca',
    'levante-ud',
    'malaga-cf',
    'cd-mirandes',
    'racing-ferrol',
    'racing-de-santander',
    'real-oviedo',
    'real-zaragoza',
    'sporting',
    'cd-tenerife',
)

# Pages scanned for headlines on every polling cycle.
urls = [f'{_NEWS_ROOT}/{slug}/' for slug in _TEAM_SLUGS]
# JSON file that stores the links already handled.
found_links_file = 'found_links2.json'

# Template shared by the Chrome-style User-Agent strings below.
_CHROME_UA = (
    'Mozilla/5.0 ({platform}) AppleWebKit/537.36 (KHTML, like Gecko)'
    ' Chrome/{version} Safari/537.36'
)
_CHROME_BUILDS = (
    ('Windows NT 10.0; Win64; x64', '70.0.3538.77'),
    ('Windows NT 6.1; Win64; x64', '79.0.3945.117'),
    ('X11; Linux x86_64', '44.0.2403.157'),
)

# Pool from which a User-Agent header is picked at random for each page request.
user_agents = [
    *(_CHROME_UA.format(platform=platform, version=version)
      for platform, version in _CHROME_BUILDS),
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15'
    ' (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15',
    # Add more User-Agents if necessary
]
# Single module-level translator instance; send_message() uses it to translate
# headlines from Spanish to Russian.
translator = Translator()
async def load_found_links(file_name):
    """Load the previously stored links from a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist or does not contain valid JSON.
    """
    try:
        async with aiofiles.open(file_name, 'r', encoding='utf-8') as f:
            data = await f.read()
    except FileNotFoundError:
        logging.warning(f"File not found: {file_name}")
        return []
    try:
        return json.loads(data)
    except json.JSONDecodeError as exc:
        # An empty or truncated file (for example after an interrupted write)
        # is treated like a missing one instead of crashing at startup.
        logging.warning(f"Invalid JSON in file: {file_name} ({exc})")
        return []
async def save_found_links(file_name, found_links):
    """Overwrite ``file_name`` with ``found_links`` serialized as JSON.

    Args:
        file_name: Path of the file to (re)write.
        found_links: JSON-serializable collection of links.
    """
    async with aiofiles.open(file_name, 'w') as out:
        serialized = json.dumps(found_links)
        await out.write(serialized)
    logging.info(f"Links saved to file: {file_name}")
async def send_message(text, url):
    """Translate a headline to Russian and post it to Telegram as a link.

    Args:
        text: Headline text in Spanish.
        url: Article URL used as the link target.

    Returns:
        True when Telegram answered with HTTP 200, False otherwise.

    Raises:
        Whatever the translator or the HTTP client raises; errors are not
        swallowed here so the caller can decide whether to retry.
    """
    # translate() is invoked as a plain (synchronous) call, so run it in a
    # worker thread to keep the event loop free for the other page tasks.
    translation = await asyncio.to_thread(translator.translate, text, src='es', dest='ru')
    # NOTE(review): newer googletrans releases appear to expose translate() as
    # a coroutine function; await the result in that case — confirm against
    # the installed version.
    if asyncio.iscoroutine(translation):
        translation = await translation
    translated_text = translation.text

    # With parse_mode=HTML, raw <, > and & in the title or a quote in the URL
    # would break the markup and make Telegram reject the message.
    link_text = html.escape(translated_text, quote=False)
    link_href = html.escape(url, quote=True)
    message_text = f"<a href='{link_href}'>{link_text}</a>"

    send_url = f'https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage'
    payload = {
        'chat_id': CHAT_ID,
        'text': message_text,
        'parse_mode': 'HTML'
    }
    # Bound the request so a stalled connection cannot hang the caller.
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(send_url, data=payload) as response:
            if response.status == 200:
                logging.info(f"Message sent: {translated_text}")
                return True
            resp_text = await response.text()
            logging.error(f"Failed to send message: {resp_text}")
            return False
async def process_url(session, url, found_links):
    """Fetch one news page and send a message for every headline not seen yet.

    Args:
        session: Open aiohttp client session used for the GET request.
        url: Page to scan for ``<h2 class="s__tl">`` headline links.
        found_links: Collection of article URLs that were already handled.
            When it is a ``set``, each new link is added to it before the
            message is sent, so concurrent calls sharing the set skip it; the
            link is removed again if sending raises.

    Returns:
        List of article URLs for which send_message() completed in this call.
    """
    headers = {'User-Agent': random.choice(user_agents)}
    new_links = []
    # Only a set can be used to claim links across concurrent tasks.
    shared = found_links if isinstance(found_links, set) else None
    attempted = set()  # links already tried during this call
    try:
        await asyncio.sleep(random.uniform(0.5, 1.5))  # Random delay before request
        async with session.get(url, headers=headers) as response:
            if response.status != 200:
                logging.warning(f"Failed to fetch URL: {url} with status code: {response.status}")
                return new_links
            logging.info(f"Successfully fetched page: {url}")
            page = await response.text()
        # The response is released before any message is sent, so the
        # connection is not held while waiting on Telegram.
        soup = BeautifulSoup(page, 'html.parser')
        for h2 in soup.find_all('h2', class_='s__tl'):
            link = h2.find('a', href=True)
            if not link:
                continue
            # urljoin resolves '/path', 'path' and '//host/path' correctly and
            # leaves absolute URLs untouched.
            href = urljoin(url, link['href'])
            if href in found_links or href in attempted:
                continue
            attempted.add(href)
            if shared is not None:
                # Claim before the first await: the same article can appear on
                # several pages that are being processed concurrently.
                shared.add(href)
            title = link.get_text().strip()
            try:
                await send_message(title, href)
            except Exception as e:
                # One failed message must not drop the remaining headlines;
                # release the claim so the link is retried on a later call.
                if shared is not None:
                    shared.discard(href)
                logging.error(f"Error sending link: {href}. Error: {str(e)}")
                continue
            new_links.append(href)
    except Exception as e:
        logging.error(f"Error processing URL: {url}. Error: {str(e)}")
    return new_links
async def main():
    """Poll every page in ``urls`` forever and persist newly handled links.

    Loads the stored links once, then repeats: scan all pages concurrently,
    merge the links each scan returned, save the full set when anything new
    was found, and wait 10 seconds.
    """
    found_links = set(await load_found_links(found_links_file))
    # One session for the whole run so connections are reused across cycles.
    # gather() waits for every page task, so an explicit timeout keeps a single
    # stalled request from holding up the whole cycle.
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        while True:
            tasks = [process_url(session, url, found_links) for url in urls]
            results = await asyncio.gather(*tasks)
            new_found_links = set().union(*results)
            if new_found_links:
                found_links.update(new_found_links)
                await save_found_links(found_links_file, list(found_links))
            logging.info("Waiting for the next cycle...")
            await asyncio.sleep(10)  # Wait 10 seconds before the next cycle
if __name__ == '__main__':
    asyncio.run(main())

errors:
File "c:\Python\Парсеры\AScomSegunda.py", line 8, in <module>
from googletrans import Translator # Import Google Translate library
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\googletrans\__init__.py", line 6, in <module>
from googletrans.client import Translator
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\googletrans\client.py", line 13, in <module>
import httpx
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\httpx\__init__.py", line 2, in <module>
from ._api import delete, get, head, options, patch, post, put, request, stream
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\httpx\_api.py", line 3, in <module>
from ._client import Client, StreamContextManager
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\httpx\_client.py", line 8, in <module>
from ._auth import Auth, BasicAuth, FunctionAuth
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\httpx\_auth.py", line 10, in <module>
from ._models import Request, Response
File "C:\Users\User\AppData\Local\Programs\Python\Python313\Lib\site-packages\httpx\_models.py", line 1, in <module>
import cgi
ModuleNotFoundError: No module named 'cgi'

fix it
You’re hitting this because you’re running Python 3.13, which removed the built-in `cgi` module. `googletrans` pulls in `httpx`, and some versions of `httpx` still import `cgi`, so on 3.13 that import fails with `ModuleNotFoundError: No module named 'cgi'`. (Python documentation)
Install a drop-in replacement for the removed stdlib module:

```bash
# (Windows)
py -m pip install --upgrade pip
py -m pip install legacy-cgi googletrans==4.0.0rc1
```
`legacy-cgi` restores the `cgi` module so `httpx` (and thus `googletrans`) can import it on Python 3.13. (PyPI)
Alternate route: run this project under Python 3.12 (where `cgi` still exists) if you prefer not to add the backport. (pradyunsg-cpython-lutra-testing.readthedocs.io)
The token you pasted can be used by anyone to control your bot. Revoke and regenerate it now in @BotFather: `/mybots` → select your bot → API Token → Revoke current token (or send `/revoke` / `/token` directly).

Example (optional) change to keep secrets out of code:
```python
import os

TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.getenv("CHAT_ID")

# then set them in your environment before running:
#   set TELEGRAM_BOT_TOKEN=123:ABC...
#   set CHAT_ID=5920264584
```
- Python 3.13 removed `cgi` per PEP 594 → anything importing it breaks. (Python documentation)
- `legacy-cgi` is a maintained fork that provides the same API name (`cgi`) on PyPI, so existing imports keep working without code changes. (PyPI)
- The `googletrans` project itself acknowledges the 3.13/`cgi` issue. (GitHub)

If you want, I can also show a minimal patch to add timeouts and avoid blocking the event loop during translation—but the error you posted will be resolved by installing `legacy-cgi` (or by using Python 3.12).