import os import re import pandas as pd from colle...

Creado el: 10 de enero de 2026

Respondido usando GPT-5.2 Thinking por Chat01

Pregunta

image.pngimage.png
image.pngimage.png
image.pngimage.png

import os
import re
import pandas as pd
from collections import deque
from sympy import symbols, Eq, solve
from datetime import datetime, timedelta

from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# ===================== SETTINGS =====================

# Path of the input workbook holding the match rows (loaded with pd.read_excel
# below). The results workbook "wtp_results.xlsx" is written to this file's directory.
excel_file_path = r"C:\Users\User\Desktop\results\results.xlsx"

# =====================================================

# League name -> (home threshold, away threshold), both expressed as odds.
# Lookups are exact, case-sensitive dict lookups on the league string read from
# the workbook; leagues missing here fall back to (2.40, 3.00) at the call sites.
league_thresholds = {
"Albania Superiore": (2.52, 2.73),
"Andorra Primera": (2.42, 2.75),
"Armenia Premier": (2.32, 3.20),
"Bosnia WWin Liga": (2.00, 3.20),
"Croatia HNL": (2.29, 3.25),
"Cyprus League": (2.25, 3.20),
"Cyprus Division 2": (2.46, 2.70),
"Czech Chance Liga": (2.25, 3.30),
"Denmark Superliga": (2.40, 3.00),
"Greece Super League": (2.36, 3.40),
"Greece Super League 2": (2.25, 3.30),
"Hungary OTP Bank Liga": (2.17, 3.40),
"Hungary Merkantil Bank Liga": (2.38, 3.30),
"Luxembourg BGL Ligue": (2.20, 3.00),
"Malta Premier League": (2.75, 2.60),
"Montenegro Prva Liga": (2.50, 3.20),
"North Macedonia 1 MFL": (2.13, 3.60),
"Slovakia Nike Liga": (2.30, 3.00),
"Slovenia Prva Liga": (2.47, 3.00),
"Algeria Ligue 2": (1.87, 5.00),
"Mexico Liga de Expansion MX": (2.00, 4.00),
"South Africa Premiership": (2.31, 3.45),
"Croatia Prva NL": (2.40, 2.80),
"India ISL": (2.17, 3.33),
"Indonesia Liga 1": (2.27, 3.45),
# NOTE(review): differs from "Bulgaria Vtora Liga" below only by letter case,
# yet carries different thresholds — confirm both spellings occur in the data.
"Bulgaria Vtora liga": (2.27, 3.45),
"Zambia Super League": (2.47, 3.10),
"Romania Liga 3": (2.30, 2.94),
"Myanmar National League": (2.50, 2.50),
"Tanzania Ligi Kuu Bara": (2.20, 3.30),
"France Nacional": (2.32, 3.30),
"Nigeria NPFL": (1.50, 6.00),
"Turkey 1.Lig": (2.27, 3.20),
"Algeria Ligue 1": (2.05, 4.30),
"Italy Serie C - A": (2.38, 3.30),
"Italy Serie B": (2.46, 3.50),
"Algeria Ligue 2 - 21": (2.00, 3.20),
"England National League South": (2.38, 3.00),
"Brazil Paulista A4": (2.44, 3.00),
"Ethiopia": (3.12, 2.85),
"Spain LaLiga": (2.29, 3.61),
"Australia NPL": (2.22, 3.00),
"Bulgaria Vtora Liga": (2.08, 3.70),
"Italy Serie C - C": (2.40, 3.20),
"Spain Segunda": (2.11, 4.02),
"France Ligue 1": (2.30, 3.60),
"Brazil Serie A": (2.05, 4.00),
"Brazil Serie B": (2.05, 4.00),
"Brazil Serie C": (2.15, 3.70),
"Iceland Besta Delid": (2.12, 3.10),
"Australia NPL Northern": (2.47, 2.30),
"Norway Division 2": (2.08, 2.94),
"USA MLS": (2.27, 3.20),
"Argentina": (2.22, 3.80),
"Mexico": (2.08, 3.80),
"Bundesliga - 2": (2.16, 3.55),
"France - 2": (2.16, 3.55),
# NOTE(review): key spellings such as "Eridivisie" must match the workbook's
# league column exactly — verify against the data before "correcting" them.
"Netherlands - Eridivisie": (2.20, 3.20),
"England - Premier League": (2.18, 3.60)
}

# ===================== helpers =====================

# Dash-like code points folded to ASCII '-': hyphen, non-breaking hyphen,
# figure dash, en dash, em dash, horizontal bar and the minus sign.
_DASH_TABLE = str.maketrans({
    "\u2010": "-",
    "\u2011": "-",
    "\u2012": "-",
    "\u2013": "-",
    "\u2014": "-",
    "\u2015": "-",
    "\u2212": "-",
})


def normalize_dashes(text):
    """Return *text* with Unicode dash variants replaced by ASCII '-'.

    Non-string values (e.g. NaN or numbers coming from a spreadsheet cell)
    are returned unchanged.
    """
    if isinstance(text, str):
        # One pass over the string instead of a chain of .replace() calls.
        return text.translate(_DASH_TABLE)
    return text

def clean_team_name(name):
    """Normalize a team name for comparison.

    Dash variants are folded via ``normalize_dashes``, surrounding whitespace
    is removed and internal whitespace runs collapse to a single space.
    Non-string values are returned unchanged.
    """
    if not isinstance(name, str):
        return name
    # str.split() with no argument both trims and collapses whitespace.
    return ' '.join(normalize_dashes(name).split())

def sanitize_pair_text(p):
    """Clean up raw pair text typed or pasted by the user.

    Trims the text, folds dash variants to '-', and removes a trailing
    ``& <drive>:\\...`` / ``-> & <drive>:/...`` fragment (presumably a shell
    command line pasted along with the input — TODO confirm the source).
    Non-string values are returned unchanged.
    """
    if not isinstance(p, str):
        return p
    p = normalize_dashes(p.strip())
    # Both patterns accept either path separator after the drive letter; the
    # arrow form has to run first, otherwise a dangling "->" would be left over.
    p = re.sub(r"\s*->\s*&\s*[A-Za-z]:[\\/].*", "", p)
    p = re.sub(r"\s*&\s*[A-Za-z]:[\\/].*", "", p)
    return p.strip()

def parse_date_to_dt(raw_date):
    """Convert a spreadsheet date value to ``datetime``, or ``None`` on failure.

    Accepted inputs:
    - ``datetime`` instances: returned as is;
    - ``int``/``float``: a whole-day offset from 1899-12-30 (the Excel
      serial-date epoch); the fractional part is discarded;
    - ``str``: ``DD.MM.YYYY``, optionally followed by `` - <anything>``
      (only the part before the first `` - `` is parsed).
    Anything else, or an unparseable value, yields ``None``.
    """
    if isinstance(raw_date, datetime):
        return raw_date
    if isinstance(raw_date, (float, int)):
        try:
            return datetime(1899, 12, 30) + timedelta(days=int(raw_date))
        except (ValueError, OverflowError):
            # NaN -> ValueError; inf or an out-of-range day count -> OverflowError.
            return None
    if isinstance(raw_date, str):
        # With no ' - ' separator the split returns the whole string.
        date_part = raw_date.split(' - ')[0].strip()
        try:
            return datetime.strptime(date_part, '%d.%m.%Y')
        except ValueError:
            return None
    return None

def to_float(x) -> float:
    """Convert *x* to ``float``, accepting a decimal comma ("2,35" -> 2.35).

    Raises:
        ValueError: if the textual form of *x* is not a valid number.
    """
    return float(str(x).replace(',', '.'))

def fmt2(x) -> str:
    """Format *x* with exactly two decimals (2 -> "2.00").

    Values that cannot be converted by ``to_float`` are returned as ``str(x)``
    so that a bad cell never aborts report generation.
    """
    try:
        return f"{to_float(x):.2f}"
    except (TypeError, ValueError):
        return str(x)

# ===================== read matches =====================

# Load all match rows. Columns are read by POSITION (not by header name):
# 0 league, 1 date, 2 home team, 3 away team, 4 home odds, 5 draw odds, 6 away odds.
df = pd.read_excel(excel_file_path)

matches = []
for _, row in df.iterrows():
    # .iloc makes the positional access explicit; plain row[0] relies on the
    # deprecated positional fallback of Series.__getitem__.
    matches.append((
        row.iloc[0],                   # league
        row.iloc[1],                   # date
        clean_team_name(row.iloc[2]),  # home
        clean_team_name(row.iloc[3]),  # away
        row.iloc[4],                   # odds1
        row.iloc[5],                   # odds_draw
        row.iloc[6],                   # odds2
    ))

# ===================== chains =====================

def find_chains(matches, start_team, end_team, min_depth=1, max_depth=4):
    """Find chains of matches that link *start_team* to *end_team*.

    Every match is an undirected edge between its home and away team. The
    search is breadth-first, so shorter chains come first in the result.

    Args:
        matches: iterable of hashable 7-tuples
            ``(league, date, home, away, odds1, odds_draw, odds2)``.
        start_team: team the chain starts from.
        end_team: team the chain must reach.
        min_depth: minimum number of matches in a reported chain.
        max_depth: maximum number of matches in a chain.

    Returns:
        A list of chains; each chain is a list of match tuples in traversal
        order. A match is never used twice within one chain, but a team may
        be visited more than once.
    """
    # Adjacency list: team -> [(opponent, match), ...].
    graph = {}
    for match in matches:
        _league, _date, home_team, away_team, _odds1, _odds_draw, _odds2 = match
        graph.setdefault(home_team, []).append((away_team, match))
        graph.setdefault(away_team, []).append((home_team, match))

    chains = []
    queue = deque([(start_team, [], set())])
    while queue:
        current_team, path, visited = queue.popleft()

        if current_team == end_team and path:
            if min_depth <= len(path) <= max_depth:
                chains.append(path)
            # Never extend a chain through the destination team.
            continue

        if len(path) >= max_depth:
            continue

        for neighbor, match in graph.get(current_team, []):
            if match not in visited:
                queue.append((neighbor, path + [match], visited | {match}))

    return chains

# ===================== input pairs =====================

# Read the request: either a comma/semicolon separated list of
# "Team1 - Team2" pairs, or a single team name (the destination team is then
# asked for separately).
raw_input_teams = input(
    "Введите либо 2 команды (как раньше), либо список пар через запятую.\n"
    "Пример: Malaga - Granada, Granada - Albacete\n"
    "Можно использовать '-', '–' или '—'.\n"
    "Ввод: "
).strip()

raw_input_teams = sanitize_pair_text(raw_input_teams)

team_pairs = []
# The dash must be surrounded by whitespace so hyphenated team names stay intact.
pair_splitter = re.compile(r"\s+[-]\s+")
has_any_dash = '-' in raw_input_teams

if (',' in raw_input_teams) or has_any_dash:
    parts = [p.strip() for p in re.split(r"[,\n;]+", raw_input_teams) if p.strip()]
    for p in parts:
        p = sanitize_pair_text(p)
        if not p:
            continue
        teams = [t for t in pair_splitter.split(p) if t.strip()]
        if len(teams) != 2:
            print(f"Пропуск (неверный формат пары, нужен 'Команда1 - Команда2'): {p}")
            continue
        a = clean_team_name(teams[0])
        b = clean_team_name(teams[1])
        if a and b:
            team_pairs.append((a, b))

# Legacy two-prompt mode: used when the input was a single team name or when
# no valid pair could be parsed from it.
if not team_pairs:
    start_team = clean_team_name(raw_input_teams)
    end_team = clean_team_name(input("Введите название команды назначения: ").strip())
    team_pairs = [(start_team, end_team)]

# ===================== calculator =====================

class TeamStrengthCalculator:
    """Derive a relative strength difference between two teams from match odds.

    Typical use: ``add_games`` -> ``calculate_strength_difference_method_2``
    -> ``simplify_with_sympy_method2``.
    """

    def __init__(self):
        self.games = []               # dicts built by add_games()
        self.equations_method_1 = []  # kept for compatibility; not filled by this class
        self.equations_method_2 = []  # strings of the form "A = B + 1.234"
        self.teams = set()            # cleaned names of all teams seen

    def add_games(self, games):
        """Register matches given as
        ``(league, date, home, away, home_odds, draw_odds, away_odds)`` tuples.

        Team names are cleaned with ``clean_team_name`` and odds are converted
        with ``to_float`` (which raises ``ValueError`` on non-numeric odds).
        """
        for g in games:
            league, date, home_team, away_team, home_odds, draw_odds, away_odds = g
            home_clean = clean_team_name(home_team)
            away_clean = clean_team_name(away_team)
            self.games.append({
                'league': league,
                'date': date,
                'home_team': home_clean,
                'away_team': away_clean,
                'home_odds': to_float(home_odds),
                'draw_odds': to_float(draw_odds),
                'away_odds': to_float(away_odds)
            })
            self.teams.add(home_clean)
            self.teams.add(away_clean)

    def calculate_strength_difference_method_2(self):
        """Append one "stronger = weaker + diff" equation per registered game.

        ``diff`` is the absolute gap between 100/odds of the side with the
        lower odds and 100/threshold of the league; which team ends up on the
        left depends on whether those odds are below the league threshold.
        """
        for game in self.games:
            home_team = game['home_team']
            away_team = game['away_team']
            home_odds = game['home_odds']
            away_odds = game['away_odds']

            threshold_home, threshold_away = league_thresholds.get(game['league'], (2.40, 3.00))

            if home_odds < away_odds:
                home_strength = abs((100 / threshold_home) - (100 / home_odds))
                if home_odds < threshold_home:
                    equation = f"{home_team} = {away_team} + {home_strength:.3f}"
                else:
                    equation = f"{away_team} = {home_team} + {home_strength:.3f}"
            else:
                away_strength = abs((100 / threshold_away) - (100 / away_odds))
                if away_odds < threshold_away:
                    equation = f"{away_team} = {home_team} + {away_strength:.3f}"
                else:
                    equation = f"{home_team} = {away_team} + {away_strength:.3f}"
            self.equations_method_2.append(equation)

    def simplify_with_sympy_method2(self, start_team, end_team):
        """Solve the method-2 equations with *start_team* fixed at strength 0.

        Returns:
            A tuple ``(method2_str, warnings, (th_home, th_away))`` where
            ``method2_str`` is the result line (without a "Method 2:" prefix,
            empty on failure), ``warnings`` is a list of messages, and the
            thresholds are those of the first game's league formatted with
            two decimals (empty strings when no games were added).
        """
        if not self.games:
            return "", ["Нет матчей для расчета."], ("", "")

        first_league = self.games[0]['league']
        threshold_home, threshold_away = league_thresholds.get(first_league, (2.40, 3.00))
        thresholds = (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        # Index-based symbol names: names derived from the team name could
        # collide ("A-B" vs "A B") or be mis-parsed by symbols() when the
        # name contains ',' or ':'. The symbol name is never shown to the user.
        team_symbols = {}
        for index, team in enumerate(self.teams):
            team_symbols[team] = 0 if team == start_team else symbols(f"team_{index}")

        sympy_equations = []
        for equation in self.equations_method_2:
            parts = equation.split('=')
            if len(parts) == 2 and '+' in parts[1]:
                left_team, right = parts[0].strip(), parts[1].strip()
                # rsplit keeps team names that themselves contain '+' intact.
                right_team, strength_diff = right.rsplit('+', 1)
                right_team = right_team.strip()
                strength_diff = float(strength_diff.strip())
                sympy_equations.append(
                    Eq(team_symbols[left_team], team_symbols[right_team] + strength_diff)
                )

        symbols_list = [sym for sym in team_symbols.values() if sym != 0]
        solution = solve(sympy_equations, symbols_list, dict=True) if sympy_equations else []

        warnings = []
        end_symbol = team_symbols.get(end_team)
        if not solution or end_symbol is None:
            return "", ["Нет решения для метода 2."], thresholds

        sol = solution[0]
        if end_symbol not in sol:
            return "", [f"Команда {end_team} не найдена в решении метода 2."], thresholds

        try:
            end_value_numeric = float(sol[end_symbol].evalf())
        except Exception:
            return "", [f"Не удалось вычислить численное значение силы {end_team} в методе 2."], thresholds

        strength_diff = -end_value_numeric
        # NOTE(review): both odds are derived from threshold_home;
        # threshold_away is only reported back — confirm this is intended.
        new_odds_start = 100 / ((100 / threshold_home) + strength_diff)
        new_odds_end = 100 / ((100 / threshold_home) - strength_diff)

        # IMPORTANT: the result line carries no "Method 2:" prefix.
        method2_str = (
            f"{start_team} = {end_team} + {strength_diff:.3f}; "
            f"{start_team} odds = {new_odds_start:.3f}; {end_team} odds = {new_odds_end:.3f}"
        )
        return method2_str, warnings, thresholds

# ===================== Excel (columns as in the screenshot, but with thresholds instead of Chain/WTP) =====================

# Shared openpyxl style objects, assigned to cells by the workbook helpers
# further down in this file.

# Border sides: thin black for regular cells, medium black for emphasis.
thin = Side(style="thin", color="000000")
medium = Side(style="medium", color="000000")
BORDER_THIN = Border(left=thin, right=thin, top=thin, bottom=thin)
# Same as BORDER_THIN but with a medium top edge.
BORDER_MEDIUM_TOP = Border(left=thin, right=thin, top=medium, bottom=thin)

ALIGN_WRAP_TOP = Alignment(wrap_text=True, vertical="top")
ALIGN_CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)

FONT_HEADER = Font(bold=True, size=11)
FONT_BOLD = Font(bold=True)
FONT_MONO = Font(name="Consolas")

# Solid fills, given as hex RGB colour strings.
FILL_HEADER = PatternFill("solid", fgColor="D9E1F2")
FILL_PAIR = PatternFill("solid", fgColor="E2EFDA")

# Header row of the results sheet, in column order A..I.
EXPECTED_HEADERS = [
"RUN", # A
"Request #", # B
"Start", # C
"End", # D
"Th home", # E (replaces Chain #)
"Th away", # F (replaces WTP Len)
"Avg Date", # G
"Matches (Date | Home-Away (1,X,2))", # H
"Result" # I (replaces Method 2)
]

def _has_expected_headers(ws):
    """Return True when row 1 of *ws* equals EXPECTED_HEADERS."""
    current = [ws.cell(row=1, column=i).value for i in range(1, len(EXPECTED_HEADERS) + 1)]
    return current == EXPECTED_HEADERS


def init_results_workbook(output_excel_path):
    """Open (or create) the results workbook and return ``(wb, ws)``.

    The "Results" sheet is used when it is empty or already has the expected
    header row. If its header differs, the old data is left untouched and a
    "Results_v2" / "Results_v2_<n>" sheet is used instead: an existing one
    with the expected header is reused, otherwise a new one is created.
    The header style, column widths and frozen header row are (re)applied to
    the returned sheet on every call.
    """
    if os.path.exists(output_excel_path):
        wb = load_workbook(output_excel_path)
    else:
        wb = Workbook()

    # Use the Results sheet if present; its header is validated below.
    if "Results" in wb.sheetnames:
        ws = wb["Results"]
    else:
        ws = wb.active
        ws.title = "Results"

    if ws.max_row == 1 and ws["A1"].value is None:
        # Empty sheet: write the header.
        ws.append(EXPECTED_HEADERS)
    elif not _has_expected_headers(ws):
        # Header mismatch: switch sheets rather than corrupt the old data.
        # Reuse a compatible sheet from an earlier run so that repeated runs
        # do not create a new sheet every time.
        base = "Results_v2"
        name = base
        i = 2
        while name in wb.sheetnames and not _has_expected_headers(wb[name]):
            name = f"{base}_{i}"
            i += 1
        if name in wb.sheetnames:
            ws = wb[name]
        else:
            ws = wb.create_sheet(name)
            ws.append(EXPECTED_HEADERS)

    # Header style.
    for col in range(1, len(EXPECTED_HEADERS) + 1):
        c = ws.cell(row=1, column=col)
        c.font = FONT_HEADER
        c.fill = FILL_HEADER
        c.alignment = ALIGN_CENTER
        c.border = BORDER_THIN

    # Column widths for A..I (including the two threshold columns).
    widths = [18, 9, 18, 18, 9, 9, 11, 70, 55]
    for i, w in enumerate(widths, start=1):
        ws.column_dimensions[get_column_letter(i)].width = w

    ws.freeze_panes = "A2"
    return wb, ws

def append_row(ws, row_values, is_new_pair=False):
    """Append *row_values* to *ws* and style the new row.

    Args:
        ws: target openpyxl worksheet.
        row_values: cell values in EXPECTED_HEADERS order.
        is_new_pair: True for the first row of a start/end pair; that row
            gets a medium top border and highlighted Request/Start/End cells.
    """
    ws.append(row_values)
    r = ws.max_row

    for col in range(1, len(row_values) + 1):
        cell = ws.cell(row=r, column=col)
        cell.alignment = ALIGN_WRAP_TOP
        cell.border = BORDER_MEDIUM_TOP if is_new_pair else BORDER_THIN

        # Highlight Request #/Start/End on the first row of a pair.
        if is_new_pair and col in (2, 3, 4):
            cell.fill = FILL_PAIR
            cell.font = FONT_BOLD

        # The Matches column is monospaced so the padded odds line up.
        if col == 8:
            cell.font = FONT_MONO

    # Row height follows the number of lines in the Matches cell (column H),
    # clamped to 18..260; rows shorter than 8 values count as a single line.
    matches_value = row_values[7] if len(row_values) > 7 else None
    matches_text = str(matches_value) if matches_value else ""
    line_count = matches_text.count("\n") + 1
    ws.row_dimensions[r].height = min(max(18, 14 * line_count), 260)

# ===================== MAIN =====================

run_dt = datetime.now()
run_stamp = run_dt.strftime("%d.%m.%Y %H:%M:%S")

# Results are written next to the input workbook (or into the CWD when the
# input path has no directory part).
out_dir = os.path.dirname(excel_file_path) or os.getcwd()
output_excel_path = os.path.join(out_dir, "wtp_results.xlsx")

wb, ws = init_results_workbook(output_excel_path)

for request_num, (start_team, end_team) in enumerate(team_pairs, start=1):
    chains = find_chains(matches, start_team, end_team)

    # Drop duplicate chains while keeping the first occurrence order.
    unique_chains = []
    seen = set()
    for ch in chains:
        t = tuple(ch)
        if t not in seen:
            seen.add(t)
            unique_chains.append(ch)
    chains = unique_chains

    if not chains:
        # No chain found: still log the request as an otherwise empty row.
        append_row(
            ws,
            [run_stamp, request_num, start_team, end_team, "", "", "", "", ""],
            is_new_pair=True
        )
        continue

    # Group chains by length; within a group, newest average date first.
    groups = {}
    for chain in chains:
        dts = [parse_date_to_dt(m[1]) for m in chain]
        dts = [d for d in dts if d]
        avg_date = (
            datetime.fromtimestamp(sum(d.timestamp() for d in dts) / len(dts))
            if dts else datetime.min  # datetime.min marks "no parseable date"
        )
        groups.setdefault(len(chain), []).append({"chain": chain, "avg_date": avg_date})

    for chain_len in groups:
        groups[chain_len] = sorted(groups[chain_len], key=lambda x: x["avg_date"], reverse=True)

    first_row_for_pair = True
    for chain_len in sorted(groups.keys()):
        for info in groups[chain_len]:
            chain = info["chain"]
            avg_date = info["avg_date"]
            avg_date_str = avg_date.strftime("%d.%m.%Y") if avg_date != datetime.min else ""

            # 1) Base text "date | home-away" for every match of the chain.
            base_lines = [f"{m[1]} | {m[2]}-{m[3]}" for m in chain]

            # 2) Pad so the opening brackets line up; no padding inside the
            #    brackets, odds always with two decimals.
            max_base_len = max(len(b) for b in base_lines) if base_lines else 0
            matches_lines_for_excel = []
            for base, match in zip(base_lines, chain):
                o1, ox, o2 = match[4], match[5], match[6]
                pad_spaces = " " * (max_base_len - len(base) + 2)
                matches_lines_for_excel.append(
                    f"{base}{pad_spaces}({fmt2(o1)}, {fmt2(ox)}, {fmt2(o2)})"
                )

            # Thresholds come from the league of the first match in the
            # chain, mirroring what the calculator uses for the final odds.
            used_league_for_threshold = chain[0][0] if chain else None
            th_home, th_away = league_thresholds.get(used_league_for_threshold, (2.40, 3.00))
            th_home_str = f"{th_home:.2f}"
            th_away_str = f"{th_away:.2f}"

            calc = TeamStrengthCalculator()
            calc.add_games(chain)
            calc.calculate_strength_difference_method_2()
            # Warnings and the calculator's own thresholds are not written to
            # the sheet; a failed calculation shows up as an empty Result.
            result2, _warnings, _ths = calc.simplify_with_sympy_method2(start_team, end_team)

            append_row(
                ws,
                [
                    run_stamp,                           # RUN
                    request_num,                         # Request #
                    start_team,                          # Start
                    end_team,                            # End
                    th_home_str,                         # Th home
                    th_away_str,                         # Th away
                    avg_date_str,                        # Avg Date
                    "\n".join(matches_lines_for_excel),  # Matches
                    result2                              # Result (no "Method 2:" prefix)
                ],
                is_new_pair=first_row_for_pair
            )
            first_row_for_pair = False

wb.save(output_excel_path)
print(f"\nГотово. Результаты дописаны в Excel:\n{output_excel_path}")

В этом коде есть проблема: в Excel-файле, из которого он берёт данные по играм, команды Andorra и Ceuta записаны как FC Andorra и AD Ceuta (фотографии я приложил). Так как названия совпадают не полностью, код не может рассчитать коэффициенты для этих команд. Нужно решить эту проблему глобально, чтобы, например, при расчёте других лиг неполное совпадение названий команд не вызывало проблем. Измени код и вышли мне полный исправленный код.

Pensamiento

Fixing global team name matching

User has code that needs to fix global team name matching in Excel. Team names have prefixes like FC or AD, which might not appear in input pairs. To handle this, I'll implement canonicalization and alias mapping for fuzzy matching — removing common prefixes, normalizing text, and carefully handling important distinctions like "Real Sociedad B."

Storing and resolving team names

When reading matches, I’ll store both original and canonical names, mapping the canonical key to a consistent representative. For collisions like "FC Porto" vs "Porto," I'll allow "Porto" but keep "Real" since it’s meaningful. The resolution process will canonicalize user input and match it against the team names dataset using token overlap or similarity measure.

Implementing deterministic resolution and consistency

I’ll use difflib to ensure deterministic team name matching based on the highest similarity ratio. Warnings will be shown where needed. Both start and end teams will be resolved before chain calculations. Additionally, I'll incorporate canonicalization into the TeamStrengthCalculator, ensuring consistency, especially with names like "Mirandes - Andorra" vs. "FC Andorra."

Creating team name normalization and canonicalization functions

I’ll build functions to normalize team names by converting to lowercase, removing accents, and stripping punctuation. I'll also tokenize names, removing organizational markers like "FC," "AFC," "CD," etc., while keeping necessary ones like "B" in "Real Sociedad B." I'll ensure tokens that are purely organizational like "club" or "deportivo" are handled carefully.

Handling team name abbreviations and resolving collisions

For teams like "Deportivo La Coruna," I’ll keep abbreviations like "dep" to avoid mismatches, but map "dep" to "deportivo" to handle synonyms. I'll also normalize common abbreviations like "ath" for "athletic." To resolve team names consistently, I'll create a function that canonicalizes team names and picks the most common variant as the representative, while preserving distinguishing markers like “B” and “U19.” This ensures one common name is used across all variations.

Implementing consistent team name mapping

I’ll start by counting the frequency of cleaned team names and computing their canonical key. For each key, I’ll pick the most frequent representation, and resolve any ties by choosing the shortest. Then, I’ll replace team names in the matches list with their canonical representatives and ensure consistency. If matches don’t align, I'll try mapping based on similarity. I’ll also handle organization tokens and common abbreviations carefully to minimize collisions.

Cleaning and normalizing team names

I'll remove common stopwords like "de", "la", "the" (connector words) to improve matching, but I need to avoid potential collisions like "Real Sociedad" vs "Sociedad." I'll also remove punctuation, normalize accents, and clean up team names in the code. Team names will be standardized using canonical mappings. After reading the matches, I'll clean and resolve the names into a unified listing before graphing.

Implementing team name normalization and mapping

I'll implement the code that processes team names using the clean and canonicalization functions, resolves teams into their representative names based on frequency, and creates an alias map. Additionally, I'll design a function that resolves user input to the proper canonical team name, with fuzzy matching for uncertainty. I'll handle unresolved or fuzzy matches with warnings.

Sanitizing team names and improving symbol parsing

I’ll modify the symbol generation to sanitize team names by replacing non-alphanumeric characters and ensuring the symbol doesn’t start with a digit. I will adjust the input pair parsing to handle cases like "FC Andorra-AD Ceuta" and improve spaces in dashes handling. I'll also ensure the final code is clean, with the necessary dependencies and clear comments.

Thought for 1m 24s

Respuesta

Нужно сделать единое “каноническое” имя команды и использовать его везде: при чтении матчей из Excel, при построении графа цепочек и при обработке ввода пользователя. Тогда Andorra будет автоматически матчиться с FC Andorra, Ceuta с AD Ceuta, и так далее (FC/CF/AD/UD/CD/SC…, точки, дефисы, “de/la/del” и пр.).

Ниже — полный исправленный код. Я добавил:

  • canonical_team_key() — делает “ключ” команды (нижний регистр, без диакритики, без пунктуации, убирает организационные префиксы FC/AD/... и связки de/la/del/...).
  • При чтении Excel строится карта вариантов → представитель (самый частый вариант в файле), и все матчи нормализуются к одному названию.
  • Ввод пользователя также резолвится к представителю из Excel (с запасным fuzzy-подбором, если надо).

Скопируй и замени свой файл целиком:

python
import os
import re
import unicodedata
import difflib
import pandas as pd
from collections import deque, defaultdict, Counter
from sympy import symbols, Eq, solve
from datetime import datetime, timedelta

from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# ===================== SETTINGS =====================
excel_file_path = r"C:\Users\User\Desktop\results\results.xlsx"
# =====================================================

# League name -> (home threshold, away threshold) used by "method 2".
# Leagues missing from this table fall back to (2.40, 3.00) at lookup time.
league_thresholds = {
    "Albania Superiore": (2.52, 2.73),
    "Andorra Primera": (2.42, 2.75),
    "Armenia Premier": (2.32, 3.20),
    "Bosnia WWin Liga": (2.00, 3.20),
    "Croatia HNL": (2.29, 3.25),
    "Cyprus League": (2.25, 3.20),
    "Cyprus Division 2": (2.46, 2.70),
    "Czech Chance Liga": (2.25, 3.30),
    "Denmark Superliga": (2.40, 3.00),
    "Greece Super League": (2.36, 3.40),
    "Greece Super League 2": (2.25, 3.30),
    "Hungary OTP Bank Liga": (2.17, 3.40),
    "Hungary Merkantil Bank Liga": (2.38, 3.30),
    "Luxembourg BGL Ligue": (2.20, 3.00),
    "Malta Premier League": (2.75, 2.60),
    "Montenegro Prva Liga": (2.50, 3.20),
    "North Macedonia 1 MFL": (2.13, 3.60),
    "Slovakia Nike Liga": (2.30, 3.00),
    "Slovenia Prva Liga": (2.47, 3.00),
    "Algeria Ligue 2": (1.87, 5.00),
    "Mexico Liga de Expansion MX": (2.00, 4.00),
    "South Africa Premiership": (2.31, 3.45),
    "Croatia Prva NL": (2.40, 2.80),
    "India ISL": (2.17, 3.33),
    "Indonesia Liga 1": (2.27, 3.45),
    "Bulgaria Vtora liga": (2.27, 3.45),
    "Zambia Super League": (2.47, 3.10),
    "Romania Liga 3": (2.30, 2.94),
    "Myanmar National League": (2.50, 2.50),
    "Tanzania Ligi Kuu Bara": (2.20, 3.30),
    "France Nacional": (2.32, 3.30),
    "Nigeria NPFL": (1.50, 6.00),
    "Turkey 1.Lig": (2.27, 3.20),
    "Algeria Ligue 1": (2.05, 4.30),
    "Italy Serie C - A": (2.38, 3.30),
    "Italy Serie B": (2.46, 3.50),
    "Algeria Ligue 2 - 21": (2.00, 3.20),
    "England National League South": (2.38, 3.00),
    "Brazil Paulista A4": (2.44, 3.00),
    "Ethiopia": (3.12, 2.85),
    "Spain LaLiga": (2.29, 3.61),
    "Australia NPL": (2.22, 3.00),
    "Bulgaria Vtora Liga": (2.08, 3.70),
    "Italy Serie C - C": (2.40, 3.20),
    "Spain Segunda": (2.11, 4.02),
    "France Ligue 1": (2.30, 3.60),
    "Brazil Serie A": (2.05, 4.00),
    "Brazil Serie B": (2.05, 4.00),
    "Brazil Serie C": (2.15, 3.70),
    "Iceland Besta Delid": (2.12, 3.10),
    "Australia NPL Northern": (2.47, 2.30),
    "Norway Division 2": (2.08, 2.94),
    "USA MLS": (2.27, 3.20),
    "Argentina": (2.22, 3.80),
    "Mexico": (2.08, 3.80),
    "Bundesliga - 2": (2.16, 3.55),
    "France - 2": (2.16, 3.55),
    "Netherlands - Eridivisie": (2.20, 3.20),
    "England - Premier League": (2.18, 3.60)
}


# ===================== helpers =====================
def normalize_dashes(text):
    """Replace en dash, em dash and minus sign with '-'; non-strings pass through."""
    if isinstance(text, str):
        return text.replace('–', '-').replace('—', '-').replace('−', '-')
    return text


def clean_team_name(name):
    """Normalize dashes and collapse whitespace; non-strings pass through."""
    if isinstance(name, str):
        name = normalize_dashes(name)
        return ' '.join(name.strip().split())
    return name


def sanitize_pair_text(p):
    """Strip the input and cut off a trailing '& C:\\...' (optionally '-> & C:\\...') tail."""
    if not isinstance(p, str):
        return p
    p = p.strip()
    p = normalize_dashes(p)
    p = re.sub(r"\s*->\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    p = re.sub(r"\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    return p.strip()


def parse_date_to_dt(raw_date):
    """Convert a cell value to ``datetime``; return ``None`` when it cannot be parsed.

    Accepts ``datetime`` objects, numbers (treated as day offsets from
    1899-12-30) and strings in ``dd.mm.YYYY`` form, optionally followed by
    `` - <anything>``.
    """
    if isinstance(raw_date, datetime):
        return raw_date
    if isinstance(raw_date, (float, int)):
        try:
            return datetime(1899, 12, 30) + timedelta(days=int(raw_date))
        except (ValueError, OverflowError):
            # NaN / infinity / out-of-range serials are simply "no date".
            return None
    if isinstance(raw_date, str):
        try:
            if ' - ' in raw_date:
                date_part = raw_date.split(' - ')[0].strip()
                return datetime.strptime(date_part, '%d.%m.%Y')
            return datetime.strptime(raw_date.strip(), '%d.%m.%Y')
        except ValueError:
            return None
    return None


def to_float(x):
    """Parse a number that may use a decimal comma."""
    return float(str(x).replace(',', '.'))


def fmt2(x) -> str:
    """Format with exactly two decimals (2 -> "2.00"); fall back to ``str(x)``."""
    try:
        return f"{to_float(x):.2f}"
    except (TypeError, ValueError):
        return str(x)


def strip_accents(s: str) -> str:
    """Remove combining marks after NFKD decomposition."""
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))


# Organisational markers/prefixes (dropped from the canonical key).
ORG_TOKENS = {
    "fc", "cf", "ad", "cd", "ud", "sc", "ac", "afc", "fk", "sk", "nk", "as", "ss", "sv", "if", "bk", "kf",
    "cfc", "fcd", "f.c", "c.f", "a.d", "u.d", "c.d", "s.c", "a.c",
    "club", "clube", "clubs", "football", "futbol", "fútbol", "calcio"
}

# Frequent connector words in team names (also dropped from the key).
CONNECTOR_TOKENS = {
    "de", "del", "la", "las", "los", "el", "da", "do", "di", "du", "der", "den", "van", "von", "the", "of", "and"
}

ABBREV_MAP = {
    # Dep. / Dep -> Deportivo
    "dep": "deportivo",
    "depor": "deportivo",
}

# A dot between two single letters ("f.c.", "a.d"): removing it glues the
# initials into one token ("fc", "ad") so that ORG_TOKENS can match them.
_DOTTED_INITIAL_RE = re.compile(r"\b([a-z])\.(?=[a-z]\b)")


def canonical_team_key(name: str) -> str:
    """Build a canonical key so that "FC Andorra" == "Andorra", "AD Ceuta" == "Ceuta", etc.

    Steps: lower-case, strip accents, glue dotted initials ("F.C." -> "fc"),
    turn remaining punctuation into spaces, expand ABBREV_MAP, drop ORG_TOKENS
    and CONNECTOR_TOKENS. Returns "" for non-string input.
    """
    if not isinstance(name, str):
        return ""

    s = clean_team_name(name)
    s = strip_accents(s.lower())
    # Must run before punctuation removal, otherwise "f.c." splits into "f", "c".
    s = _DOTTED_INITIAL_RE.sub(r"\1", s)

    # Everything that is not a letter/digit becomes a space.
    s = re.sub(r"[^0-9a-z\s]", " ", s)
    tokens = s.split()

    normalized = []
    for t in tokens:
        t = ABBREV_MAP.get(t, t)
        if t in ORG_TOKENS or t in CONNECTOR_TOKENS:
            continue
        normalized.append(t)

    # If everything was stripped, fall back to the unfiltered tokens.
    if not normalized:
        normalized = tokens

    return " ".join(normalized).strip()


def build_team_normalization_maps(raw_rows):
    """Build the name-normalization tables from the raw match rows.

    Returns ``(rep_by_key, alias_to_rep, rep_names, rep_key_by_name)``:
      - rep_by_key: canonical key -> representative (most frequent spelling)
      - alias_to_rep: every spelling seen -> its representative
      - rep_names: set of representatives
      - rep_key_by_name: representative -> its canonical key
    """
    key_to_counts = defaultdict(Counter)

    for league, date, home, away, o1, ox, o2 in raw_rows:
        kh = canonical_team_key(home)
        ka = canonical_team_key(away)
        if kh:
            key_to_counts[kh][home] += 1
        if ka:
            key_to_counts[ka][away] += 1

    rep_by_key = {}
    alias_to_rep = {}

    for key, cnt in key_to_counts.items():
        max_count = max(cnt.values())
        candidates = [name for name, c in cnt.items() if c == max_count]
        # On a tie take the shortest spelling (usually without FC/AD), then alphabetical.
        rep = min(candidates, key=lambda x: (len(x), x.lower()))
        rep_by_key[key] = rep
        for name in cnt:
            alias_to_rep[name] = rep

    rep_names = set(rep_by_key.values())
    rep_key_by_name = {rep: canonical_team_key(rep) for rep in rep_names}
    return rep_by_key, alias_to_rep, rep_names, rep_key_by_name


def resolve_user_team_name(user_name: str, alias_to_rep, rep_by_key, rep_names, rep_key_by_name, fuzzy_min_ratio=0.83):
    """Map user input to a representative name from the Excel data.

    Tried in order:
      1) exact match with a representative
      2) exact match with any alias
      3) match by canonical key
      4) fuzzy match on canonical keys (ratio >= ``fuzzy_min_ratio``)

    Returns ``(resolved_name, warning_or_None)``; when nothing matches the
    cleaned input is returned together with a warning.
    """
    cleaned = clean_team_name(user_name)
    if cleaned in rep_names:
        return cleaned, None
    if cleaned in alias_to_rep:
        return alias_to_rep[cleaned], None

    key = canonical_team_key(cleaned)
    if key in rep_by_key:
        rep = rep_by_key[key]
        if rep != cleaned:
            return rep, f"Нормализация: '{cleaned}' -> '{rep}'"
        return rep, None

    # Fuzzy fallback: compare canonical keys.
    best_rep = None
    best_score = 0.0
    for rep in rep_names:
        rep_key = rep_key_by_name.get(rep, "")
        if not rep_key:
            continue
        score = difflib.SequenceMatcher(None, key, rep_key).ratio()
        if score > best_score:
            best_score = score
            best_rep = rep

    if best_rep and best_score >= fuzzy_min_ratio:
        return best_rep, f"Fuzzy-нормализация: '{cleaned}' -> '{best_rep}' (score={best_score:.2f})"

    return cleaned, f"ВНИМАНИЕ: команда '{cleaned}' не найдена для нормализации (ключ='{key}')."


# ===================== read matches =====================
df = pd.read_excel(excel_file_path)

raw_rows = []
for _, row in df.iterrows():
    # .iloc: columns are addressed by position; plain row[0] on a
    # label-indexed Series is deprecated positional access in pandas.
    league = row.iloc[0]
    date = row.iloc[1]
    home = clean_team_name(row.iloc[2])
    away = clean_team_name(row.iloc[3])
    odds1 = row.iloc[4]
    odds_draw = row.iloc[5]
    odds2 = row.iloc[6]
    raw_rows.append((league, date, home, away, odds1, odds_draw, odds2))

# Build the global normalization over the whole file.
rep_by_key, alias_to_rep, rep_names, rep_key_by_name = build_team_normalization_maps(raw_rows)

# Normalize matches: every team name is replaced by its representative.
matches = []
for league, date, home, away, odds1, odds_draw, odds2 in raw_rows:
    home_rep = alias_to_rep.get(home, home)
    away_rep = alias_to_rep.get(away, away)
    matches.append((league, date, home_rep, away_rep, odds1, odds_draw, odds2))


# ===================== chains =====================
def find_chains(matches, start_team, end_team, min_depth=1, max_depth=4):
    """Breadth-first search for match chains linking ``start_team`` to ``end_team``.

    Each match is an undirected edge between its two teams. A chain is a list
    of match tuples; a match is used at most once per chain. Only chains whose
    length is within ``[min_depth, max_depth]`` are returned.
    """
    graph = {}
    for match in matches:
        league, date, home_team, away_team, odds1, odds_draw, odds2 = match
        graph.setdefault(home_team, []).append((away_team, match))
        graph.setdefault(away_team, []).append((home_team, match))

    chains = []
    queue = deque([(start_team, [], set())])

    while queue:
        current_team, path, visited = queue.popleft()

        if current_team == end_team and path:
            if min_depth <= len(path) <= max_depth:
                chains.append(path)
            # Do not extend a chain past the destination.
            continue

        if len(path) >= max_depth:
            continue

        for neighbor, match in graph.get(current_team, []):
            if match not in visited:
                new_visited = visited.copy()
                new_visited.add(match)
                queue.append((neighbor, path + [match], new_visited))

    return chains


# ===================== input pairs =====================
raw_input_teams = input(
    "Введите либо 2 команды (как раньше), либо список пар через запятую.\n"
    "Пример: Malaga - Granada, Granada - Albacete\n"
    "Можно использовать '-', '–' или '—'.\n"
    "Ввод: "
).strip()

raw_input_teams = sanitize_pair_text(raw_input_teams)

team_pairs = []
pair_splitter = re.compile(r"\s*-\s*")  # tolerant form: spaces around the dash are optional
spaced_pair_splitter = re.compile(r"\s+-\s+")  # strict form: dash surrounded by spaces


def split_pair(text):
    """Split 'Team1 - Team2' into its parts.

    A dash surrounded by spaces is tried first so hyphenated names
    ("Saint-Etienne - Lyon") stay intact; if that does not give exactly two
    parts, any dash is accepted ("FC Andorra-AD Ceuta").
    """
    spaced = [t for t in spaced_pair_splitter.split(text) if t.strip()]
    if len(spaced) == 2:
        return spaced
    return [t for t in pair_splitter.split(text) if t.strip()]


has_any_dash = '-' in raw_input_teams

if (',' in raw_input_teams) or has_any_dash:
    parts = [p.strip() for p in re.split(r"[,\n;]+", raw_input_teams) if p.strip()]
    for p in parts:
        p = sanitize_pair_text(p)
        if not p:
            continue

        teams = split_pair(p)
        if len(teams) != 2:
            print(f"Пропуск (неверный формат пары, нужен 'Команда1 - Команда2'): {p}")
            continue

        a = clean_team_name(teams[0])
        b = clean_team_name(teams[1])
        if a and b:
            team_pairs.append((a, b))

    if not team_pairs:
        # Nothing parsed as a pair: fall back to the two-prompt mode.
        start_team = clean_team_name(raw_input_teams)
        end_team = clean_team_name(input("Введите название команды назначения: ").strip())
        team_pairs = [(start_team, end_team)]
else:
    start_team = clean_team_name(raw_input_teams)
    end_team = clean_team_name(input("Введите название команды назначения: ").strip())
    team_pairs = [(start_team, end_team)]

# Resolve user input to the names that actually occur in the Excel data.
resolved_pairs = []
for a, b in team_pairs:
    ra, wa = resolve_user_team_name(a, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    rb, wb = resolve_user_team_name(b, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    if wa:
        print(wa)
    if wb:
        print(wb)
    resolved_pairs.append((ra, rb))

team_pairs = resolved_pairs


# ===================== calculator =====================
class TeamStrengthCalculator:
    """Derives a strength difference between two teams from a chain of matches."""

    def __init__(self):
        self.games = []
        self.equations_method_2 = []
        self.teams = set()

    def add_games(self, games):
        """Store match tuples as dicts (odds converted to float) and record the teams."""
        for g in games:
            league, date, home_team, away_team, home_odds, draw_odds, away_odds = g
            home_clean = clean_team_name(home_team)
            away_clean = clean_team_name(away_team)
            self.games.append({
                'league': league,
                'date': date,
                'home_team': home_clean,
                'away_team': away_clean,
                'home_odds': to_float(home_odds),
                'draw_odds': to_float(draw_odds),
                'away_odds': to_float(away_odds)
            })
            self.teams.add(home_clean)
            self.teams.add(away_clean)

    def calculate_strength_difference_method_2(self):
        """Append one "X = Y + d" equation per game to ``equations_method_2``.

        ``d`` is the absolute difference between 100/threshold and 100/odds of
        the side with the lower odds (the away side when odds are equal),
        using that side's league threshold.
        """
        for game in self.games:
            league_name = game['league']
            home_team = game['home_team']
            away_team = game['away_team']
            home_odds = game['home_odds']
            away_odds = game['away_odds']

            threshold_home, threshold_away = league_thresholds.get(league_name, (2.40, 3.00))

            if home_odds < away_odds:
                if home_odds < threshold_home:
                    home_strength = abs((100 / threshold_home) - (100 / home_odds))
                    self.equations_method_2.append(
                        f"{home_team} = {away_team} + {home_strength:.3f}"
                    )
                else:
                    home_strength = abs((100 / home_odds) - (100 / threshold_home))
                    self.equations_method_2.append(
                        f"{away_team} = {home_team} + {home_strength:.3f}"
                    )
            else:
                if away_odds < threshold_away:
                    away_strength = abs((100 / threshold_away) - (100 / away_odds))
                    self.equations_method_2.append(
                        f"{away_team} = {home_team} + {away_strength:.3f}"
                    )
                else:
                    away_strength = abs((100 / away_odds) - (100 / threshold_away))
                    self.equations_method_2.append(
                        f"{home_team} = {away_team} + {away_strength:.3f}"
                    )

    def _safe_sympy_symbol(self, team: str):
        """Return a sympy symbol whose name is an identifier-safe form of the team key."""
        base = canonical_team_key(team).replace(" ", "_")
        base = re.sub(r"[^0-9a-zA-Z_]", "_", base)
        if not base:
            base = "team"
        if base[0].isdigit():
            base = "t_" + base
        return symbols(base)

    def simplify_with_sympy_method2(self, start_team, end_team):
        """Solve the stored equations with ``start_team`` fixed at 0.

        Returns ``(method2_str, warnings, (th_home, th_away))`` where
        ``method2_str`` has no 'Method 2:' prefix and is "" on failure, and the
        thresholds are those of the first game's league formatted to two
        decimals (empty strings when there are no games).
        """
        if not self.games:
            return "", ["Нет матчей для расчета."], ("", "")

        first_league = self.games[0]['league']
        threshold_home, threshold_away = league_thresholds.get(first_league, (2.40, 3.00))
        thresholds = (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        team_symbols = {}
        used_names = set()
        for team in self.teams:
            if team == start_team:
                team_symbols[team] = 0
                continue
            sym = self._safe_sympy_symbol(team)
            # Distinct teams can sanitize to the same name (e.g. an empty key
            # becomes "team"); a shared symbol would merge them in the system.
            base_name = sym.name
            suffix = 2
            while sym.name in used_names:
                sym = symbols(f"{base_name}_{suffix}")
                suffix += 1
            used_names.add(sym.name)
            team_symbols[team] = sym

        sympy_equations = []
        for equation in self.equations_method_2:
            # Equations are written as "<left> = <right> + <diff>"; splitting on
            # the exact separators keeps names containing '=' or '+' intact.
            left_team, eq_sep, right = equation.partition(' = ')
            right_team, plus_sep, diff_text = right.rpartition(' + ')
            if not eq_sep or not plus_sep:
                continue
            left_sym = team_symbols[left_team.strip()]
            right_sym = team_symbols[right_team.strip()]
            sympy_equations.append(Eq(left_sym, right_sym + float(diff_text.strip())))

        symbols_list = [sym for sym in team_symbols.values() if sym != 0]
        solution = solve(sympy_equations, symbols_list, dict=True) if sympy_equations else []

        end_symbol = team_symbols.get(end_team)
        if not solution or end_symbol is None:
            return "", ["Нет решения для метода 2."], thresholds

        sol = solution[0]
        if end_symbol not in sol:
            return "", [f"Команда {end_team} не найдена в решении метода 2."], thresholds

        try:
            end_value_numeric = float(sol[end_symbol].evalf())
        except Exception:
            # The solution may still contain free symbols; report instead of crashing.
            return "", [f"Не удалось вычислить численное значение силы {end_team} в методе 2."], thresholds

        strength_diff = -end_value_numeric

        base_probability = 100 / threshold_home
        denom_start = base_probability + strength_diff
        denom_end = base_probability - strength_diff
        if denom_start == 0 or denom_end == 0:
            # Would raise ZeroDivisionError and abort the run before the workbook is saved.
            return "", ["Невозможно вычислить коэффициенты: деление на ноль в методе 2."], thresholds

        new_odds_start = 100 / denom_start
        new_odds_end = 100 / denom_end

        method2_str = (
            f"{start_team} = {end_team} + {strength_diff:.3f}; "
            f"{start_team} odds = {new_odds_start:.3f}; {end_team} odds = {new_odds_end:.3f}"
        )
        return method2_str, [], thresholds


# ===================== Excel output (thresholds columns instead of Chain/WTP) =====================
thin = Side(style="thin", color="000000")
medium = Side(style="medium", color="000000")

BORDER_THIN = Border(left=thin, right=thin, top=thin, bottom=thin)
BORDER_MEDIUM_TOP = Border(left=thin, right=thin, top=medium, bottom=thin)

ALIGN_WRAP_TOP = Alignment(wrap_text=True, vertical="top")
ALIGN_CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)

FONT_HEADER = Font(bold=True, size=11)
FONT_BOLD = Font(bold=True)
FONT_MONO = Font(name="Consolas")

FILL_HEADER = PatternFill("solid", fgColor="D9E1F2")
FILL_PAIR = PatternFill("solid", fgColor="E2EFDA")

EXPECTED_HEADERS = [
    "RUN",                                   # A
    "Request #",                             # B
    "Start",                                 # C
    "End",                                   # D
    "Th home",                               # E
    "Th away",                               # F
    "Avg Date",                              # G
    "Matches (Date | Home-Away (1,X,2))",    # H
    "Result"                                 # I
]


def _header_row(ws):
    """Return the first-row values of ``ws`` across the expected header columns."""
    return [ws.cell(row=1, column=i).value for i in range(1, len(EXPECTED_HEADERS) + 1)]


def _write_headers(ws):
    """Write EXPECTED_HEADERS into row 1 explicitly.

    ``ws.append`` is avoided here: probing A1 on an empty sheet already
    creates that cell, after which ``append`` would place the headers on row 2.
    """
    for col, title in enumerate(EXPECTED_HEADERS, start=1):
        ws.cell(row=1, column=col, value=title)


def init_results_workbook(output_excel_path):
    """Open (or create) the results workbook and return ``(wb, ws)``.

    The "Results" sheet is used when it is empty or already has the expected
    headers. Otherwise an existing "Results_v2*" sheet with the expected
    headers is reused, or a new one is created. Header styling, column widths
    and the frozen header row are (re)applied on every call.
    """
    if os.path.exists(output_excel_path):
        wb = load_workbook(output_excel_path)
    else:
        wb = Workbook()

    if "Results" in wb.sheetnames:
        ws = wb["Results"]
    else:
        ws = wb.active
        ws.title = "Results"

    if ws.max_row == 1 and ws.cell(row=1, column=1).value is None:
        _write_headers(ws)
    elif _header_row(ws) != EXPECTED_HEADERS:
        base = "Results_v2"
        # Reuse a compatible sheet from an earlier run instead of adding a new one each time.
        ws = next(
            (s for s in wb.worksheets
             if s.title.startswith(base) and _header_row(s) == EXPECTED_HEADERS),
            None,
        )
        if ws is None:
            name = base
            i = 2
            while name in wb.sheetnames:
                name = f"{base}_{i}"
                i += 1
            ws = wb.create_sheet(name)
            _write_headers(ws)

    for col in range(1, len(EXPECTED_HEADERS) + 1):
        c = ws.cell(row=1, column=col)
        c.font = FONT_HEADER
        c.fill = FILL_HEADER
        c.alignment = ALIGN_CENTER
        c.border = BORDER_THIN

    widths = [18, 9, 18, 18, 9, 9, 11, 70, 55]
    for i, w in enumerate(widths, start=1):
        ws.column_dimensions[get_column_letter(i)].width = w

    ws.freeze_panes = "A2"
    return wb, ws


def append_row(ws, row_values, is_new_pair=False):
    """Append one result row and style it.

    The first row of a request gets a medium top border and highlighted
    Request/Start/End cells; the matches column (H) uses a monospace font and
    the row height grows with the number of match lines (capped at 260).
    """
    ws.append(row_values)
    r = ws.max_row

    for col in range(1, len(row_values) + 1):
        cell = ws.cell(row=r, column=col)
        cell.alignment = ALIGN_WRAP_TOP
        cell.border = BORDER_MEDIUM_TOP if is_new_pair else BORDER_THIN

        if is_new_pair and col in (2, 3, 4):
            cell.fill = FILL_PAIR
            cell.font = FONT_BOLD

        if col == 8:
            cell.font = FONT_MONO

    matches_text = str(row_values[7]) if row_values[7] else ""
    line_count = matches_text.count("\n") + 1
    ws.row_dimensions[r].height = min(max(18, 14 * line_count), 260)


# ===================== MAIN =====================
run_dt = datetime.now()
run_stamp = run_dt.strftime("%d.%m.%Y %H:%M:%S")

out_dir = os.path.dirname(excel_file_path) if os.path.dirname(excel_file_path) else os.getcwd()
output_excel_path = os.path.join(out_dir, "wtp_results.xlsx")

wb, ws = init_results_workbook(output_excel_path)

for request_num, (start_team, end_team) in enumerate(team_pairs, start=1):
    chains = find_chains(matches, start_team, end_team)

    # Drop duplicate chains while keeping discovery order.
    unique_chains = []
    seen = set()
    for ch in chains:
        t = tuple(ch)
        if t not in seen:
            seen.add(t)
            unique_chains.append(ch)
    chains = unique_chains

    if not chains:
        # Record the request even when no chain links the two teams.
        append_row(
            ws,
            [run_stamp, request_num, start_team, end_team, "", "", "", "", ""],
            is_new_pair=True
        )
        continue

    # Group by chain length; within a group sort by average date (newest first).
    groups = {}
    for chain in chains:
        dts = [parse_date_to_dt(m[1]) for m in chain]
        dts = [d for d in dts if d]
        avg_date = (
            datetime.fromtimestamp(sum(d.timestamp() for d in dts) / len(dts))
            if dts else datetime.min
        )
        groups.setdefault(len(chain), []).append({"chain": chain, "avg_date": avg_date})

    for depth in groups:
        groups[depth] = sorted(groups[depth], key=lambda x: x["avg_date"], reverse=True)

    first_row_for_pair = True

    for depth in sorted(groups.keys()):
        for info in groups[depth]:
            chain = info["chain"]
            avg_date = info["avg_date"]
            avg_date_str = avg_date.strftime("%d.%m.%Y") if avg_date != datetime.min else ""

            games = []
            base_lines = []
            used_league_for_threshold = None

            for match in chain:
                league, date, home, away, o1, ox, o2 = match
                games.append(match)
                if used_league_for_threshold is None:
                    # Thresholds shown in the sheet come from the chain's first match.
                    used_league_for_threshold = league
                base_lines.append(f"{date} | {home}-{away}")

            # Pad so the odds column lines up in the monospace cell.
            max_base_len = max(len(b) for b in base_lines) if base_lines else 0
            matches_lines_for_excel = []
            for base, match in zip(base_lines, chain):
                _, date, home, away, o1, ox, o2 = match
                pad_spaces = " " * (max_base_len - len(base) + 2)
                matches_lines_for_excel.append(
                    f"{base}{pad_spaces}({fmt2(o1)}, {fmt2(ox)}, {fmt2(o2)})"
                )

            th_home, th_away = league_thresholds.get(used_league_for_threshold, (2.40, 3.00))
            th_home_str = f"{th_home:.2f}"
            th_away_str = f"{th_away:.2f}"

            calc = TeamStrengthCalculator()
            calc.add_games(games)
            calc.calculate_strength_difference_method_2()
            result2, warnings, _ths = calc.simplify_with_sympy_method2(start_team, end_team)

            append_row(
                ws,
                [
                    run_stamp,
                    request_num,
                    start_team,
                    end_team,
                    th_home_str,
                    th_away_str,
                    avg_date_str,
                    "\n".join(matches_lines_for_excel),
                    result2
                ],
                is_new_pair=first_row_for_pair
            )
            first_row_for_pair = False

wb.save(output_excel_path)
print(f"\nГотово. Результаты дописаны в Excel:\n{output_excel_path}")

Если хочешь, могу ещё добавить в ORG_TOKENS/ABBREV_MAP специфичные сокращения под твои источники (например, atl → atletico, st → saint и т.п.), но уже в текущем виде FC/AD/точки/дефисы/“de la” и похожие случаи должны перестать ломать расчёт.

Pregunta

ВНИМАНИЕ: команда 'Cultural Leonesa' не найдена для нормализации (ключ='cultural leonesa'), название в excel файле просто Leonesa, переделай и вышли мне полный код

Pensamiento

Improving alias mapping with fuzzy comparison

I need to adjust how I handle team name normalization. For "Cultural Leonesa" vs "Leonesa," my approach should allow for better alias mapping, like using token containment or a fuzzy comparison. This would help improve matches while avoiding mismatches, especially with similar tokens like 'leonesa.'

Implementing robust alias matching

To resolve name mismatches, I’ll combine exact containment matches (like token subsets) with a SequenceMatcher ratio. I’ll also check for close matches, but be cautious of ambiguous tokens like "United." The approach will involve calculating token overlap scores and using a threshold to select the best match.

Refining team name resolution

I’ll ensure team name uniqueness by counting candidates with matching scores within a small margin. I'll also handle cases like "Cultural Leonesa" vs "Leonesa" using fuzzy matching. For normalization, I'll keep generic adjectives like "Real" but treat them as optional for unique matching, avoiding collisions like with "Real Madrid."

Finalizing improved matching logic

I’ll keep the full data for better discriminability. For example, "Cultural Leonesa" can still match "Leonesa" via subset matching if necessary. I’ll craft improved matching functions like tokenize_key and match_score, calculating scores using Dice coefficient, containment, and SequenceMatcher, with thresholds to handle ambiguity properly.

Handling ambiguity in team name matching

For cases like "united" vs. "Sheffield United," I'll avoid accepting matches due to potential ambiguity (multiple teams matching). If there are multiple rep names with similar scores, I’ll flag it as ambiguous. Also, I need to ensure that pair splitting doesn’t split team names with hyphens or dots, while handling input format properly.

Improving team name splitting and collision handling

For team name splitting, I'll support user input with or without spaces around a hyphen. I'll tweak the regex to handle this, ensuring correct splits while avoiding hyphenated team names being incorrectly split. Additionally, potential collisions in canonical team keys need addressing, especially if token distinction is lost.

Thought for 48s

Respuesta

Понял: тут нужен не просто похожий fuzzy, а нормализация, которая умеет матчить случаи, когда в источнике имя усечено (например, Leonesa вместо Cultural Leonesa). Я переделал резолвер так, чтобы он:

  • сначала искал точные совпадения;
  • потом совпадение по каноническому ключу;
  • потом token-based matching (если один ключ является подмножеством другого — это твой кейс);
  • и только потом обычный fuzzy.
  • плюс защита от ошибок типа United → (много вариантов): если совпадение не уникально, то не маппит.

Ниже полный исправленный код (замени целиком):

python
import os
import re
import unicodedata
import difflib
import pandas as pd
from collections import deque, defaultdict, Counter
from sympy import symbols, Eq, solve
from datetime import datetime, timedelta
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# ===================== SETTINGS =====================
excel_file_path = r"C:\Users\User\Desktop\results\results.xlsx"
# ====================================================

# League name -> (home threshold, away threshold) used by the calculator.
league_thresholds = {
    "Albania Superiore": (2.52, 2.73),
    "Andorra Primera": (2.42, 2.75),
    "Armenia Premier": (2.32, 3.20),
    "Bosnia WWin Liga": (2.00, 3.20),
    "Croatia HNL": (2.29, 3.25),
    "Cyprus League": (2.25, 3.20),
    "Cyprus Division 2": (2.46, 2.70),
    "Czech Chance Liga": (2.25, 3.30),
    "Denmark Superliga": (2.40, 3.00),
    "Greece Super League": (2.36, 3.40),
    "Greece Super League 2": (2.25, 3.30),
    "Hungary OTP Bank Liga": (2.17, 3.40),
    "Hungary Merkantil Bank Liga": (2.38, 3.30),
    "Luxembourg BGL Ligue": (2.20, 3.00),
    "Malta Premier League": (2.75, 2.60),
    "Montenegro Prva Liga": (2.50, 3.20),
    "North Macedonia 1 MFL": (2.13, 3.60),
    "Slovakia Nike Liga": (2.30, 3.00),
    "Slovenia Prva Liga": (2.47, 3.00),
    "Algeria Ligue 2": (1.87, 5.00),
    "Mexico Liga de Expansion MX": (2.00, 4.00),
    "South Africa Premiership": (2.31, 3.45),
    "Croatia Prva NL": (2.40, 2.80),
    "India ISL": (2.17, 3.33),
    "Indonesia Liga 1": (2.27, 3.45),
    "Bulgaria Vtora liga": (2.27, 3.45),
    "Zambia Super League": (2.47, 3.10),
    "Romania Liga 3": (2.30, 2.94),
    "Myanmar National League": (2.50, 2.50),
    "Tanzania Ligi Kuu Bara": (2.20, 3.30),
    "France Nacional": (2.32, 3.30),
    "Nigeria NPFL": (1.50, 6.00),
    "Turkey 1.Lig": (2.27, 3.20),
    "Algeria Ligue 1": (2.05, 4.30),
    "Italy Serie C - A": (2.38, 3.30),
    "Italy Serie B": (2.46, 3.50),
    "Algeria Ligue 2 - 21": (2.00, 3.20),
    "England National League South": (2.38, 3.00),
    "Brazil Paulista A4": (2.44, 3.00),
    "Ethiopia": (3.12, 2.85),
    "Spain LaLiga": (2.29, 3.61),
    "Australia NPL": (2.22, 3.00),
    "Bulgaria Vtora Liga": (2.08, 3.70),
    "Italy Serie C - C": (2.40, 3.20),
    "Spain Segunda": (2.11, 4.02),
    "France Ligue 1": (2.30, 3.60),
    "Brazil Serie A": (2.05, 4.00),
    "Brazil Serie B": (2.05, 4.00),
    "Brazil Serie C": (2.15, 3.70),
    "Iceland Besta Delid": (2.12, 3.10),
    "Australia NPL Northern": (2.47, 2.30),
    "Norway Division 2": (2.08, 2.94),
    "USA MLS": (2.27, 3.20),
    "Argentina": (2.22, 3.80),
    "Mexico": (2.08, 3.80),
    "Bundesliga - 2": (2.16, 3.55),
    "France - 2": (2.16, 3.55),
    "Netherlands - Eridivisie": (2.20, 3.20),
    "England - Premier League": (2.18, 3.60)
}


# ===================== helpers =====================

def normalize_dashes(text):
    """Replace en dash, em dash and minus sign with ASCII '-'.

    Non-string values are returned unchanged.
    """
    if isinstance(text, str):
        return text.replace('–', '-').replace('—', '-').replace('−', '-')
    return text


def clean_team_name(name):
    """Normalize dashes and collapse whitespace runs to single spaces.

    Non-string values are returned unchanged.
    """
    if isinstance(name, str):
        name = normalize_dashes(name)
        return ' '.join(name.strip().split())
    return name


def sanitize_pair_text(p):
    """Trim the text, normalize dashes and drop a trailing '& <drive>:/...' tail.

    The tail may optionally be preceded by '->'. Non-string values are
    returned unchanged.
    """
    if not isinstance(p, str):
        return p
    p = p.strip()
    p = normalize_dashes(p)
    p = re.sub(r"\s*->\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    p = re.sub(r"\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    return p.strip()


def parse_date_to_dt(raw_date):
    """Convert a cell value to ``datetime``; return ``None`` when impossible.

    Accepts a ``datetime``, a number (treated as a day offset from
    1899-12-30) or a 'dd.mm.YYYY' string, optionally followed by ' - ...'.
    """
    if isinstance(raw_date, datetime):
        return raw_date
    if isinstance(raw_date, (float, int)):
        try:
            return datetime(1899, 12, 30) + timedelta(days=int(raw_date))
        except (ValueError, OverflowError):
            # NaN / inf / out-of-range serial numbers.
            return None
    if isinstance(raw_date, str):
        try:
            if ' - ' in raw_date:
                date_part = raw_date.split(' - ')[0].strip()
                return datetime.strptime(date_part, '%d.%m.%Y')
            return datetime.strptime(raw_date.strip(), '%d.%m.%Y')
        except ValueError:
            return None
    return None


def to_float(x):
    """Convert *x* to float, accepting ',' as the decimal separator."""
    return float(str(x).replace(',', '.'))


def fmt2(x) -> str:
    """Format *x* with two decimals (2 -> '2.00'); fall back to ``str(x)``."""
    try:
        return f"{to_float(x):.2f}"
    except ValueError:
        return str(x)


def strip_accents(s: str) -> str:
    """Remove combining marks after NFKD decomposition."""
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))


# Organisational markers / prefixes that are removed from the key.
ORG_TOKENS = {
    "fc", "cf", "ad", "cd", "ud", "sc", "ac", "afc", "fk", "sk", "nk", "as", "ss", "sv", "if", "bk", "kf",
    "cfc", "fcd", "f.c", "c.f", "a.d", "u.d", "c.d", "s.c", "a.c",
    "club", "clube", "clubs", "football", "futbol", "fútbol", "calcio"
}

# Frequent connector words that are removed from the key as well.
CONNECTOR_TOKENS = {
    "de", "del", "la", "las", "los", "el", "da", "do", "di", "du", "der", "den", "van", "von", "the", "of", "and"
}

ABBREV_MAP = {
    # Dep. / Dep -> Deportivo
    "dep": "deportivo",
    "depor": "deportivo",
}


def canonical_team_key(name: str) -> str:
    """Build the canonical matching key for a team name.

    Steps: lower-case, strip accents, glue dotted single-letter
    abbreviations ("C.D." -> "cd"), turn punctuation into spaces, expand
    ABBREV_MAP and drop ORG_TOKENS / CONNECTOR_TOKENS. Returns "" for
    non-strings. If every token is filtered out, the unfiltered tokens are
    used instead.
    """
    if not isinstance(name, str):
        return ""
    s = clean_team_name(name)
    s = strip_accents(s.lower())
    # Without this step "c.d." would become the tokens "c" and "d" and the
    # dotted abbreviations could never be recognised as ORG_TOKENS.
    s = re.sub(r"\b[a-z](?:\.[a-z])+\b", lambda m: m.group(0).replace(".", ""), s)
    # Everything that is not a letter/digit becomes a space.
    s = re.sub(r"[^0-9a-z\s]", " ", s)
    tokens = [t for t in s.split() if t]
    normalized = []
    for t in tokens:
        t = ABBREV_MAP.get(t, t)
        if t in ORG_TOKENS:
            continue
        if t in CONNECTOR_TOKENS:
            continue
        normalized.append(t)
    # Fallback: keep the raw tokens if filtering removed everything.
    if not normalized:
        normalized = tokens
    return " ".join(normalized).strip()


def key_tokens(key: str):
    """Split a canonical key into its non-empty tokens."""
    return [t for t in key.split() if t]


def match_score_by_tokens(user_key: str, rep_key: str) -> float:
    """Score the similarity of two canonical keys in the range 0..1.

    Returns the maximum of the SequenceMatcher ratio, the Dice coefficient
    of the token sets and 0.98 * containment. Containment is 1.0 when one
    token set lies entirely inside the other ("cultural leonesa" vs
    "leonesa"); it is scaled by 0.98 so that it stays below a perfect match.
    Keys without a common token score 0.0.
    """
    if not user_key or not rep_key:
        return 0.0
    u_toks = set(key_tokens(user_key))
    r_toks = set(key_tokens(rep_key))
    if not u_toks or not r_toks:
        return 0.0
    inter = len(u_toks & r_toks)
    if inter == 0:
        return 0.0
    containment = inter / min(len(u_toks), len(r_toks))
    dice = (2 * inter) / (len(u_toks) + len(r_toks))
    seq = difflib.SequenceMatcher(None, user_key, rep_key).ratio()
    return max(seq, dice, 0.98 * containment)


def build_team_normalization_maps(raw_rows):
    """Build the name-normalization lookup tables from the match rows.

    Returns a tuple ``(rep_by_key, alias_to_rep, rep_names, rep_key_by_name)``:
    - rep_by_key: canonical key -> representative (most frequent spelling;
      ties go to the shortest, then alphabetically first spelling)
    - alias_to_rep: every spelling seen -> its representative
    - rep_names: set of representatives
    - rep_key_by_name: representative -> its canonical key
    """
    key_to_counts = defaultdict(Counter)
    for league, date, home, away, o1, ox, o2 in raw_rows:
        kh = canonical_team_key(home)
        ka = canonical_team_key(away)
        if kh:
            key_to_counts[kh][home] += 1
        if ka:
            key_to_counts[ka][away] += 1

    rep_by_key = {}
    alias_to_rep = {}
    for key, cnt in key_to_counts.items():
        most = cnt.most_common()
        max_count = most[0][1]
        candidates = [name for name, c in most if c == max_count]
        rep = min(candidates, key=lambda x: (len(x), x.lower()))
        rep_by_key[key] = rep
        for name in cnt.keys():
            alias_to_rep[name] = rep

    rep_names = set(rep_by_key.values())
    rep_key_by_name = {rep: canonical_team_key(rep) for rep in rep_names}
    return rep_by_key, alias_to_rep, rep_names, rep_key_by_name


def resolve_user_team_name(
    user_name: str,
    alias_to_rep,
    rep_by_key,
    rep_names,
    rep_key_by_name,
    fuzzy_min_ratio=0.84,
    ambiguity_eps=0.02,
    margin=0.08
):
    """Map user input to a representative team name from the Excel data.

    Resolution order:
      1) exact match with a representative
      2) exact match with any known alias
      3) match by canonical key
      4) token-based / fuzzy match via ``match_score_by_tokens``

    In step 4 the input is NOT mapped when more than one candidate scores
    within ``ambiguity_eps`` of the best one (e.g. "United" against several
    "... United" teams), whatever the score is.

    Returns ``(name, message)``; ``message`` is ``None`` for a silent match
    and ``name`` is the cleaned input when nothing could be resolved.
    """
    cleaned = clean_team_name(user_name)
    if cleaned in rep_names:
        return cleaned, None
    if cleaned in alias_to_rep:
        return alias_to_rep[cleaned], None

    key = canonical_team_key(cleaned)
    if key in rep_by_key:
        rep = rep_by_key[key]
        if rep != cleaned:
            return rep, f"Нормализация: '{cleaned}' -> '{rep}'"
        return rep, None

    scored = []
    for rep in rep_names:
        rep_key = rep_key_by_name.get(rep, "")
        score = match_score_by_tokens(key, rep_key)
        if score > 0:
            scored.append((score, rep))
    if not scored:
        return cleaned, f"ВНИМАНИЕ: команда '{cleaned}' не найдена для нормализации (ключ='{key}')."

    # rep_names is a set: order by name as well so the result is deterministic.
    scored.sort(key=lambda x: (-x[0], x[1]))
    best_score, best_rep = scored[0]
    second_score = scored[1][0] if len(scored) > 1 else 0.0
    near_best = [rep for sc, rep in scored if sc >= best_score - ambiguity_eps]

    # Several equally good candidates: refuse to guess. Containment matches
    # score 0.98, so this must not be limited to low scores.
    if len(near_best) > 1:
        return cleaned, (
            f"ВНИМАНИЕ: '{cleaned}' неоднозначно (ключ='{key}'), возможные варианты: "
            f"{', '.join(near_best[:6])}" + (" ..." if len(near_best) > 6 else "")
        )

    if best_score >= fuzzy_min_ratio and (best_score >= 0.95 or (best_score - second_score) >= margin):
        return best_rep, f"Fuzzy/token-нормализация: '{cleaned}' -> '{best_rep}' (score={best_score:.2f})"

    return cleaned, f"ВНИМАНИЕ: команда '{cleaned}' не найдена для нормализации (ключ='{key}')."


def split_pair_robust(p: str):
    """Split 'A - B' into ``(A, B)`` without breaking hyphenated team names.

    Tries ' - ' first, then a dash surrounded by any whitespace, and finally
    a bare dash; the last two are accepted only when they yield exactly two
    parts. Returns ``None`` when no split applies.
    """
    p = normalize_dashes(p)
    if " - " in p:
        a, b = p.split(" - ", 1)
        return a.strip(), b.strip()
    parts = re.split(r"\s+[-]\s+", p)
    if len(parts) == 2:
        return parts[0].strip(), parts[1].strip()
    parts2 = re.split(r"\s*-\s*", p)
    if len(parts2) == 2:
        return parts2[0].strip(), parts2[1].strip()
    return None


# ===================== read matches =====================
df = pd.read_excel(excel_file_path)

raw_rows = []
for _, row in df.iterrows():
    # Columns are addressed by position; .iloc avoids the deprecated
    # positional fallback of Series.__getitem__.
    league = row.iloc[0]
    date = row.iloc[1]
    home = clean_team_name(row.iloc[2])
    away = clean_team_name(row.iloc[3])
    odds1 = row.iloc[4]
    odds_draw = row.iloc[5]
    odds2 = row.iloc[6]
    raw_rows.append((league, date, home, away, odds1, odds_draw, odds2))

# Global normalization tables built from the whole file.
rep_by_key, alias_to_rep, rep_names, rep_key_by_name = build_team_normalization_maps(raw_rows)

# Matches with every team name replaced by its representative.
matches = []
for league, date, home, away, odds1, odds_draw, odds2 in raw_rows:
    home_rep = alias_to_rep.get(home, home)
    away_rep = alias_to_rep.get(away, away)
    matches.append((league, date, home_rep, away_rep, odds1, odds_draw, odds2))


# ===================== chains =====================

def find_chains(matches, start_team, end_team, min_depth=1, max_depth=4):
    """Return every chain of matches linking *start_team* to *end_team*.

    Matches are treated as undirected edges between the two teams. The
    search is breadth-first, never reuses a match within one chain, stops a
    path as soon as it reaches *end_team* and keeps chains whose length is
    within ``min_depth..max_depth``.
    """
    graph = {}
    for match in matches:
        league, date, home_team, away_team, odds1, odds_draw, odds2 = match
        graph.setdefault(home_team, [])
        graph.setdefault(away_team, [])
        graph[home_team].append((away_team, match))
        graph[away_team].append((home_team, match))

    chains = []
    queue = deque([(start_team, [], set())])
    while queue:
        current_team, path, visited = queue.popleft()
        if current_team == end_team and path:
            if min_depth <= len(path) <= max_depth:
                chains.append(path)
            continue
        if len(path) >= max_depth:
            continue
        for neighbor, match in graph.get(current_team, []):
            if match not in visited:
                new_visited = visited.copy()
                new_visited.add(match)
                queue.append((neighbor, path + [match], new_visited))
    return chains


# ===================== input pairs =====================
raw_input_teams = input(
    "Введите либо 2 команды (как раньше), либо список пар через запятую.\n"
    "Пример: Malaga - Granada, Granada - Albacete\n"
    "Можно использовать '-', '–' или '—'.\n"
    "Ввод: "
).strip()
raw_input_teams = sanitize_pair_text(raw_input_teams)

team_pairs = []
has_any_dash = '-' in raw_input_teams
if (',' in raw_input_teams) or has_any_dash:
    parts = [p.strip() for p in re.split(r"[,\n;]+", raw_input_teams) if p.strip()]
    for p in parts:
        p = sanitize_pair_text(p)
        if not p:
            continue
        sp = split_pair_robust(p)
        if not sp:
            print(f"Пропуск (неверный формат пары, нужен 'Команда1 - Команда2'): {p}")
            continue
        a, b = sp
        a = clean_team_name(a)
        b = clean_team_name(b)
        if a and b:
            team_pairs.append((a, b))

# No usable pair was parsed: treat the input as the start team and ask for
# the destination team separately (the legacy two-prompt mode).
if not team_pairs:
    start_team = clean_team_name(raw_input_teams)
    end_team = clean_team_name(input("Введите название команды назначения: ").strip())
    team_pairs = [(start_team, end_team)]

# Map the user's spelling to the names that actually occur in the Excel file.
resolved_pairs = []
for a, b in team_pairs:
    ra, warn_a = resolve_user_team_name(a, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    rb, warn_b = resolve_user_team_name(b, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    if warn_a:
        print(warn_a)
    if warn_b:
        print(warn_b)
    resolved_pairs.append((ra, rb))
team_pairs = resolved_pairs


# ===================== calculator =====================

class TeamStrengthCalculator:
    """Derives relative team strengths from match odds ("method 2")."""

    def __init__(self):
        self.games = []               # parsed games (dicts)
        self.equations_method_2 = []  # "A = B + x.xxx" strings
        self.teams = set()            # all team names seen in add_games

    def add_games(self, games):
        """Store match tuples as dicts with cleaned names and float odds."""
        for g in games:
            league, date, home_team, away_team, home_odds, draw_odds, away_odds = g
            self.games.append({
                'league': league,
                'date': date,
                'home_team': clean_team_name(home_team),
                'away_team': clean_team_name(away_team),
                'home_odds': to_float(home_odds),
                'draw_odds': to_float(draw_odds),
                'away_odds': to_float(away_odds)
            })
            self.teams.add(clean_team_name(home_team))
            self.teams.add(clean_team_name(away_team))

    def calculate_strength_difference_method_2(self):
        """Append one 'X = Y + diff' equation per stored game.

        The side with the lower odds is compared with its league threshold
        (default 2.40 home / 3.00 away); ``diff`` is the absolute difference
        of 100/threshold and 100/odds, and the comparison decides which team
        ends up on the left-hand side.
        """
        for game in self.games:
            league_name = game['league']
            home_team = game['home_team']
            away_team = game['away_team']
            home_odds = game['home_odds']
            away_odds = game['away_odds']
            threshold_home, threshold_away = league_thresholds.get(league_name, (2.40, 3.00))

            if home_odds < away_odds:
                if home_odds < threshold_home:
                    home_strength = abs((100 / threshold_home) - (100 / home_odds))
                    self.equations_method_2.append(
                        f"{home_team} = {away_team} + {home_strength:.3f}"
                    )
                else:
                    home_strength = abs((100 / home_odds) - (100 / threshold_home))
                    self.equations_method_2.append(
                        f"{away_team} = {home_team} + {home_strength:.3f}"
                    )
            else:
                if away_odds < threshold_away:
                    away_strength = abs((100 / threshold_away) - (100 / away_odds))
                    self.equations_method_2.append(
                        f"{away_team} = {home_team} + {away_strength:.3f}"
                    )
                else:
                    away_strength = abs((100 / away_odds) - (100 / threshold_away))
                    self.equations_method_2.append(
                        f"{home_team} = {away_team} + {away_strength:.3f}"
                    )

    def _safe_sympy_symbol(self, team: str):
        """Return a sympy symbol whose name is derived from the team's key."""
        base = canonical_team_key(team).replace(" ", "_")
        base = re.sub(r"[^0-9a-zA-Z_]", "_", base)
        if not base:
            base = "team"
        if base[0].isdigit():
            base = "t_" + base
        return symbols(base)

    def simplify_with_sympy_method2(self, start_team, end_team):
        """Solve the stored equations with *start_team* fixed at 0.

        Returns ``(result_text, warnings, (th_home, th_away))``. On failure
        ``result_text`` is "" and ``warnings`` holds one message. The
        thresholds are those of the first stored game's league.
        """
        if not self.games:
            return "", ["Нет матчей для расчета."], ("", "")

        first_league = self.games[0]['league']
        threshold_home, threshold_away = league_thresholds.get(first_league, (2.40, 3.00))

        team_symbols = {}
        for team in self.teams:
            if team == start_team:
                team_symbols[team] = 0
            else:
                team_symbols[team] = self._safe_sympy_symbol(team)

        sympy_equations = []
        for equation in self.equations_method_2:
            parts = equation.split('=')
            if len(parts) == 2 and '+' in parts[1]:
                left_team, right = parts[0].strip(), parts[1].strip()
                right_team, strength_diff = right.split('+')
                right_team = right_team.strip()
                strength_diff = float(strength_diff.strip())
                left_sym = team_symbols[left_team]
                right_sym = team_symbols[right_team]
                sympy_equations.append(Eq(left_sym, right_sym + strength_diff))

        symbols_list = [sym for sym in team_symbols.values() if sym != 0]
        solution = solve(sympy_equations, symbols_list, dict=True) if sympy_equations else []
        end_symbol = team_symbols.get(end_team)

        if not solution or end_symbol is None:
            return "", ["Нет решения для метода 2."], (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        sol = solution[0]
        if end_symbol not in sol:
            return "", [f"Команда {end_team} не найдена в решении метода 2."], (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        try:
            end_value_numeric = float(sol[end_symbol].evalf())
        except Exception:
            return "", [f"Не удалось вычислить численное значение силы {end_team} в методе 2."], (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        strength_diff = -end_value_numeric
        new_odds_start = 100 / ((100 / threshold_home) + strength_diff)
        new_odds_end = 100 / ((100 / threshold_home) - strength_diff)

        method2_str = (
            f"{start_team} = {end_team} + {strength_diff:.3f}; "
            f"{start_team} odds = {new_odds_start:.3f}; {end_team} odds = {new_odds_end:.3f}"
        )
        return method2_str, [], (f"{threshold_home:.2f}", f"{threshold_away:.2f}")


# ===================== Excel =====================
thin = Side(style="thin", color="000000")
medium = Side(style="medium", color="000000")
BORDER_THIN = Border(left=thin, right=thin, top=thin, bottom=thin)
BORDER_MEDIUM_TOP = Border(left=thin, right=thin, top=medium, bottom=thin)
ALIGN_WRAP_TOP = Alignment(wrap_text=True, vertical="top")
ALIGN_CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)
FONT_HEADER = Font(bold=True, size=11)
FONT_BOLD = Font(bold=True)
FONT_MONO = Font(name="Consolas")
FILL_HEADER = PatternFill("solid", fgColor="D9E1F2")
FILL_PAIR = PatternFill("solid", fgColor="E2EFDA")

EXPECTED_HEADERS = [
    "RUN", "Request #", "Start", "End", "Th home", "Th away", "Avg Date",
    "Matches (Date | Home-Away (1,X,2))", "Result"
]


def init_results_workbook(output_excel_path):
    """Open (or create) the results workbook and return ``(wb, ws)``.

    Uses the "Results" sheet (renaming the active sheet when it is missing).
    When row 1 of that sheet does not hold EXPECTED_HEADERS, a new
    "Results_v2[_N]" sheet is created instead. The header row is styled,
    column widths are set and the first row is frozen.
    """
    if os.path.exists(output_excel_path):
        wb = load_workbook(output_excel_path)
    else:
        wb = Workbook()

    if "Results" in wb.sheetnames:
        ws = wb["Results"]
    else:
        ws = wb.active
        ws.title = "Results"

    if ws.max_row == 1 and ws["A1"].value is None:
        ws.append(EXPECTED_HEADERS)
    else:
        current = [ws.cell(row=1, column=i).value for i in range(1, len(EXPECTED_HEADERS) + 1)]
        if current != EXPECTED_HEADERS:
            base = "Results_v2"
            name = base
            i = 2
            while name in wb.sheetnames:
                name = f"{base}_{i}"
                i += 1
            ws = wb.create_sheet(name)
            ws.append(EXPECTED_HEADERS)

    for col in range(1, len(EXPECTED_HEADERS) + 1):
        c = ws.cell(row=1, column=col)
        c.font = FONT_HEADER
        c.fill = FILL_HEADER
        c.alignment = ALIGN_CENTER
        c.border = BORDER_THIN

    widths = [18, 9, 18, 18, 9, 9, 11, 70, 55]
    for i, w in enumerate(widths, start=1):
        ws.column_dimensions[get_column_letter(i)].width = w
    ws.freeze_panes = "A2"
    return wb, ws


def append_row(ws, row_values, is_new_pair=False):
    """Append one result row and style it.

    The first row of a pair gets a medium top border and highlighted
    Request/Start/End cells; column 8 (matches) uses a monospace font and
    the row height follows the number of lines in that cell (18..260).
    """
    ws.append(row_values)
    r = ws.max_row
    for col in range(1, len(row_values) + 1):
        cell = ws.cell(row=r, column=col)
        cell.alignment = ALIGN_WRAP_TOP
        cell.border = BORDER_MEDIUM_TOP if is_new_pair else BORDER_THIN
        if is_new_pair and col in (2, 3, 4):
            cell.fill = FILL_PAIR
            cell.font = FONT_BOLD
        if col == 8:
            cell.font = FONT_MONO
    matches_text = str(row_values[7]) if row_values[7] else ""
    line_count = matches_text.count("\n") + 1
    ws.row_dimensions[r].height = min(max(18, 14 * line_count), 260)


# ===================== MAIN =====================
run_dt = datetime.now()
run_stamp = run_dt.strftime("%d.%m.%Y %H:%M:%S")
out_dir = os.path.dirname(excel_file_path) if os.path.dirname(excel_file_path) else os.getcwd()
output_excel_path = os.path.join(out_dir, "wtp_results.xlsx")

wb, ws = init_results_workbook(output_excel_path)

for request_num, (start_team, end_team) in enumerate(team_pairs, start=1):
    chains = find_chains(matches, start_team, end_team)

    # Drop duplicate chains while keeping the discovery order.
    unique_chains = []
    seen = set()
    for ch in chains:
        t = tuple(ch)
        if t not in seen:
            seen.add(t)
            unique_chains.append(ch)
    chains = unique_chains

    if not chains:
        append_row(
            ws,
            [run_stamp, request_num, start_team, end_team, "", "", "", "", ""],
            is_new_pair=True
        )
        continue

    # Group chains by length; inside a group the newest average date is first.
    groups = {}
    for chain in chains:
        depth = len(chain)
        groups.setdefault(depth, [])
        dts = [parse_date_to_dt(m[1]) for m in chain]
        dts = [d for d in dts if d]
        avg_date = datetime.fromtimestamp(sum(d.timestamp() for d in dts) / len(dts)) if dts else datetime.min
        groups[depth].append({"chain": chain, "avg_date": avg_date})
    for depth in groups:
        groups[depth] = sorted(groups[depth], key=lambda x: x["avg_date"], reverse=True)

    first_row_for_pair = True
    for depth in sorted(groups.keys()):
        for info in groups[depth]:
            chain = info["chain"]
            avg_date = info["avg_date"]
            avg_date_str = avg_date.strftime("%d.%m.%Y") if avg_date != datetime.min else ""

            games = []
            base_lines = []
            used_league_for_threshold = None
            for match in chain:
                league, date, home, away, o1, ox, o2 = match
                games.append(match)
                if used_league_for_threshold is None:
                    used_league_for_threshold = league
                base_lines.append(f"{date} | {home}-{away}")

            # Pad the "date | teams" part so the odds columns line up.
            max_base_len = max(len(b) for b in base_lines) if base_lines else 0
            matches_lines_for_excel = []
            for base, match in zip(base_lines, chain):
                _, date, home, away, o1, ox, o2 = match
                pad_spaces = " " * (max_base_len - len(base) + 2)
                matches_lines_for_excel.append(
                    f"{base}{pad_spaces}({fmt2(o1)}, {fmt2(ox)}, {fmt2(o2)})"
                )

            th_home, th_away = league_thresholds.get(used_league_for_threshold, (2.40, 3.00))
            th_home_str = f"{th_home:.2f}"
            th_away_str = f"{th_away:.2f}"

            calc = TeamStrengthCalculator()
            calc.add_games(games)
            calc.calculate_strength_difference_method_2()
            result2, warnings, _ths = calc.simplify_with_sympy_method2(start_team, end_team)

            append_row(
                ws,
                [
                    run_stamp, request_num, start_team, end_team,
                    th_home_str, th_away_str, avg_date_str,
                    "\n".join(matches_lines_for_excel),
                    result2
                ],
                is_new_pair=first_row_for_pair
            )
            first_row_for_pair = False

wb.save(output_excel_path)
print(f"\nГотово. Результаты дописаны в Excel:\n{output_excel_path}")

Теперь Cultural Leonesa должна автоматически сматчиться с Leonesa (потому что leonesa — подмножество токенов).

Если после этого вылезет похожий кейс (например, в Excel “Santander” вместо “Racing Santander”), скинь пример пары (как в Excel и как вводишь), и я добавлю ещё одно правило (например, запрет на матч по одному слишком короткому токену, если он часто встречается).

Pregunta

теперь все идеально работает, нужно еще кое-что сделать: если при повторном расчете с более актуальными данными появятся новые цепочки по играм, которые уже были рассчитаны в Excel, нужно, чтобы он просто дополнил их, а не создавал новые расчеты внизу предыдущих расчетов; и если появились новые цепочки, нужно, чтобы код выделил их каким-нибудь цветом, например желтым, в Excel. Переделай и вышли мне полный код.

Pensamiento

Updating script to avoid duplicates in reruns

I need to modify the script so it updates existing calculations when new data comes in, rather than just appending everything. The Excel setup includes columns for various game details. If new chains show up for the same pair, I'll add them in the correct place without duplicates.

Updating existing chains and preventing duplicates

If I rerun calculations for the same pair, it looks like some results might already exist in the file. Instead of adding new rows at the bottom, I should update the existing ones—maybe by refreshing the RUN time, thresholds, or other relevant details. New chains will be added near existing ones and highlighted yellow. I'll define the chain identity by key details like teams, date, or odds, but the signature should account for updated data.

Defining chain signature and handling updates

So, the signature for a chain could be a tuple of (league, date, home, away), possibly including odds if they change. If odds are updated, the chain should just have its values recalculated, and the row should be overwritten. I'll read the existing file and map each pair (start, end) to its corresponding rows and chain signatures from the matches column.

Defining chain signature and updating rows

I'll use a normalized key for start/end, and compute the chain signature from match lines, excluding odds and parentheses' contents. The signature will be based on date and home-away, ignoring multiple spaces. When I rerun, I'll compare signatures and update existing rows without appending new ones at the bottom. If a new chain appears, I’ll insert it near its pair.

Updating existing blocks without appending duplicates

The goal is to update the calculation results within existing blocks, not append new ones. When I run the script, I'll compute chains for each input pair and then locate the corresponding rows in the existing sheet. I’ll group rows by pair and update the most recent or first block, removing or skipping duplicates if needed—without creating new blocks. New rows will be highlighted in the sheet.

Avoiding appending new blocks, updating instead

When rerunning, I'll focus on updating existing blocks rather than creating new ones. If there are multiple blocks for a pair, I will merge them into one, keeping only the latest block and discarding duplicates from earlier blocks. To do this, I’ll map pairs to row indices, then merge and compress them into one continuous segment. The goal is to avoid creating new blocks, ensuring no duplicates are added.

Updating last block or appending new block

The goal is to identify and update the last block of a pair or append if it's the first occurrence. I’ll search for contiguous rows that match the pair Start and End, track the segment, and update or delete rows accordingly. For new chains, I’ll highlight them in yellow to distinguish them.

Identifying new chains and managing styles

For each new chain, I’ll compare its signature to the existing set and mark it as new if it’s not found. Then, I’ll update the row styles by adjusting row height, matching fonts, and handling the Request # consistency. For new chains, I’ll apply yellow highlight and adjust existing styles. Also, I’ll ensure we always write to the correct sheet, updating it without adding new sheets.

Normalizing date format and signature generation

To maintain stable signatures, I'll ensure consistent date formatting. If the date includes time, I'll format it carefully, but for simplicity and compatibility, I’ll preserve it as a string similar to the original. I’ll also implement a method to clean up matches, generating consistent chain signatures. These signatures will help when sorting and updating segments.

Managing row adjustments and new chains

To handle adding/removing rows, I'll use insert_rows and delete_rows while ensuring that row shifts don’t affect subsequent segments. I'll process pairs from bottom to top, re-locating segments as needed. For new rows, I'll highlight them yellow if they’re new chains within an existing pair's segment.

Thought for 1m 30s

Respuesta

Ниже — полный код, который:

  1. Не плодит дубликаты при повторных запусках:

    • Если для пары Start-End уже есть блок строк в wtp_results.xlsx, то код перезапишет этот блок актуальными расчетами (обновит RUN/Avg Date/Matches/Result/thresholds).
    • Если появились новые цепочки, он вставит новые строки внутрь существующего блока, а не добавит новый блок внизу файла.
  2. Подсвечивает новые цепочки желтым (весь ряд).

Важно: код обновляет последний найденный блок для этой пары Start-End (если ты раньше уже запускал и получил дубликаты блоков — он будет обновлять самый нижний/последний).

Скопируй и замени файл целиком:

python
import os
import re
import unicodedata
import difflib
import pandas as pd
from collections import deque, defaultdict, Counter
from sympy import symbols, Eq, solve
from datetime import datetime, timedelta
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font, PatternFill, Border, Side
from openpyxl.utils import get_column_letter

# ===================== SETTINGS =====================
excel_file_path = r"C:\Users\User\Desktop\results\results.xlsx"
# ====================================================

# League name -> (home threshold, away threshold) used by the calculator.
league_thresholds = {
    "Albania Superiore": (2.52, 2.73),
    "Andorra Primera": (2.42, 2.75),
    "Armenia Premier": (2.32, 3.20),
    "Bosnia WWin Liga": (2.00, 3.20),
    "Croatia HNL": (2.29, 3.25),
    "Cyprus League": (2.25, 3.20),
    "Cyprus Division 2": (2.46, 2.70),
    "Czech Chance Liga": (2.25, 3.30),
    "Denmark Superliga": (2.40, 3.00),
    "Greece Super League": (2.36, 3.40),
    "Greece Super League 2": (2.25, 3.30),
    "Hungary OTP Bank Liga": (2.17, 3.40),
    "Hungary Merkantil Bank Liga": (2.38, 3.30),
    "Luxembourg BGL Ligue": (2.20, 3.00),
    "Malta Premier League": (2.75, 2.60),
    "Montenegro Prva Liga": (2.50, 3.20),
    "North Macedonia 1 MFL": (2.13, 3.60),
    "Slovakia Nike Liga": (2.30, 3.00),
    "Slovenia Prva Liga": (2.47, 3.00),
    "Algeria Ligue 2": (1.87, 5.00),
    "Mexico Liga de Expansion MX": (2.00, 4.00),
    "South Africa Premiership": (2.31, 3.45),
    "Croatia Prva NL": (2.40, 2.80),
    "India ISL": (2.17, 3.33),
    "Indonesia Liga 1": (2.27, 3.45),
    "Bulgaria Vtora liga": (2.27, 3.45),
    "Zambia Super League": (2.47, 3.10),
    "Romania Liga 3": (2.30, 2.94),
    "Myanmar National League": (2.50, 2.50),
    "Tanzania Ligi Kuu Bara": (2.20, 3.30),
    "France Nacional": (2.32, 3.30),
    "Nigeria NPFL": (1.50, 6.00),
    "Turkey 1.Lig": (2.27, 3.20),
    "Algeria Ligue 1": (2.05, 4.30),
    "Italy Serie C - A": (2.38, 3.30),
    "Italy Serie B": (2.46, 3.50),
    "Algeria Ligue 2 - 21": (2.00, 3.20),
    "England National League South": (2.38, 3.00),
    "Brazil Paulista A4": (2.44, 3.00),
    "Ethiopia": (3.12, 2.85),
    "Spain LaLiga": (2.29, 3.61),
    "Australia NPL": (2.22, 3.00),
    "Bulgaria Vtora Liga": (2.08, 3.70),
    "Italy Serie C - C": (2.40, 3.20),
    "Spain Segunda": (2.11, 4.02),
    "France Ligue 1": (2.30, 3.60),
    "Brazil Serie A": (2.05, 4.00),
    "Brazil Serie B": (2.05, 4.00),
    "Brazil Serie C": (2.15, 3.70),
    "Iceland Besta Delid": (2.12, 3.10),
    "Australia NPL Northern": (2.47, 2.30),
    "Norway Division 2": (2.08, 2.94),
    "USA MLS": (2.27, 3.20),
    "Argentina": (2.22, 3.80),
    "Mexico": (2.08, 3.80),
    "Bundesliga - 2": (2.16, 3.55),
    "France - 2": (2.16, 3.55),
    "Netherlands - Eridivisie": (2.20, 3.20),
    "England - Premier League": (2.18, 3.60)
}


# ===================== helpers =====================

def normalize_dashes(text):
    """Replace en dash, em dash and minus sign with ASCII '-'.

    Non-string values are returned unchanged.
    """
    if isinstance(text, str):
        return text.replace('–', '-').replace('—', '-').replace('−', '-')
    return text


def clean_team_name(name):
    """Normalize dashes and collapse whitespace runs to single spaces.

    Non-string values are returned unchanged.
    """
    if isinstance(name, str):
        name = normalize_dashes(name)
        return ' '.join(name.strip().split())
    return name


def sanitize_pair_text(p):
    """Trim the text, normalize dashes and drop a trailing '& <drive>:/...' tail.

    The tail may optionally be preceded by '->'. Non-string values are
    returned unchanged.
    """
    if not isinstance(p, str):
        return p
    p = p.strip()
    p = normalize_dashes(p)
    p = re.sub(r"\s*->\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    p = re.sub(r"\s*&\s*[A-Za-z]:[\\/].*$", "", p)
    return p.strip()


def parse_date_to_dt(raw_date):
    """Convert a cell value to ``datetime``; return ``None`` when impossible.

    Accepts a ``datetime``, a number (treated as a day offset from
    1899-12-30) or a 'dd.mm.YYYY' string, optionally followed by ' - ...'.
    """
    if isinstance(raw_date, datetime):
        return raw_date
    if isinstance(raw_date, (float, int)):
        try:
            return datetime(1899, 12, 30) + timedelta(days=int(raw_date))
        except (ValueError, OverflowError):
            # NaN / inf / out-of-range serial numbers.
            return None
    if isinstance(raw_date, str):
        try:
            if ' - ' in raw_date:
                date_part = raw_date.split(' - ')[0].strip()
                return datetime.strptime(date_part, '%d.%m.%Y')
            return datetime.strptime(raw_date.strip(), '%d.%m.%Y')
        except ValueError:
            return None
    return None


def to_float(x):
    """Convert *x* to float, accepting ',' as the decimal separator."""
    return float(str(x).replace(',', '.'))


def fmt2(x) -> str:
    """Format *x* with two decimals (2 -> '2.00'); fall back to ``str(x)``."""
    try:
        return f"{to_float(x):.2f}"
    except ValueError:
        return str(x)


def strip_accents(s: str) -> str:
    """Remove combining marks after NFKD decomposition."""
    s = unicodedata.normalize("NFKD", s)
    return "".join(ch for ch in s if not unicodedata.combining(ch))


# Organisational markers / prefixes that are removed from the key.
ORG_TOKENS = {
    "fc", "cf", "ad", "cd", "ud", "sc", "ac", "afc", "fk", "sk", "nk", "as", "ss", "sv", "if", "bk", "kf",
    "cfc", "fcd", "f.c", "c.f", "a.d", "u.d", "c.d", "s.c", "a.c",
    "club", "clube", "clubs", "football", "futbol", "fútbol", "calcio"
}

# Frequent connector words that are removed from the key as well.
CONNECTOR_TOKENS = {
    "de", "del", "la", "las", "los", "el", "da", "do", "di", "du", "der", "den", "van", "von", "the", "of", "and"
}

ABBREV_MAP = {
    "dep": "deportivo",
    "depor": "deportivo",
}


def canonical_team_key(name: str) -> str:
    """Build the canonical matching key for a team name.

    Steps: lower-case, strip accents, glue dotted single-letter
    abbreviations ("C.D." -> "cd"), turn punctuation into spaces, expand
    ABBREV_MAP and drop ORG_TOKENS / CONNECTOR_TOKENS. Returns "" for
    non-strings. If every token is filtered out, the unfiltered tokens are
    used instead.
    """
    if not isinstance(name, str):
        return ""
    s = clean_team_name(name)
    s = strip_accents(s.lower())
    # Without this step "c.d." would become the tokens "c" and "d" and the
    # dotted abbreviations could never be recognised as ORG_TOKENS.
    s = re.sub(r"\b[a-z](?:\.[a-z])+\b", lambda m: m.group(0).replace(".", ""), s)
    # Everything that is not a letter/digit becomes a space.
    s = re.sub(r"[^0-9a-z\s]", " ", s)
    tokens = [t for t in s.split() if t]
    normalized = []
    for t in tokens:
        t = ABBREV_MAP.get(t, t)
        if t in ORG_TOKENS:
            continue
        if t in CONNECTOR_TOKENS:
            continue
        normalized.append(t)
    # Fallback: keep the raw tokens if filtering removed everything.
    if not normalized:
        normalized = tokens
    return " ".join(normalized).strip()


def key_tokens(key: str):
    """Split a canonical key into its non-empty tokens."""
    return [t for t in key.split() if t]


def match_score_by_tokens(user_key: str, rep_key: str) -> float:
    """Score the similarity of two canonical keys in the range 0..1.

    Returns the maximum of the SequenceMatcher ratio, the Dice coefficient
    of the token sets and 0.98 * containment. Containment is 1.0 when one
    token set lies entirely inside the other ("cultural leonesa" vs
    "leonesa"); it is scaled by 0.98 so that it stays below a perfect match.
    Keys without a common token score 0.0.
    """
    if not user_key or not rep_key:
        return 0.0
    u_toks = set(key_tokens(user_key))
    r_toks = set(key_tokens(rep_key))
    if not u_toks or not r_toks:
        return 0.0
    inter = len(u_toks & r_toks)
    if inter == 0:
        return 0.0
    containment = inter / min(len(u_toks), len(r_toks))  # token subset
    dice = (2 * inter) / (len(u_toks) + len(r_toks))
    seq = difflib.SequenceMatcher(None, user_key, rep_key).ratio()
    return max(seq, dice, 0.98 * containment)


def build_team_normalization_maps(raw_rows):
    """Build the name-normalization lookup tables from the match rows.

    Returns a tuple ``(rep_by_key, alias_to_rep, rep_names, rep_key_by_name)``:
    - rep_by_key: canonical key -> representative (most frequent spelling;
      ties go to the shortest, then alphabetically first spelling)
    - alias_to_rep: every spelling seen -> its representative
    - rep_names: set of representatives
    - rep_key_by_name: representative -> its canonical key
    """
    key_to_counts = defaultdict(Counter)
    for league, date, home, away, o1, ox, o2 in raw_rows:
        kh = canonical_team_key(home)
        ka = canonical_team_key(away)
        if kh:
            key_to_counts[kh][home] += 1
        if ka:
            key_to_counts[ka][away] += 1

    rep_by_key = {}
    alias_to_rep = {}
    for key, cnt in key_to_counts.items():
        most = cnt.most_common()
        max_count = most[0][1]
        candidates = [name for name, c in most if c == max_count]
        rep = min(candidates, key=lambda x: (len(x), x.lower()))
        rep_by_key[key] = rep
        for name in cnt.keys():
            alias_to_rep[name] = rep

    rep_names = set(rep_by_key.values())
    rep_key_by_name = {rep: canonical_team_key(rep) for rep in rep_names}
    return rep_by_key, alias_to_rep, rep_names, rep_key_by_name


def resolve_user_team_name(
    user_name: str,
    alias_to_rep,
    rep_by_key,
    rep_names,
    rep_key_by_name,
    fuzzy_min_ratio=0.84,
    ambiguity_eps=0.02,
    margin=0.08
):
    """Map user input to a representative team name from the Excel data.

    Resolution order:
      1) exact match with a representative
      2) exact match with any known alias
      3) match by canonical key
      4) token-based / fuzzy match via ``match_score_by_tokens``

    In step 4 the input is NOT mapped when more than one candidate scores
    within ``ambiguity_eps`` of the best one (e.g. "United" against several
    "... United" teams), whatever the score is.

    Returns ``(name, message)``; ``message`` is ``None`` for a silent match
    and ``name`` is the cleaned input when nothing could be resolved.
    """
    cleaned = clean_team_name(user_name)
    if cleaned in rep_names:
        return cleaned, None
    if cleaned in alias_to_rep:
        return alias_to_rep[cleaned], None

    key = canonical_team_key(cleaned)
    if key in rep_by_key:
        rep = rep_by_key[key]
        if rep != cleaned:
            return rep, f"Нормализация: '{cleaned}' -> '{rep}'"
        return rep, None

    scored = []
    for rep in rep_names:
        rep_key = rep_key_by_name.get(rep, "")
        score = match_score_by_tokens(key, rep_key)
        if score > 0:
            scored.append((score, rep))
    if not scored:
        return cleaned, f"ВНИМАНИЕ: команда '{cleaned}' не найдена для нормализации (ключ='{key}')."

    # rep_names is a set: order by name as well so the result is deterministic.
    scored.sort(key=lambda x: (-x[0], x[1]))
    best_score, best_rep = scored[0]
    second_score = scored[1][0] if len(scored) > 1 else 0.0
    near_best = [rep for sc, rep in scored if sc >= best_score - ambiguity_eps]

    # Several equally good candidates: refuse to guess. Containment matches
    # score 0.98, so this must not be limited to low scores.
    if len(near_best) > 1:
        return cleaned, (
            f"ВНИМАНИЕ: '{cleaned}' неоднозначно (ключ='{key}'), возможные варианты: "
            f"{', '.join(near_best[:6])}" + (" ..." if len(near_best) > 6 else "")
        )

    if best_score >= fuzzy_min_ratio and (best_score >= 0.95 or (best_score - second_score) >= margin):
        return best_rep, f"Fuzzy/token-нормализация: '{cleaned}' -> '{best_rep}' (score={best_score:.2f})"

    return cleaned, f"ВНИМАНИЕ: команда '{cleaned}' не найдена для нормализации (ключ='{key}')."

def split_pair_robust(p: str):
    """Split a "Team1 - Team2" string into a ``(team1, team2)`` tuple.

    The text is first passed through ``normalize_dashes`` (defined
    elsewhere in the file). Three patterns are tried in order: the literal
    " - ", a hyphen surrounded by whitespace, and a bare hyphen. The last
    two are accepted only when they yield exactly two parts.

    Returns ``None`` when no pattern produces a pair.
    """
    p = normalize_dashes(p)
    if " - " in p:
        a, b = p.split(" - ", 1)
        return a.strip(), b.strip()

    parts = re.split(r"\s+[-]\s+", p)
    if len(parts) == 2:
        return parts[0].strip(), parts[1].strip()

    parts2 = re.split(r"\s*-\s*", p)
    if len(parts2) == 2:
        return parts2[0].strip(), parts2[1].strip()

    return None


# ===== signatures (used to avoid duplicating chains) =====

def line_compact(s: str) -> str:
    """Normalize dashes and collapse all whitespace runs to single spaces."""
    s = normalize_dashes(str(s))
    return re.sub(r"\s+", " ", s).strip()


def signature_from_chain_base_lines(base_lines):
    """Build a chain signature from lines shaped like "date | home-away"."""
    return "\n".join(line_compact(x) for x in base_lines)


def signature_from_matches_cell(text):
    """Rebuild a chain signature from the text stored in the Matches cell.

    Each line has its trailing parenthesised group (the odds) removed and
    its whitespace collapsed, so the result can be compared with
    ``signature_from_chain_base_lines``. Returns "" for empty input.
    """
    if not text:
        return ""
    lines = []
    for ln in str(text).splitlines():
        ln = normalize_dashes(ln).strip()
        # Drop the trailing "(odds)" group. The parentheses must be escaped;
        # otherwise the suffix is never removed and stored signatures never
        # match freshly computed ones.
        ln = re.sub(r"\s*\([^)]*\)\s*$", "", ln).strip()
        ln = re.sub(r"\s+", " ", ln).strip()
        if ln:
            lines.append(ln)
    return "\n".join(lines)


# ===================== read matches =====================
df = pd.read_excel(excel_file_path)

raw_rows = []
for _, row in df.iterrows():
    # Columns are addressed by position; .iloc avoids the deprecated
    # integer-key fallback of Series.__getitem__ on a labelled index.
    league, date, home, away, odds1, odds_draw, odds2 = (row.iloc[i] for i in range(7))
    raw_rows.append((
        league, date,
        clean_team_name(home), clean_team_name(away),
        odds1, odds_draw, odds2
    ))

rep_by_key, alias_to_rep, rep_names, rep_key_by_name = build_team_normalization_maps(raw_rows)

# Replace every team spelling with its representative name.
matches = []
for league, date, home, away, odds1, odds_draw, odds2 in raw_rows:
    home_rep = alias_to_rep.get(home, home)
    away_rep = alias_to_rep.get(away, away)
    matches.append((league, date, home_rep, away_rep, odds1, odds_draw, odds2))


# ===================== chains =====================
def find_chains(matches, start_team, end_team, min_depth=1, max_depth=4):
    """Find chains of matches that link ``start_team`` to ``end_team``.

    Matches are treated as undirected edges between the two teams. A
    breadth-first search explores paths of at most ``max_depth`` matches,
    never using the same match twice within one path, and stops extending
    a path as soon as it reaches ``end_team``.

    Args:
        matches: iterable of 7-tuples
            ``(league, date, home, away, odds1, odds_draw, odds2)``.
        start_team: team the chains start from.
        end_team: team the chains must end at.
        min_depth: minimum number of matches in a returned chain.
        max_depth: maximum number of matches in a returned chain.

    Returns:
        List of chains, each a list of match tuples in traversal order.
    """
    graph = {}
    for match in matches:
        _league, _date, home_team, away_team, _o1, _ox, _o2 = match
        graph.setdefault(home_team, []).append((away_team, match))
        graph.setdefault(away_team, []).append((home_team, match))

    chains = []
    queue = deque([(start_team, [], set())])
    while queue:
        current_team, path, visited = queue.popleft()

        if current_team == end_team and path:
            if min_depth <= len(path) <= max_depth:
                chains.append(path)
            continue

        if len(path) >= max_depth:
            continue

        for neighbor, match in graph.get(current_team, []):
            if match not in visited:
                new_visited = visited.copy()
                new_visited.add(match)
                queue.append((neighbor, path + [match], new_visited))

    return chains


# ===================== input pairs =====================
raw_input_teams = input(
    "Введите либо 2 команды (как раньше), либо список пар через запятую.\n"
    "Пример: Malaga - Granada, Granada - Albacete\n"
    "Можно использовать '-', '–' или '—'.\n"
    "Ввод: "
).strip()
raw_input_teams = sanitize_pair_text(raw_input_teams)

team_pairs = []
has_any_dash = '-' in raw_input_teams

if (',' in raw_input_teams) or has_any_dash:
    parts = [p.strip() for p in re.split(r"[,\n;]+", raw_input_teams) if p.strip()]
    for p in parts:
        p = sanitize_pair_text(p)
        if not p:
            continue
        sp = split_pair_robust(p)
        if not sp:
            print(f"Пропуск (неверный формат пары, нужен 'Команда1 - Команда2'): {p}")
            continue
        a, b = sp
        a = clean_team_name(a)
        b = clean_team_name(b)
        if a and b:
            team_pairs.append((a, b))

if not team_pairs:
    # Either the input was not a pair list or no pair could be parsed:
    # treat it as the start team and ask for the destination team.
    start_team = clean_team_name(raw_input_teams)
    end_team = clean_team_name(input("Введите название команды назначения: ").strip())
    team_pairs = [(start_team, end_team)]

resolved_pairs = []
for a, b in team_pairs:
    ra, warn_a = resolve_user_team_name(a, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    rb, warn_b = resolve_user_team_name(b, alias_to_rep, rep_by_key, rep_names, rep_key_by_name)
    if warn_a:
        print(warn_a)
    if warn_b:
        print(warn_b)
    resolved_pairs.append((ra, rb))

team_pairs = resolved_pairs


# ===================== calculator =====================
class TeamStrengthCalculator:
    """Derive a strength difference between two teams from a chain of matches.

    Each match is turned into a textual equation "A = B + d"; the
    equations are then solved with sympy, with the start team fixed at 0.
    """

    def __init__(self):
        self.games = []               # list of per-match dicts
        self.equations_method_2 = []  # textual equations "A = B + d"
        self.teams = set()            # cleaned names of all teams seen

    def add_games(self, games):
        """Register match tuples ``(league, date, home, away, o1, ox, o2)``."""
        for g in games:
            league, date, home_team, away_team, home_odds, draw_odds, away_odds = g
            home_clean = clean_team_name(home_team)
            away_clean = clean_team_name(away_team)
            self.games.append({
                'league': league,
                'date': date,
                'home_team': home_clean,
                'away_team': away_clean,
                'home_odds': to_float(home_odds),
                'draw_odds': to_float(draw_odds),
                'away_odds': to_float(away_odds)
            })
            self.teams.add(home_clean)
            self.teams.add(away_clean)

    def calculate_strength_difference_method_2(self):
        """Append one equation per registered game to ``equations_method_2``.

        The side with the lower odds is compared with its league threshold
        (home threshold for the home side, away threshold otherwise;
        default ``(2.40, 3.00)``). The difference is
        ``|100 / threshold - 100 / odds|``. It is credited to that side
        when its odds are below the threshold, else to the opponent.
        """
        for game in self.games:
            home_team = game['home_team']
            away_team = game['away_team']
            home_odds = game['home_odds']
            away_odds = game['away_odds']
            threshold_home, threshold_away = league_thresholds.get(game['league'], (2.40, 3.00))

            if home_odds < away_odds:
                strength = abs((100 / threshold_home) - (100 / home_odds))
                if home_odds < threshold_home:
                    stronger, weaker = home_team, away_team
                else:
                    stronger, weaker = away_team, home_team
            else:
                strength = abs((100 / threshold_away) - (100 / away_odds))
                if away_odds < threshold_away:
                    stronger, weaker = away_team, home_team
                else:
                    stronger, weaker = home_team, away_team

            self.equations_method_2.append(f"{stronger} = {weaker} + {strength:.3f}")

    def _safe_sympy_symbol(self, team: str):
        """Return a sympy symbol with an identifier-safe name for ``team``."""
        base = canonical_team_key(team).replace(" ", "_")
        base = re.sub(r"[^0-9a-zA-Z_]", "_", base)
        if not base:
            base = "team"
        if base[0].isdigit():
            base = "t_" + base
        return symbols(base)

    def simplify_with_sympy_method2(self, start_team, end_team):
        """Solve the stored equations for ``end_team`` relative to ``start_team``.

        Returns:
            ``(result_text, warnings, (th_home, th_away))``. On success
            ``result_text`` holds the strength difference and derived odds
            and ``warnings`` is empty; otherwise ``result_text`` is "" and
            ``warnings`` holds one message. The thresholds are those of the
            first game's league, formatted with two decimals, or
            ``("", "")`` when no games are registered.
        """
        if not self.games:
            return "", ["Нет матчей для расчета."], ("", "")

        first_league = self.games[0]['league']
        threshold_home, threshold_away = league_thresholds.get(first_league, (2.40, 3.00))
        thresholds = (f"{threshold_home:.2f}", f"{threshold_away:.2f}")

        # The start team is the origin (0); every other team is an unknown.
        team_symbols = {}
        for team in self.teams:
            if team == start_team:
                team_symbols[team] = 0
            else:
                team_symbols[team] = self._safe_sympy_symbol(team)

        sympy_equations = []
        for equation in self.equations_method_2:
            parts = equation.split('=')
            if len(parts) == 2 and '+' in parts[1]:
                left_team, right = parts[0].strip(), parts[1].strip()
                # Split on the last '+': the numeric term never contains
                # one, while a team name might.
                right_team, strength_diff = right.rsplit('+', 1)
                right_team = right_team.strip()
                strength_diff = float(strength_diff.strip())
                left_sym = team_symbols[left_team]
                right_sym = team_symbols[right_team]
                sympy_equations.append(Eq(left_sym, right_sym + strength_diff))

        symbols_list = [sym for sym in team_symbols.values() if sym != 0]
        solution = solve(sympy_equations, symbols_list, dict=True) if sympy_equations else []

        end_symbol = team_symbols.get(end_team)
        if not solution or end_symbol is None:
            return "", ["Нет решения для метода 2."], thresholds

        sol = solution[0]
        if end_symbol not in sol:
            return "", [f"Команда {end_team} не найдена в решении метода 2."], thresholds

        try:
            end_value_numeric = float(sol[end_symbol].evalf())
        except Exception:
            # Best effort: a solution that is not a plain number is
            # reported as a warning instead of aborting the run.
            return "", [f"Не удалось вычислить численное значение силы {end_team} в методе 2."], thresholds

        strength_diff = -end_value_numeric
        new_odds_start = 100 / ((100 / threshold_home) + strength_diff)
        new_odds_end = 100 / ((100 / threshold_home) - strength_diff)

        method2_str = (
            f"{start_team} = {end_team} + {strength_diff:.3f}; "
            f"{start_team} odds = {new_odds_start:.3f}; {end_team} odds = {new_odds_end:.3f}"
        )
        return method2_str, [], thresholds


# ===================== Excel styles =====================
thin = Side(style="thin", color="000000")
medium = Side(style="medium", color="000000")

BORDER_THIN = Border(left=thin, right=thin, top=thin, bottom=thin)
BORDER_MEDIUM_TOP = Border(left=thin, right=thin, top=medium, bottom=thin)

ALIGN_WRAP_TOP = Alignment(wrap_text=True, vertical="top")
ALIGN_CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)

FONT_HEADER = Font(bold=True, size=11)
FONT_BOLD = Font(bold=True)
FONT_MONO = Font(name="Consolas")

FILL_HEADER = PatternFill("solid", fgColor="D9E1F2")
FILL_PAIR = PatternFill("solid", fgColor="E2EFDA")
FILL_NEW_CHAIN = PatternFill("solid", fgColor="FFF2CC")  # yellow
FILL_NONE = PatternFill()  # no fill

EXPECTED_HEADERS = [
    "RUN",
    "Request #",
    "Start",
    "End",
    "Th home",
    "Th away",
    "Avg Date",
    "Matches (Date | Home-Away (1,X,2))",
    "Result"
]


def _sheet_has_expected_headers(ws) -> bool:
    """Return True when row 1 of ``ws`` equals ``EXPECTED_HEADERS``."""
    current = [ws.cell(row=1, column=i).value for i in range(1, len(EXPECTED_HEADERS) + 1)]
    return current == EXPECTED_HEADERS


def init_results_workbook(output_excel_path):
    """Open or create the results workbook and return ``(wb, ws)``.

    The "Results" sheet is used when it is empty or already carries
    ``EXPECTED_HEADERS``. If its header row differs, a "Results_v2"
    (``_2``, ``_3``, ...) sheet is used instead: an existing one with the
    expected headers is reused, otherwise a new one is created. The header
    row is styled, column widths are set and the first row is frozen.
    """
    if os.path.exists(output_excel_path):
        wb = load_workbook(output_excel_path)
    else:
        wb = Workbook()

    if "Results" in wb.sheetnames:
        ws = wb["Results"]
    else:
        ws = wb.active
        ws.title = "Results"

    if ws.max_row == 1 and ws["A1"].value is None:
        ws.append(EXPECTED_HEADERS)
    elif not _sheet_has_expected_headers(ws):
        base = "Results_v2"
        name = base
        i = 2
        # Skip incompatible sheets, but stop at the first compatible one so
        # repeated runs keep updating the same sheet instead of adding a
        # new one each time.
        while name in wb.sheetnames and not _sheet_has_expected_headers(wb[name]):
            name = f"{base}_{i}"
            i += 1
        if name in wb.sheetnames:
            ws = wb[name]
        else:
            ws = wb.create_sheet(name)
            ws.append(EXPECTED_HEADERS)

    for col in range(1, len(EXPECTED_HEADERS) + 1):
        c = ws.cell(row=1, column=col)
        c.font = FONT_HEADER
        c.fill = FILL_HEADER
        c.alignment = ALIGN_CENTER
        c.border = BORDER_THIN

    widths = [18, 9, 18, 18, 9, 9, 11, 70, 55]
    for i, w in enumerate(widths, start=1):
        ws.column_dimensions[get_column_letter(i)].width = w

    ws.freeze_panes = "A2"
    return wb, ws


def apply_row_style(ws, r, is_pair_first_row: bool, is_new_chain: bool):
    """Apply alignment, border, fill and font to row ``r`` of ``ws``.

    A new chain is highlighted across the whole row. Otherwise only
    columns B-D of a pair's first row get the pair fill. The first row of
    a pair also gets a medium top border and bold B-D cells; column H uses
    the monospaced font.
    """
    for col in range(1, len(EXPECTED_HEADERS) + 1):
        cell = ws.cell(row=r, column=col)
        cell.alignment = ALIGN_WRAP_TOP
        cell.border = BORDER_MEDIUM_TOP if is_pair_first_row else BORDER_THIN

        is_pair_cell = is_pair_first_row and col in (2, 3, 4)

        # Reset the fill first; the new-chain highlight takes priority
        # over the pair highlight.
        if is_new_chain:
            cell.fill = FILL_NEW_CHAIN
        elif is_pair_cell:
            cell.fill = FILL_PAIR
        else:
            cell.fill = FILL_NONE

        if is_pair_cell:
            cell.font = FONT_BOLD
        elif col == 8:
            cell.font = FONT_MONO
        else:
            cell.font = Font()  # default


def set_row_height_from_matches(ws, r, matches_text):
    """Set the height of row ``r`` from the line count of ``matches_text``.

    Height is 14 points per line, clamped to the range 18..260.
    """
    matches_text = str(matches_text) if matches_text else ""
    line_count = matches_text.count("\n") + 1
    ws.row_dimensions[r].height = min(max(18, 14 * line_count), 260)


def append_row(ws, row_values, is_new_pair=False, is_new_chain=False):
    """Append ``row_values`` to ``ws`` and style the new row."""
    ws.append(row_values)
    r = ws.max_row
    apply_row_style(ws, r, is_new_pair, is_new_chain)
    set_row_height_from_matches(ws, r, row_values[7])


def find_last_segment_for_pair(ws, start_team: str, end_team: str):
    """Find the last (lowest) contiguous block of rows for a team pair.

    Rows from row 2 down are scanned; a row belongs to the pair when its
    Start (column C) and End (column D) values, after ``clean_team_name``,
    equal ``start_team`` and ``end_team``.

    Returns ``(seg_start, seg_end)`` row numbers, or ``None`` when the
    pair does not occur.
    """
    start_col = 3  # C
    end_col = 4    # D

    segments = []
    seg_start = None
    for r in range(2, ws.max_row + 1):
        s = ws.cell(row=r, column=start_col).value
        e = ws.cell(row=r, column=end_col).value
        s = clean_team_name(s) if s else ""
        e = clean_team_name(e) if e else ""

        if s == start_team and e == end_team:
            if seg_start is None:
                seg_start = r
        elif seg_start is not None:
            segments.append((seg_start, r - 1))
            seg_start = None

    if seg_start is not None:
        segments.append((seg_start, ws.max_row))

    if not segments:
        return None
    return segments[-1]


# ===================== MAIN =====================
run_dt = datetime.now()
run_stamp = run_dt.strftime("%d.%m.%Y %H:%M:%S")

out_dir = os.path.dirname(excel_file_path) or os.getcwd()
output_excel_path = os.path.join(out_dir, "wtp_results.xlsx")

wb, ws = init_results_workbook(output_excel_path)

for request_num, (start_team, end_team) in enumerate(team_pairs, start=1):
    chains = find_chains(matches, start_team, end_team)

    # Remove duplicate chains (same matches in the same order).
    unique_chains = []
    seen = set()
    for ch in chains:
        t = tuple(ch)
        if t not in seen:
            seen.add(t)
            unique_chains.append(ch)
    chains = unique_chains

    # ---- build desired_rows: the rows the sheet should hold for this pair ----
    desired_rows = []

    if not chains:
        desired_rows.append({
            "signature": "",
            "values": [run_stamp, request_num, start_team, end_team, "", "", "", "", ""]
        })
    else:
        # Group by chain length; within a group sort by average date,
        # most recent first.
        groups = {}
        for chain in chains:
            dts = [parse_date_to_dt(m[1]) for m in chain]
            dts = [d for d in dts if d]
            if dts:
                avg_date = datetime.fromtimestamp(sum(d.timestamp() for d in dts) / len(dts))
            else:
                avg_date = datetime.min
            groups.setdefault(len(chain), []).append({"chain": chain, "avg_date": avg_date})

        for L in groups:
            groups[L] = sorted(groups[L], key=lambda x: x["avg_date"], reverse=True)

        for L in sorted(groups.keys()):
            for info in groups[L]:
                chain = info["chain"]
                avg_date = info["avg_date"]
                avg_date_str = avg_date.strftime("%d.%m.%Y") if avg_date != datetime.min else ""

                games = []
                base_lines = []
                used_league_for_threshold = None
                for match in chain:
                    league, date, home, away, o1, ox, o2 = match
                    games.append(match)
                    if used_league_for_threshold is None:
                        used_league_for_threshold = league
                    base_lines.append(f"{date} | {home}-{away}")

                # Signature excludes the odds so it still matches after the
                # odds are updated.
                chain_sig = signature_from_chain_base_lines(base_lines)

                # Pad the base text so the odds brackets line up.
                max_base_len = max(len(b) for b in base_lines) if base_lines else 0
                matches_lines_for_excel = []
                for base, match in zip(base_lines, chain):
                    _, date, home, away, o1, ox, o2 = match
                    pad_spaces = " " * (max_base_len - len(base) + 2)
                    matches_lines_for_excel.append(
                        f"{base}{pad_spaces}({fmt2(o1)}, {fmt2(ox)}, {fmt2(o2)})"
                    )

                th_home, th_away = league_thresholds.get(used_league_for_threshold, (2.40, 3.00))
                th_home_str = f"{th_home:.2f}"
                th_away_str = f"{th_away:.2f}"

                calc = TeamStrengthCalculator()
                calc.add_games(games)
                calc.calculate_strength_difference_method_2()
                result2, _warnings, _ths = calc.simplify_with_sympy_method2(start_team, end_team)

                desired_rows.append({
                    "signature": chain_sig,
                    "values": [
                        run_stamp,                           # RUN
                        request_num,                         # Request #
                        start_team,                          # Start
                        end_team,                            # End
                        th_home_str,                         # Th home
                        th_away_str,                         # Th away
                        avg_date_str,                        # Avg Date
                        "\n".join(matches_lines_for_excel),  # Matches
                        result2                              # Result
                    ]
                })

    # ---- update the pair's existing block, or append a new one ----
    seg = find_last_segment_for_pair(ws, start_team, end_team)

    if seg is None:
        # Pair is not in the file yet: append the block, no yellow highlight.
        first = True
        for item in desired_rows:
            append_row(ws, item["values"], is_new_pair=first, is_new_chain=False)
            first = False
        continue

    seg_start, seg_end = seg
    current_len = seg_end - seg_start + 1
    desired_len = len(desired_rows)

    # Signatures already present in the current block (column H).
    existing_sigs = set()
    for r in range(seg_start, seg_end + 1):
        sig = signature_from_matches_cell(ws.cell(row=r, column=8).value)
        existing_sigs.add(sig)

    # Grow or shrink the block to desired_len rows.
    if desired_len > current_len:
        insert_at = seg_end + 1
        ws.insert_rows(insert_at, amount=(desired_len - current_len))
        seg_end += (desired_len - current_len)
    elif desired_len < current_len:
        delete_from = seg_start + desired_len
        ws.delete_rows(delete_from, amount=(current_len - desired_len))
        seg_end = seg_start + desired_len - 1

    # Overwrite the rows of the block.
    for i, item in enumerate(desired_rows):
        r = seg_start + i
        vals = item["values"]
        sig = item["signature"]

        is_pair_first_row = (i == 0)
        # A chain is new when its non-empty signature was not in the block.
        is_new_chain = (sig != "" and sig not in existing_sigs)

        for col_idx, v in enumerate(vals, start=1):
            ws.cell(row=r, column=col_idx, value=v)

        apply_row_style(ws, r, is_pair_first_row, is_new_chain)
        set_row_height_from_matches(ws, r, vals[7])

wb.save(output_excel_path)
print(f"\nГотово. Результаты обновлены в Excel:\n{output_excel_path}")

Если хочешь, могу сделать ещё удобнее:

  • чтобы он удалял старые дубли блоков (если они уже накопились),
  • или чтобы новые цепочки вставлялись строго по месту внутри сортировки (уже так) и ещё добавляли пометку типа “NEW” в отдельной колонке.

Compartir este Q&A