import pandas as pd import numpy as np import ran...
Creato il: 11 giugno 2026
Creato il: 11 giugno 2026
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
from google.colab import files
from IPython.display import display, HTML
import copy
# =============================================================================
# 4-digit draw scoring script: heuristic candidate features whose mixing
# weights are tuned by a small genetic algorithm.
#
# Layout:
#   * pure helpers + GeneticOptimizer at module level (importable, no I/O);
#   * the Colab workflow (upload -> validate -> tune -> predict -> display)
#     under the ``__main__`` guard, so importing this file has no side effects.
#
# NOTE(review): all temporal logic assumes the CSV rows are already in
# chronological order; the script does not sort them -- confirm with the data.
# =============================================================================

REQUIRED_COLUMNS = ["timestamp", "raw_number", "sum_digits", "d1", "d2", "d3", "d4"]
WEIGHT_KEYS = ["freq", "gap", "corr", "sum", "odd_even", "pattern"]
N_CANDIDATES = 10_000  # every 4-digit number 0000..9999
EVAL_HOLDOUT = 300     # trailing rows reserved for evaluating a weight vector
TOP_K = 10             # a draw counts as a "hit" when it ranks within the top K
RANDOM_SEED = 42       # set to None for non-reproducible GA runs

pattern_map = {"AAAA": 0, "AAAB": 1, "AABB": 2, "AABC": 3, "ABCD": 4}


def validate_draws(df):
    """Check that *df* has the columns and value ranges the script relies on.

    Raises:
        ValueError: if a required column is missing, the frame is empty, a
            numeric column has missing values, or a value is out of range.
    """
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(f"File CSV kekurangan kolom wajib: {missing}")
    if df.empty:
        raise ValueError("File CSV tidak berisi data.")

    numeric_cols = [col for col in REQUIRED_COLUMNS if col != "timestamp"]
    if df[numeric_cols].isnull().any().any():
        raise ValueError(f"Kolom numerik mengandung nilai kosong: {numeric_cols}")
    # np.bincount(..., minlength=10_000) silently grows past 10_000 entries for
    # larger values and rejects negatives, so reject both up front.
    if not df["raw_number"].between(0, N_CANDIDATES - 1).all():
        raise ValueError("Kolom raw_number harus bernilai 0-9999.")
    if not df[["d1", "d2", "d3", "d4"]].isin(list(range(10))).all().all():
        raise ValueError("Kolom d1..d4 harus berupa digit 0-9.")
    if (df["sum_digits"] < 0).any():
        raise ValueError("Kolom sum_digits tidak boleh negatif.")


def number_to_digits(n: int):
    """Split a number 0..9999 into (thousands, hundreds, tens, units)."""
    return (n // 1000, (n // 100) % 10, (n // 10) % 10, n % 10)


def pattern_category(d):
    """Classify four digits by their repetition shape.

    Returns one of "AAAA" (all equal), "AAAB" (three equal), "AABB" (two
    pairs), "AABC" (one pair) or "ABCD" (all different).
    """
    multiplicities = sorted(Counter(d).values())
    if len(multiplicities) == 1:
        return "AAAA"
    if 3 in multiplicities:
        return "AAAB"
    if multiplicities == [2, 2]:
        return "AABB"
    if 2 in multiplicities:
        return "AABC"
    return "ABCD"


def build_markov(timeslots, digits, train_end):
    """Build per-timeslot, per-position digit transition tables.

    For every row the "previous" draw is the most recent earlier row with the
    same timeslot. Only rows with index < *train_end* contribute counts, so
    the evaluation hold-out never leaks into the tables.

    Args:
        timeslots: sequence of hashable slot labels, one per row.
        digits: (n_rows, 4) integer digits of each draw.
        train_end: first row index excluded from training.

    Returns:
        (prev_digits, has_prev, probs, last_seen) where
        prev_digits is (n_rows, 4) int8 (zeros where there is no predecessor),
        has_prev is an (n_rows,) boolean mask,
        probs maps slot -> (4, 10, 10) row-normalised transition matrices,
        last_seen maps slot -> digits of the latest draw in that slot.
    """
    digits = np.asarray(digits, dtype=np.int64)
    n_rows = len(timeslots)
    prev_digits = np.zeros((n_rows, 4), dtype=np.int8)
    # Explicit mask instead of "prev.sum() != 0": a predecessor of 0000 is a
    # legitimate draw and must not be mistaken for "no predecessor".
    has_prev = np.zeros(n_rows, dtype=bool)
    # Counts start at one (add-one smoothing) so no transition has probability 0.
    counts = defaultdict(lambda: np.ones((4, 10, 10), dtype=np.float32))
    last_seen = {}
    positions = np.arange(4)

    for i, (slot, current) in enumerate(zip(timeslots, digits)):
        previous = last_seen.get(slot)
        if previous is not None:
            prev_digits[i] = previous
            has_prev[i] = True
            if i < train_end:
                counts[slot][positions, previous, current] += 1
        last_seen[slot] = current

    probs = {slot: c / c.sum(axis=2, keepdims=True) for slot, c in counts.items()}
    return prev_digits, has_prev, probs, last_seen


def markov_corr(mats, prev_digits, candidate_digits):
    """Return, per candidate, the product of the four digit-transition probabilities.

    When there is no table for the slot (*mats* is None) or no predecessor
    (*prev_digits* is None) a constant vector of ones is returned; a constant
    adds the same amount to every candidate and so leaves the ranking unchanged.
    """
    n_cand = len(candidate_digits)
    if mats is None or prev_digits is None:
        return np.ones(n_cand, dtype=np.float64)
    corr = np.ones(n_cand, dtype=np.float64)
    for pos in range(4):
        corr *= mats[pos][prev_digits[pos], candidate_digits[:, pos]]
    return corr


def _share(values, cand_values, n):
    """Fraction of the window whose attribute equals each candidate's attribute."""
    counts = np.bincount(values, minlength=int(cand_values.max()) + 1)
    return counts[cand_values] / n


def candidate_features(numbers, sums, odds, patterns, corr,
                       cand_sums, cand_odds, cand_patterns):
    """Compute the six feature planes for every candidate from one look-back window.

    Args:
        numbers, sums, odds, patterns: per-draw arrays for the window (drawn
            number, digit sum, count of odd digits, pattern id).
        corr: per-candidate Markov feature (see ``markov_corr``).
        cand_sums, cand_odds, cand_patterns: the same attributes for each
            candidate number; their length defines the number of candidates.

    Returns:
        float32 array of shape (6, n_candidates) in ``WEIGHT_KEYS`` order.

    Raises:
        ValueError: if the window is empty.
    """
    n = len(numbers)
    if n == 0:
        raise ValueError("look-back window is empty")
    n_cand = len(cand_sums)

    freq = np.bincount(numbers, minlength=n_cand) / n

    # Recency: 1 / (draws since the number last appeared), 0 if never seen.
    # ufunc.at keeps the largest position for repeated numbers.
    last_pos = np.full(n_cand, -1, dtype=np.int64)
    np.maximum.at(last_pos, numbers, np.arange(n, dtype=np.int64))
    gap = np.where(last_pos >= 0, 1.0 / (n - last_pos), 0.0)

    planes = [
        freq,
        gap,
        corr,
        _share(sums, cand_sums, n),
        _share(odds, cand_odds, n),
        _share(patterns, cand_patterns, n),
    ]
    return np.stack(planes).astype(np.float32)


def combine_scores(w, feats):
    """Weighted sum of feature planes; *feats* has ``WEIGHT_KEYS`` order on axis 0.

    The sum is done element-wise (not via matmul) so that candidates with
    identical features always get bit-identical scores, which the tie-aware
    ranking depends on.
    """
    score = np.zeros(feats.shape[1:], dtype=np.float64)
    for key, plane in zip(WEIGHT_KEYS, feats):
        score += w[key] * plane
    return score


def expected_ranks(scores, actual):
    """Rank of the actual number in each row under random tie-breaking.

    rank = (#strictly better) + (#tied incl. itself + 1) / 2. Without ties this
    equals the usual 1-based rank; with ties it is the average position, so a
    degenerate score vector (e.g. all zeros) can no longer claim rank 1.
    """
    scores = np.atleast_2d(scores)
    actual = np.atleast_1d(actual)
    target = scores[np.arange(len(actual)), actual][:, None]
    better = (scores > target).sum(axis=1)
    tied = (scores == target).sum(axis=1)
    return better + (tied + 1) / 2.0


def evaluate_weights(w, feats, actual, top_k=TOP_K):
    """Score a weight dict on precomputed features.

    Args:
        w: mapping with every key of ``WEIGHT_KEYS``.
        feats: (6, n_rows, n_candidates) feature planes.
        actual: (n_rows,) index of the number actually drawn in each row.
        top_k: rank threshold for a hit.

    Returns:
        (hit_rate, median_rank); (0.0, 10000.0) when there are no rows.
    """
    if feats.shape[1] == 0:
        return 0.0, float(N_CANDIDATES)
    ranks = expected_ranks(combine_scores(w, feats), actual)
    # Divide by the rows actually evaluated, not a hard-coded 300.
    return float((ranks <= top_k).mean()), float(np.median(ranks))


class GeneticOptimizer:
    """Tiny genetic algorithm over weight dicts with values clamped to [0, 1].

    Args:
        fitness_func: callable ``(weights, recent_n) -> (accuracy, median_rank)``.
        recent_n: look-back window size forwarded to *fitness_func*.
        pop_size: individuals per generation (>= 1).
        generations: number of generations to run.
        mutation_rate: per-gene probability of Gaussian mutation.
        seed: optional seed for a private RNG; ``None`` uses the global
            ``random`` module state.
    """

    def __init__(self, fitness_func, recent_n, pop_size=25, generations=15,
                 mutation_rate=0.2, seed=None):
        if pop_size < 1:
            raise ValueError("pop_size must be at least 1")
        self.fitness_func = fitness_func
        self.recent_n = recent_n
        self.pop_size = pop_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.keys = list(WEIGHT_KEYS)
        self._rng = random.Random(seed) if seed is not None else random

    def _get_metric(self, w):
        """Return (metric, accuracy, median_rank); lower metric is better."""
        acc, med = self.fitness_func(w, self.recent_n)
        return (1 - acc) + med / 10_000.0, acc, med

    def run(self):
        """Evolve the population.

        Returns:
            (best_weights, best_accuracy, best_median_rank, best_metric).
            ``best_weights`` is None when ``generations`` is 0.
        """
        population = [
            {k: self._rng.random() for k in self.keys} for _ in range(self.pop_size)
        ]

        best_overall_w = None
        best_overall_metric = float('inf')
        best_overall_acc = 0
        best_overall_med = 0

        print(f"Starting Genetic Algorithm ({self.generations} generations)...")

        for gen in range(self.generations):
            # 1. Evaluate fitness and order best-first (lower metric is better).
            scored_pop = []
            for ind in population:
                metric, acc, med = self._get_metric(ind)
                scored_pop.append({'w': ind, 'metric': metric, 'acc': acc, 'med': med})
            scored_pop.sort(key=lambda x: x['metric'])

            leader = scored_pop[0]
            if leader['metric'] < best_overall_metric:
                best_overall_metric = leader['metric']
                best_overall_w = dict(leader['w'])
                best_overall_acc = leader['acc']
                best_overall_med = leader['med']

            print(f"Gen {gen+1}/{self.generations} | Best Metric: {best_overall_metric:.4f} | Best Med Rank: {best_overall_med:.1f}")

            # 2-4. Elitism, then tournament selection + crossover + mutation.
            new_population = [dict(leader['w'])]
            while len(new_population) < self.pop_size:
                p1 = self._tournament(scored_pop)
                p2 = self._tournament(scored_pop)
                new_population.append(self._mutate(self._crossover(p1, p2)))
            population = new_population

        return best_overall_w, best_overall_acc, best_overall_med, best_overall_metric

    def _tournament(self, scored_pop):
        """Pick the best of up to three random individuals."""
        # min(...) keeps populations smaller than 3 from crashing random.sample.
        participants = self._rng.sample(scored_pop, min(3, len(scored_pop)))
        return min(participants, key=lambda x: x['metric'])['w']

    def _crossover(self, p1, p2):
        """Arithmetic crossover: one random blend factor for all genes."""
        alpha = self._rng.random()
        return {k: alpha * p1[k] + (1 - alpha) * p2[k] for k in self.keys}

    def _mutate(self, ind):
        """Add N(0, 0.1) noise to each gene with prob. ``mutation_rate``; clamp to [0, 1]."""
        for k in self.keys:
            if self._rng.random() < self.mutation_rate:
                ind[k] = min(1.0, max(0.0, ind[k] + self._rng.gauss(0, 0.1)))
        return ind


if __name__ == "__main__":
    print("Silakan upload file CSV Anda (PRIZE_LEVEL_4_ELITE.csv):")
    uploaded = files.upload()

    if not uploaded:
        print("Error: Tidak ada file yang diupload.")
    else:
        file_name = next(iter(uploaded))
        try:
            df = pd.read_csv(file_name, parse_dates=["timestamp"])
            print(f"Berhasil memuat file: {file_name}")

            # === 1. Column / value validation ===
            validate_draws(df)

            # === 2. Helper precomputations ===
            df["timeslot"] = df["timestamp"].dt.strftime("%H:%M")
            timeslot_array = df["timeslot"].to_numpy()
            numbers_array = df["raw_number"].to_numpy(dtype=np.int64)
            sum_array = df["sum_digits"].to_numpy(dtype=np.int64)
            draw_digits_array = df[["d1", "d2", "d3", "d4"]].to_numpy(dtype=np.int64)
            odd_count_array = (draw_digits_array % 2).sum(axis=1)

            # Attributes of every candidate number 0000..9999.
            digits_array = np.array(
                [number_to_digits(i) for i in range(N_CANDIDATES)], dtype=np.int8
            )
            sum_digits_array = digits_array.sum(axis=1)
            odd_even_array = (digits_array % 2).sum(axis=1)
            pattern_id_array_candidates = np.array(
                [pattern_map[pattern_category(tuple(d))] for d in digits_array],
                dtype=np.int8,
            )
            pattern_index_array = np.array(
                [pattern_map[pattern_category(tuple(d))] for d in draw_digits_array],
                dtype=np.int8,
            )

            # The last EVAL_HOLDOUT rows are evaluated; row 0 can never be
            # evaluated because it has no history.
            eval_start = max(len(df) - EVAL_HOLDOUT, 0)
            first_eval = max(eval_start, 1)
            eval_actual = numbers_array[first_eval:]

            (prev_digits_array, has_prev_array,
             markov_probs_dict, last_digits_per_slot) = build_markov(
                timeslot_array, draw_digits_array, train_end=eval_start
            )

            # === 3. Scoring & evaluation function ===
            def features_for_window(start, stop, slot, prev_digits):
                """Feature planes for rows [start, stop) predicting a draw in *slot*."""
                corr = markov_corr(markov_probs_dict.get(slot), prev_digits, digits_array)
                return candidate_features(
                    numbers_array[start:stop],
                    sum_array[start:stop],
                    odd_count_array[start:stop],
                    pattern_index_array[start:stop],
                    corr,
                    sum_digits_array,
                    odd_even_array,
                    pattern_id_array_candidates,
                )

            def build_eval_features(recent_n):
                """Precompute (6, n_eval, 10000) features for every hold-out row."""
                n_eval = len(df) - first_eval
                feats = np.empty((len(WEIGHT_KEYS), n_eval, N_CANDIDATES), dtype=np.float32)
                for row, idx in enumerate(range(first_eval, len(df))):
                    prev_d = prev_digits_array[idx] if has_prev_array[idx] else None
                    feats[:, row, :] = features_for_window(
                        max(0, idx - recent_n), idx, timeslot_array[idx], prev_d
                    )
                return feats

            # Features depend only on the window size, never on the weights, so
            # they are built once per recent_n instead of once per GA individual.
            # Only one window size is kept to bound memory (~72 MB as float32).
            _eval_feature_cache = {}

            def evaluate_weights_fast(w, recent_n):
                """Return (top-10 hit rate, median rank) of weights *w* on the hold-out."""
                if recent_n < 1:
                    return 0.0, float(N_CANDIDATES)
                if recent_n not in _eval_feature_cache:
                    _eval_feature_cache.clear()
                    _eval_feature_cache[recent_n] = build_eval_features(recent_n)
                return evaluate_weights(w, _eval_feature_cache[recent_n], eval_actual)

            # === 4-5. Main execution ===
            # NOTE(review): the GA is tuned on the same hold-out rows it is
            # scored on, so the reported accuracy is optimistic.
            baseline_w = {k: 1.0 for k in WEIGHT_KEYS}
            baseline_acc, baseline_med = evaluate_weights_fast(baseline_w, 150)

            search_results = {}
            for recent in (50, 150, 300):
                optimizer = GeneticOptimizer(
                    evaluate_weights_fast, recent,
                    pop_size=20, generations=10, seed=RANDOM_SEED,
                )
                best_w_ga, best_acc_ga, best_med_ga, best_met_ga = optimizer.run()
                search_results[recent] = {
                    "weights": best_w_ga,
                    "acc": best_acc_ga,
                    "median": best_med_ga,
                    "metric": best_met_ga,
                }

            # Pick the window size with the lowest GA metric.
            overall_best_recent = min(
                search_results, key=lambda r: search_results[r]["metric"]
            )
            best_w = search_results[overall_best_recent]["weights"]

            # === 6. Final prediction ===
            # Same feature code as evaluation (normalised by the real window
            # length). As in the original script, the Markov feature uses the
            # last row's slot and digits.
            # NOTE(review): that presumes the next draw falls in the same
            # timeslot as the last row -- confirm.
            start_idx = max(len(df) - overall_best_recent, 0)
            next_feats = features_for_window(
                start_idx, len(df), timeslot_array[-1], draw_digits_array[-1]
            )
            final_score = combine_scores(best_w, next_feats)

            # Stable sort: ties resolve to the lowest number, run after run.
            top10_idx = np.argsort(-final_score, kind="stable")[:TOP_K]
            top10_numbers = [f"{i:04d}" for i in top10_idx]
            top10_scores = final_score[top10_idx]

            # === 7. Results display ===
            summary_df = pd.DataFrame({
                "Scenario": ["Baseline (equal w)", "Genetic Optimized"],
                "Top-10 Accuracy": [baseline_acc, search_results[overall_best_recent]["acc"]],
                "Median Rank": [baseline_med, search_results[overall_best_recent]["median"]],
            })

            print("\n" + "="*30)
            print("PERFORMANCE SUMMARY")
            print("="*30)
            display(summary_df)

            plt.figure(figsize=(6, 4))
            plt.bar(summary_df["Scenario"], summary_df["Median Rank"], color=['gray', 'green'])
            plt.title("Median Rank: Baseline vs Genetic GA")
            plt.ylabel("Median Rank (lower is better)")
            plt.show()

            print("\n" + "="*30)
            print("BEST HYPER-PARAMETERS (FOUND BY GA)")
            print("="*30)
            print(f"Recent window (n): {overall_best_recent}")
            print("Weights:", {k: round(v, 4) for k, v in best_w.items()})

            print("\n" + "="*30)
            print("TOP-10 CANDIDATES FOR NEXT DRAW")
            print("="*30)
            for num, sc in zip(top10_numbers, top10_scores):
                print(f"{num} | score={sc:.4f}")

        except Exception as e:
            # Top-level notebook boundary: report and show the traceback.
            print(f"Terjadi kesalahan: {e}")
            import traceback
            traceback.print_exc()
Tingkatkan script ini dan buat ulang
I’ve refactored the entire notebook-style script into a clean, production-ready Python program:
- (`python lottery_prediction_enhanced.py mydata.csv`) instead of relying on Colab’s upload widget.
- `--seed` flag lets you get repeatable results.
- `results/` folder.

Feel free to try it out and let me know what tweaks or extra features you’d like next!