# ================================================...
Criado em: 1 de junho de 2026
Respondido usando GPT-5.5 Thinking por Chat01
Criado em: 1 de junho de 2026
Respondido usando GPT-5.5 Thinking por Chat01
import os
import json
import itertools
import numpy as np
import pandas as pd
# Resolve the input CSV path: inside Google Colab, prompt for an upload;
# anywhere else (or if the upload fails), fall back to a list of known paths.
try:
    from google.colab import files  # type: ignore

    print("Upload file PRIZE_clean_long.csv...")
    uploaded = files.upload()
    # An empty upload raises StopIteration here, which also triggers the fallback.
    CSV_PATH = next(iter(uploaded.keys()))
except Exception:
    # Deliberately broad: any import or upload failure selects the local fallback.
    candidates = [
        "PRIZE_clean_long.csv",
        "/content/PRIZE_clean_long.csv",
        "/mnt/data/PRIZE_clean_long.csv",
    ]
    # Use the first candidate that exists; if none does, keep the first entry.
    CSV_PATH = next((p for p in candidates if os.path.exists(p)), candidates[0])
# Number of 4D predictions reported, and number of digits in a BBFS basis.
TOP_K_FINAL = 5
BBFS_SIZE = 5

# Walk-forward evaluation never starts before this row position.
MIN_HISTORY = 60

# Hyperparameter grid explored during auto-tuning.
TUNE_RECENT_WINDOWS = [72, 144, 288, 576]
TUNE_ALPHAS = [0.5, 1.0, 1.5, 2.0]

# Evaluation covers the last VALIDATION_TAIL rows, stepping by VALIDATION_STRIDE.
VALIDATION_TAIL = 420
VALIDATION_STRIDE = 2

# Named blends of the four probability components. Key order is kept stable
# because the weights are later serialized with json.dumps.
WEIGHT_SETS = [
    {
        "name": "balanced",
        "weights": {"recent": 0.35, "transition": 0.35, "conditional": 0.20, "global": 0.10},
    },
    {
        "name": "recent_strong",
        "weights": {"recent": 0.50, "transition": 0.25, "conditional": 0.15, "global": 0.10},
    },
    {
        "name": "transition_strong",
        "weights": {"recent": 0.25, "transition": 0.50, "conditional": 0.15, "global": 0.10},
    },
    {
        "name": "conditional_strong",
        "weights": {"recent": 0.20, "transition": 0.25, "conditional": 0.45, "global": 0.10},
    },
    {
        "name": "recent_transition",
        "weights": {"recent": 0.45, "transition": 0.40, "conditional": 0.05, "global": 0.10},
    },
    {
        "name": "global_safe",
        "weights": {"recent": 0.25, "transition": 0.25, "conditional": 0.15, "global": 0.35},
    },
]
def load_data(path: str) -> pd.DataFrame:
    """Load the result history CSV and derive one integer column per digit.

    The file must contain the columns ``datetime``, ``waktu``,
    ``slot_dalam_jam`` and ``result_4d``. Rows are sorted by ``datetime``,
    ``slot_dalam_jam`` and, when present, ``source_line``; the index is then
    reset to 0..n-1. ``result_4d`` is normalized to exactly four characters
    (left-padded with zeros, keeping the last four) and split into the
    integer columns ``p1``..``p4``.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The sorted DataFrame with the added ``p1``..``p4`` columns.

    Raises:
        ValueError: If a required column is missing or a ``result_4d`` value
            is not made of four decimal digits after normalization.
    """
    # Read result_4d as text so leading zeros are not lost.
    df = pd.read_csv(path, dtype={"result_4d": str})

    required = {"datetime", "waktu", "slot_dalam_jam", "result_4d"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"Kolom wajib tidak ditemukan: {sorted(missing)}")

    df["datetime"] = pd.to_datetime(df["datetime"])
    sort_cols = [c for c in ("datetime", "slot_dalam_jam", "source_line") if c in df.columns]
    # Stable sort: rows that tie on every sort key keep their file order.
    df = df.sort_values(sort_cols, kind="mergesort").reset_index(drop=True)

    df["result_4d"] = df["result_4d"].astype(str).str.strip().str.zfill(4).str[-4:]

    # Fail early with a readable message instead of an opaque int() error.
    invalid = ~df["result_4d"].str.fullmatch(r"[0-9]{4}")
    if invalid.any():
        examples = df.loc[invalid, "result_4d"].unique()[:5].tolist()
        raise ValueError(f"Nilai result_4d tidak valid (harus 4 digit angka): {examples}")

    for pos in range(4):
        df[f"p{pos + 1}"] = df["result_4d"].str[pos].astype(int)

    return df
def normalize_weights(weights: dict) -> dict:
    """Return a copy of ``weights`` scaled so the values sum to 1.0.

    Args:
        weights: Mapping of component name to a numeric weight.

    Returns:
        A new dict with the same keys, in the same order, and float values
        divided by the total.

    Raises:
        ValueError: If the weights do not add up to a positive number.
    """
    total = float(sum(weights.values()))
    if not total > 0:
        # Covers zero, negative and NaN totals, which cannot be normalized.
        raise ValueError("Total bobot harus lebih besar dari 0.")
    return {name: float(value) / total for name, value in weights.items()}
def digit_distribution(values, alpha: float) -> np.ndarray:
    """Return a smoothed probability distribution over the digits 0-9.

    Every digit starts with a pseudo-count of ``alpha``; each non-missing
    entry of ``values`` adds one to its digit's count. The counts are then
    normalized to sum to 1.

    Args:
        values: Iterable of digits; missing entries (NaN/None) are ignored.
        alpha: Additive smoothing pseudo-count applied to every digit.

    Returns:
        Array of shape (10,) with the probability of each digit. If the total
        count is not positive (for example ``alpha=0`` with no data), a
        uniform distribution is returned instead of NaNs.

    Raises:
        ValueError: If a value lies outside the range 0-9.
    """
    digits = pd.Series(values).dropna().astype(int).to_numpy()
    if digits.size and (digits.min() < 0 or digits.max() > 9):
        raise ValueError("Nilai digit harus berada pada rentang 0-9.")

    counts = np.full(10, float(alpha))
    if digits.size:
        counts += np.bincount(digits, minlength=10)

    total = counts.sum()
    if not total > 0:
        return np.full(10, 0.1)
    return counts / total
def prepare_pairs(df: pd.DataFrame, mode: str) -> pd.DataFrame:
    """Build (source row -> next row) pairs used for transition statistics.

    Each output row carries the source row's ``waktu``, ``slot_dalam_jam``
    and digits ``p1``..``p4``, its index (``source_idx``), the index of the
    row that follows it (``target_idx``) and that row's digits ``y1``..``y4``.

    Args:
        df: History with columns ``waktu``, ``slot_dalam_jam``, ``p1``..``p4``.
            The index values are used as row positions, so a 0..n-1 index is
            expected.
        mode: ``"global"`` pairs each row with the next row overall;
            ``"same_slot"`` pairs it with the next row that has the same
            ``slot_dalam_jam``.

    Returns:
        DataFrame of pairs with integer ``target_idx`` and ``y1``..``y4``.
        Rows without a successor are dropped.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    # Validate first so an invalid mode does not pay for the copy below.
    if mode not in ("global", "same_slot"):
        raise ValueError("mode harus 'global' atau 'same_slot'")

    pair = df[["waktu", "slot_dalam_jam", "p1", "p2", "p3", "p4"]].copy()
    pair["source_idx"] = pair.index
    positions = range(1, 5)

    if mode == "global":
        pair["target_idx"] = pair["source_idx"] + 1
        for pos in positions:
            pair[f"y{pos}"] = df[f"p{pos}"].shift(-1)
        # The last row has no successor.
        pair = pair[pair["target_idx"] < len(df)].copy()
    else:
        pair["target_idx"] = pair.groupby("slot_dalam_jam")["source_idx"].shift(-1)
        for pos in positions:
            pair[f"y{pos}"] = pair.groupby("slot_dalam_jam")[f"p{pos}"].shift(-1)
        # The last row of every slot has no successor within that slot.
        pair = pair.dropna(subset=["target_idx", "y1", "y2", "y3", "y4"]).copy()

    # shift() introduces floats; restore integer dtypes now that NaNs are gone.
    pair["target_idx"] = pair["target_idx"].astype(int)
    for pos in positions:
        pair[f"y{pos}"] = pair[f"y{pos}"].astype(int)

    return pair.reset_index(drop=True)
def build_next_index(df: pd.DataFrame, mode: str) -> np.ndarray:
    """Map every row to the index of the row that follows it.

    Args:
        df: History whose index values are used as row positions (a 0..n-1
            index is expected). ``"same_slot"`` mode needs a
            ``slot_dalam_jam`` column.
        mode: ``"global"`` for the next row overall, ``"same_slot"`` for the
            next row with the same ``slot_dalam_jam``.

    Returns:
        Integer array of length ``len(df)``; entry ``i`` is the successor of
        row ``i`` or ``-1`` when there is none.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    if mode not in ("global", "same_slot"):
        raise ValueError("mode harus 'global' atau 'same_slot'")

    next_idx = np.full(len(df), -1, dtype=int)

    if mode == "global":
        next_idx[:-1] = np.arange(1, len(df))
        return next_idx

    # sort=False keeps groups in order of first appearance; the order does not
    # affect the result because each group writes to its own positions.
    for _, group in df.groupby("slot_dalam_jam", sort=False):
        idx = group.index.to_numpy()
        if len(idx) > 1:
            next_idx[idx[:-1]] = idx[1:]
    return next_idx
def probabilities_for_row(
    df: pd.DataFrame,
    row_idx: int,
    mode: str,
    pair_cache: dict,
    recent_window: int,
    alpha: float,
    weights: dict
) -> list[np.ndarray]:
    """Estimate per-position digit probabilities for the row after ``row_idx``.

    Only rows up to and including ``row_idx`` are used, so the estimate can be
    scored against later rows without leaking future data. For each of the
    four positions, four smoothed distributions are blended:

    1. recent: digits of the last ``recent_window`` observed rows (restricted
       to the current slot in ``"same_slot"`` mode);
    2. global: digits of all observed rows;
    3. transition: digits that followed the current digit at this position;
    4. conditional: digits that followed rows sharing the current row's
       ``waktu`` and ``slot_dalam_jam``.

    Args:
        df: Full history with ``waktu``, ``slot_dalam_jam`` and ``p1``..``p4``.
        row_idx: Position of the most recent known row.
        mode: Key into ``pair_cache``; ``"same_slot"`` enables slot filtering.
        pair_cache: Mapping of mode to the DataFrame from ``prepare_pairs``.
        recent_window: Number of trailing rows used for the recent component.
        alpha: Smoothing pseudo-count; transition and conditional use 0.7x.
        weights: Blend weights with keys ``recent``, ``transition``,
            ``conditional`` and ``global``; normalized before use.

    Returns:
        List of four arrays of shape (10,), one per digit position, each
        summing to 1.
    """
    weights = normalize_weights(weights)
    observed = df.iloc[:row_idx + 1]
    current = df.iloc[row_idx]
    pairs = pair_cache[mode]
    same_slot = mode == "same_slot"
    slot = current["slot_dalam_jam"]

    # Keep only pairs whose target is already known at row_idx.
    # This is what prevents the evaluation from leaking future results.
    pair_avail = pairs[pairs["target_idx"] <= row_idx]

    cond_avail = pair_avail[
        (pair_avail["waktu"] == current["waktu"])
        & (pair_avail["slot_dalam_jam"] == slot)
    ]

    if same_slot:
        recent_observed = observed[observed["slot_dalam_jam"] == slot]
        # Transitions are also restricted to the slot; filter once, not per position.
        trans_pool = pair_avail[pair_avail["slot_dalam_jam"] == slot]
    else:
        recent_observed = observed
        trans_pool = pair_avail

    # Sparser components get lighter smoothing.
    sparse_alpha = alpha * 0.7

    probs = []
    for pos in range(1, 5):
        # 1. Frequency of recent digits.
        p_recent = digit_distribution(
            recent_observed[f"p{pos}"].tail(recent_window),
            alpha=alpha
        )

        # 2. Overall distribution.
        p_global = digit_distribution(
            observed[f"p{pos}"],
            alpha=alpha
        )

        # 3. Markov transition: current digit -> next digit.
        cur_digit = int(current[f"p{pos}"])
        trans_source = trans_pool[trans_pool[f"p{pos}"] == cur_digit]
        p_transition = digit_distribution(
            trans_source[f"y{pos}"],
            alpha=sparse_alpha
        )

        # 4. Conditional on waktu + slot.
        p_conditional = digit_distribution(
            cond_avail[f"y{pos}"],
            alpha=sparse_alpha
        )

        combined = (
            weights["recent"] * p_recent
            + weights["transition"] * p_transition
            + weights["conditional"] * p_conditional
            + weights["global"] * p_global
        )
        probs.append(combined / combined.sum())

    return probs
def top_combinations(probs: list[np.ndarray], k: int = 5) -> pd.DataFrame:
    """Return the ``k`` four-digit numbers with the highest joint score.

    The score of a number is the product of its digits' probabilities at the
    four positions (positions are treated as independent).

    Args:
        probs: Four arrays of shape (10,), one per digit position.
        k: Number of predictions wanted; capped at 10,000.

    Returns:
        DataFrame with columns ``rank`` (1 = best), ``prediksi_4d``
        (zero-padded 4-character string) and ``score``, ordered by descending
        score. Empty when ``k`` is not positive.
    """
    k = min(k, 10000)
    if k <= 0:
        # Without this guard, the slice [-0:] below would select every entry.
        return pd.DataFrame(columns=["rank", "prediksi_4d", "score"])

    # 10x10x10x10 grid of joint scores; flat index i encodes the number i.
    score_grid = (
        probs[0][:, None, None, None]
        * probs[1][None, :, None, None]
        * probs[2][None, None, :, None]
        * probs[3][None, None, None, :]
    )
    flat = score_grid.reshape(-1)

    # Select the k best without sorting all entries, then order just those.
    idx = np.argpartition(flat, -k)[-k:]
    idx = idx[np.argsort(flat[idx])[::-1]]

    rows = [
        {"rank": rank, "prediksi_4d": f"{i:04d}", "score": float(flat[i])}
        for rank, i in enumerate(idx, start=1)
    ]
    return pd.DataFrame(rows)
def best_bbfs_digits(probs: list[np.ndarray], size: int = 5):
    """Pick the set of ``size`` digits most likely to cover every position.

    BBFS 5D here means the best 5 basis digits. The score of a digit set is
    the probability that all four positions of the result fall inside the
    set: the product, over positions, of the summed probabilities of the
    set's digits.

    Args:
        probs: Arrays of shape (10,), one per digit position.
        size: Number of digits in the set (0..10).

    Returns:
        Tuple ``(digits, score)`` where ``digits`` is the chosen set as an
        ascending digit string and ``score`` its coverage probability. On
        ties, the first set in ``itertools.combinations`` order wins.

    Raises:
        ValueError: If ``size`` is outside 0..10.
    """
    if not 0 <= size <= 10:
        raise ValueError("size harus berada pada rentang 0-10")

    # One row per candidate digit set, e.g. 252 rows for size=5.
    combos = np.array(list(itertools.combinations(range(10), size)), dtype=int)
    prob_matrix = np.asarray(probs, dtype=float).reshape(-1, 10)

    # coverage[pos, c] = probability that position pos lands in set c.
    coverage = prob_matrix[:, combos].sum(axis=2)
    scores = coverage.prod(axis=0)

    # argmax returns the first maximum, matching a strict '>' scan in order.
    best = int(np.argmax(scores))
    return "".join(map(str, combos[best])), float(scores[best])
def top_combinations_from_bbfs(
    probs: list[np.ndarray],
    bbfs_digits: str,
    k: int = 5
) -> pd.DataFrame:
    """Rank the four-digit numbers that use only the given basis digits.

    Args:
        probs: Four arrays of shape (10,), one per digit position.
        bbfs_digits: String of allowed digits, e.g. ``"02357"``. Repeated
            digits are counted once.
        k: Number of top-scoring numbers to return.

    Returns:
        DataFrame with columns ``rank`` (1 = best), ``prediksi_4d`` and
        ``score`` (product of the per-position digit probabilities), ordered
        by descending score. Empty when ``bbfs_digits`` is empty.
    """
    # De-duplicate while keeping order so a repeated digit cannot yield
    # the same prediction twice.
    digits = list(dict.fromkeys(int(ch) for ch in bbfs_digits))

    rows = [
        {
            "prediksi_4d": f"{d1}{d2}{d3}{d4}",
            "score": float(probs[0][d1] * probs[1][d2] * probs[2][d3] * probs[3][d4]),
        }
        for d1, d2, d3, d4 in itertools.product(digits, repeat=4)
    ]

    # Explicit columns keep sort_values working when there are no rows.
    out = pd.DataFrame(rows, columns=["prediksi_4d", "score"])
    # Stable sort: equal scores keep their enumeration order.
    out = (
        out.sort_values("score", ascending=False, kind="mergesort")
        .head(k)
        .reset_index(drop=True)
    )
    out.insert(0, "rank", np.arange(1, len(out) + 1))
    return out
def walk_forward_evaluation(
    df: pd.DataFrame,
    mode: str,
    pair_cache: dict,
    next_index_cache: dict,
    recent_window: int,
    alpha: float,
    weights: dict,
    top_list=(1, 5, 10, 20),
    validation_tail: int = VALIDATION_TAIL,
    validation_stride: int = VALIDATION_STRIDE
) -> dict:
    """Score one configuration by predicting each evaluation row's successor.

    For every ``validation_stride``-th row in the last ``validation_tail``
    rows (never earlier than ``MIN_HISTORY``), probabilities are computed
    from data up to that row and compared with the actual next result as
    given by ``next_index_cache[mode]``. Rows without a successor are skipped.

    Args:
        df: Full history including ``result_4d`` and the digit columns.
        mode: ``"global"`` or ``"same_slot"``.
        pair_cache: Mapping of mode to the pairs from ``prepare_pairs``.
        next_index_cache: Mapping of mode to the array from ``build_next_index``.
        recent_window: Window size for the recent component.
        alpha: Smoothing pseudo-count.
        weights: Blend weights of the four probability components.
        top_list: Cut-offs ``k`` for which the exact top-k hit rate is reported.
        validation_tail: Number of trailing rows eligible for evaluation.
        validation_stride: Step between evaluated rows.

    Returns:
        Dict with the configuration, ``n_test``, ``bbfs5_cover`` (share of
        results whose digits all lie in the best BBFS set), per-position and
        average top-1 digit accuracy, and ``top_{k}`` hit rates. When no row
        could be evaluated, a dict with ``n_test`` 0 and an ``error`` message.
    """
    n_rows = len(df)
    start = max(MIN_HISTORY, n_rows - validation_tail)
    max_k = max(top_list)
    next_idx = next_index_cache[mode]

    hits = {k: 0 for k in top_list}
    pos_hits = np.zeros(4, dtype=float)
    bbfs5_cover = 0
    n = 0

    for row_idx in range(start, n_rows, validation_stride):
        target_idx = int(next_idx[row_idx])
        if target_idx < 0 or target_idx >= n_rows:
            # No known successor for this row, so nothing to score against.
            continue

        truth = df.loc[target_idx, "result_4d"]

        probs = probabilities_for_row(
            df=df,
            row_idx=row_idx,
            mode=mode,
            pair_cache=pair_cache,
            recent_window=recent_window,
            alpha=alpha,
            weights=weights
        )

        preds = top_combinations(probs, k=max_k)["prediksi_4d"].tolist()
        for k in top_list:
            hits[k] += int(truth in preds[:k])

        # Per-position accuracy of the single best prediction.
        top1 = preds[0]
        for pos in range(4):
            pos_hits[pos] += int(top1[pos] == truth[pos])

        bbfs_digits, _ = best_bbfs_digits(probs, size=BBFS_SIZE)
        bbfs5_cover += int(all(ch in bbfs_digits for ch in truth))

        n += 1

    if n == 0:
        return {
            "mode": mode,
            "n_test": 0,
            "error": "Tidak ada baris evaluasi."
        }

    result = {
        "mode": mode,
        "n_test": n,
        "recent_window": recent_window,
        "alpha": alpha,
        "weight_name": None,  # filled in by the caller, which knows the set name
        "weights": json.dumps(normalize_weights(weights)),
        "bbfs_size": BBFS_SIZE,
        "bbfs5_cover": bbfs5_cover / n,
        "digit_acc_avg": float((pos_hits / n).mean()),
        "digit_acc_p1": float(pos_hits[0] / n),
        "digit_acc_p2": float(pos_hits[1] / n),
        "digit_acc_p3": float(pos_hits[2] / n),
        "digit_acc_p4": float(pos_hits[3] / n),
    }
    for k in top_list:
        result[f"top_{k}"] = hits[k] / n

    return result
def auto_tune_mode(
    df: pd.DataFrame,
    mode: str,
    pair_cache: dict,
    next_index_cache: dict
):
    """Grid-search window, smoothing and weight set for one mode.

    Every combination of ``TUNE_RECENT_WINDOWS``, ``TUNE_ALPHAS`` and
    ``WEIGHT_SETS`` is scored with ``walk_forward_evaluation``; combinations
    that return an error are skipped. Progress and the five best rows are
    printed.

    Args:
        df: Full history.
        mode: ``"global"`` or ``"same_slot"``.
        pair_cache: Mapping of mode to the pairs from ``prepare_pairs``.
        next_index_cache: Mapping of mode to the array from ``build_next_index``.

    Returns:
        Tuple ``(summary, best_config)``: ``summary`` holds one row per
        evaluated configuration sorted best-first; ``best_config`` is a dict
        with ``config_id``, ``mode``, ``recent_window``, ``alpha``,
        ``weight_name`` and ``weights`` of the top row.

    Raises:
        RuntimeError: If no configuration could be evaluated.
    """
    records = []
    configs = {}

    print("\n" + "=" * 70)
    print(f"Auto-tuning mode: {mode}")

    config_id = 0
    grid = itertools.product(TUNE_RECENT_WINDOWS, TUNE_ALPHAS, WEIGHT_SETS)
    for recent_window, alpha, wset in grid:
        weights = wset["weights"]

        res = walk_forward_evaluation(
            df=df,
            mode=mode,
            pair_cache=pair_cache,
            next_index_cache=next_index_cache,
            recent_window=recent_window,
            alpha=alpha,
            weights=weights,
            top_list=(1, 5, 10, 20),
            validation_tail=VALIDATION_TAIL,
            validation_stride=VALIDATION_STRIDE
        )
        if "error" in res:
            continue

        res["config_id"] = config_id
        res["weight_name"] = wset["name"]

        # Selection score: exact Top 5 hits dominate, followed by Top 10 and
        # Top 20, then BBFS coverage and average digit accuracy.
        res["selection_score"] = (
            res["top_5"] * 1000
            + res["top_10"] * 100
            + res["top_20"] * 10
            + res["bbfs5_cover"] * 2
            + res["digit_acc_avg"]
        )

        records.append(res)
        configs[config_id] = {
            "config_id": config_id,
            "mode": mode,
            "recent_window": recent_window,
            "alpha": alpha,
            "weight_name": wset["name"],
            "weights": weights
        }
        config_id += 1

    summary = pd.DataFrame(records)
    if summary.empty:
        raise RuntimeError(f"Tuning gagal untuk mode {mode}: tidak ada hasil evaluasi.")

    summary = summary.sort_values(
        ["selection_score", "top_5", "top_10", "bbfs5_cover", "digit_acc_avg"],
        ascending=False
    ).reset_index(drop=True)

    best_config = configs[int(summary.iloc[0]["config_id"])]

    print("\nBest config:")
    print(summary.head(5)[[
        "mode",
        "n_test",
        "recent_window",
        "alpha",
        "weight_name",
        "top_1",
        "top_5",
        "top_10",
        "top_20",
        "bbfs5_cover",
        "digit_acc_avg",
        "selection_score"
    ]])

    return summary, best_config
def make_prediction_package(
    df: pd.DataFrame,
    row_idx: int,
    mode: str,
    config: dict,
    pair_cache: dict,
    label: str
):
    """Produce the prediction tables for the result following ``row_idx``.

    Args:
        df: Full history.
        row_idx: Position of the most recent known row to predict from.
        mode: ``"global"`` or ``"same_slot"``.
        config: Tuned configuration with ``recent_window``, ``alpha``,
            ``weights`` and ``weight_name``.
        pair_cache: Mapping of mode to the pairs from ``prepare_pairs``.
        label: Free-text tag copied into every output row.

    Returns:
        Tuple ``(top5, bbfs_top5, digit_probs)``:
        ``top5`` holds the ``TOP_K_FINAL`` best numbers overall;
        ``bbfs_top5`` holds the ``TOP_K_FINAL`` best numbers built from the
        best ``BBFS_SIZE`` digits, plus that digit set and its score;
        ``digit_probs`` lists every digit's probability and rank per position.
    """
    probs = probabilities_for_row(
        df=df,
        row_idx=row_idx,
        mode=mode,
        pair_cache=pair_cache,
        recent_window=config["recent_window"],
        alpha=config["alpha"],
        weights=config["weights"]
    )

    # Overall top predictions, prefixed with the configuration columns.
    top5 = top_combinations(probs, k=TOP_K_FINAL)
    top5_meta = [
        ("label", label),
        ("mode", mode),
        ("recent_window", config["recent_window"]),
        ("alpha", config["alpha"]),
        ("weight_name", config["weight_name"]),
    ]
    for col_pos, (col, value) in enumerate(top5_meta):
        top5.insert(col_pos, col, value)

    # Best digit basis and the top predictions restricted to it.
    bbfs_digits, bbfs_score = best_bbfs_digits(probs, size=BBFS_SIZE)
    bbfs_top5 = top_combinations_from_bbfs(
        probs=probs,
        bbfs_digits=bbfs_digits,
        k=TOP_K_FINAL
    )
    bbfs_meta = [
        ("label", label),
        ("mode", mode),
        ("bbfs_5d", bbfs_digits),
        ("bbfs_score", float(bbfs_score)),
        ("recent_window", config["recent_window"]),
        ("alpha", config["alpha"]),
        ("weight_name", config["weight_name"]),
    ]
    for col_pos, (col, value) in enumerate(bbfs_meta):
        bbfs_top5.insert(col_pos, col, value)

    # Per-position digit ranking, most probable first (ties keep digit order).
    digit_rows = []
    for pos, p in enumerate(probs, start=1):
        ranked = sorted(
            ((d, float(p[d])) for d in range(10)),
            key=lambda item: item[1],
            reverse=True
        )
        for rank, (digit, prob) in enumerate(ranked, start=1):
            digit_rows.append({
                "label": label,
                "mode": mode,
                "posisi": pos,
                "rank": rank,
                "digit": digit,
                "probability": prob
            })

    digit_probs = pd.DataFrame(digit_rows)

    return top5, bbfs_top5, digit_probs
# ---------------------------------------------------------------------------
# Script entry: load data, tune both modes, evaluate, predict, write CSVs.
# ---------------------------------------------------------------------------
df = load_data(CSV_PATH)

print(f"Data terbaca: {len(df):,} baris")
print("Rentang waktu:", df["datetime"].min(), "sampai", df["datetime"].max())
print("\nBaris terakhir:")
print(df.tail(1)[["datetime", "waktu", "slot_dalam_jam", "result_4d"]])

# Precompute the pair tables and successor maps once; every evaluation reuses them.
pair_cache = {
    "global": prepare_pairs(df, "global"),
    "same_slot": prepare_pairs(df, "same_slot")
}
next_index_cache = {
    "global": build_next_index(df, "global"),
    "same_slot": build_next_index(df, "same_slot")
}

# --- Tuning -----------------------------------------------------------------
summary_global, best_global = auto_tune_mode(
    df=df,
    mode="global",
    pair_cache=pair_cache,
    next_index_cache=next_index_cache
)
summary_same_slot, best_same_slot = auto_tune_mode(
    df=df,
    mode="same_slot",
    pair_cache=pair_cache,
    next_index_cache=next_index_cache
)

summary_all = pd.concat([summary_global, summary_same_slot], ignore_index=True)
summary_all = summary_all.sort_values(
    ["selection_score", "top_5", "top_10", "bbfs5_cover", "digit_acc_avg"],
    ascending=False
).reset_index(drop=True)
summary_all.to_csv("tuning_summary_top5_bbfs5d.csv", index=False)

print("\n" + "=" * 70)
print("Ringkasan tuning terbaik semua mode:")
print(summary_all.head(10)[[
    "mode",
    "n_test",
    "recent_window",
    "alpha",
    "weight_name",
    "top_1",
    "top_5",
    "top_10",
    "top_20",
    "bbfs5_cover",
    "digit_acc_avg",
    "selection_score"
]])

# --- Final evaluation of the best configs -----------------------------------
# Tuning used VALIDATION_STRIDE; the winners are re-scored on every row.
print("\n" + "=" * 70)
print("Evaluasi final config terbaik, stride 1:")
final_eval_global = walk_forward_evaluation(
    df=df,
    mode="global",
    pair_cache=pair_cache,
    next_index_cache=next_index_cache,
    recent_window=best_global["recent_window"],
    alpha=best_global["alpha"],
    weights=best_global["weights"],
    top_list=(1, 5, 10, 20),
    validation_tail=VALIDATION_TAIL,
    validation_stride=1
)
final_eval_same_slot = walk_forward_evaluation(
    df=df,
    mode="same_slot",
    pair_cache=pair_cache,
    next_index_cache=next_index_cache,
    recent_window=best_same_slot["recent_window"],
    alpha=best_same_slot["alpha"],
    weights=best_same_slot["weights"],
    top_list=(1, 5, 10, 20),
    validation_tail=VALIDATION_TAIL,
    validation_stride=1
)
print("\nGLOBAL:")
print(final_eval_global)
print("\nSAME_SLOT:")
print(final_eval_same_slot)
pd.DataFrame([final_eval_global, final_eval_same_slot]).to_csv(
    "final_evaluation_top5_bbfs5d.csv",
    index=False
)

# --- Global prediction after the last row -----------------------------------
last_idx = len(df) - 1
print("\n" + "=" * 70)
print("Prediksi TOP 5 GLOBAL setelah baris terakhir")
print("Best global config:", best_global)
top5_global, bbfs_global, digit_probs_global = make_prediction_package(
    df=df,
    row_idx=last_idx,
    mode="global",
    config=best_global,
    pair_cache=pair_cache,
    label="next_global_after_last_row"
)
print("\nTOP 5 GLOBAL:")
print(top5_global)
print("\nBBFS 5D GLOBAL:")
print(bbfs_global)
top5_global.to_csv("prediksi_top5_global.csv", index=False)
bbfs_global.to_csv("bbfs5d_global.csv", index=False)
digit_probs_global.to_csv("digit_probabilities_global.csv", index=False)

# --- Same-slot predictions for every slot present at the last datetime ------
print("\n" + "=" * 70)
print("Prediksi TOP 5 SAME_SLOT untuk slot-slot pada datetime terakhir")
print("Best same_slot config:", best_same_slot)
last_datetime = df["datetime"].max()
last_rows = df[df["datetime"] == last_datetime].copy()
all_top5_slot = []
all_bbfs_slot = []
all_digit_probs_slot = []
for idx, row in last_rows.iterrows():
    # Underscore separators restored; they match the style of the global label.
    label = (
        f"next_same_slot_slot_{row['slot_dalam_jam']}"
        f"_after_{row['datetime']}"
    )

    print("\n" + "-" * 70)
    print(
        "Slot:", row["slot_dalam_jam"],
        "| waktu:", row["waktu"],
        "| result terakhir:", row["result_4d"]
    )

    top5_slot, bbfs_slot, digit_probs_slot = make_prediction_package(
        df=df,
        row_idx=int(idx),
        mode="same_slot",
        config=best_same_slot,
        pair_cache=pair_cache,
        label=label
    )

    # Tag each table with the slot it belongs to and that slot's last result.
    for table in (top5_slot, bbfs_slot, digit_probs_slot):
        table.insert(0, "slot_dalam_jam", row["slot_dalam_jam"])
        table.insert(1, "last_result_4d", row["result_4d"])

    print("\nTOP 5 SAME_SLOT:")
    print(top5_slot)
    print("\nBBFS 5D SAME_SLOT:")
    print(bbfs_slot)

    all_top5_slot.append(top5_slot)
    all_bbfs_slot.append(bbfs_slot)
    all_digit_probs_slot.append(digit_probs_slot)

pred_top5_same_slot = pd.concat(all_top5_slot, ignore_index=True)
pred_bbfs_same_slot = pd.concat(all_bbfs_slot, ignore_index=True)
pred_digit_probs_same_slot = pd.concat(all_digit_probs_slot, ignore_index=True)
pred_top5_same_slot.to_csv("prediksi_top5_same_slot.csv", index=False)
pred_bbfs_same_slot.to_csv("bbfs5d_same_slot.csv", index=False)
pred_digit_probs_same_slot.to_csv("digit_probabilities_same_slot.csv", index=False)

# --- Summary of written files -----------------------------------------------
print("\n" + "=" * 70)
print("File output dibuat:")
print("- tuning_summary_top5_bbfs5d.csv")
print("- final_evaluation_top5_bbfs5d.csv")
print("- prediksi_top5_global.csv")
print("- bbfs5d_global.csv")
print("- digit_probabilities_global.csv")
print("- prediksi_top5_same_slot.csv")
print("- bbfs5d_same_slot.csv")
print("- digit_probabilities_same_slot.csv")
print("\nCatatan:")
print("1. TOP 5 = 5 angka 4D dengan skor probabilitas tertinggi.")
print("2. BBFS 5D = 5 digit basis terbaik, lalu script tampilkan Top 5 kombinasi 4D dari digit tersebut.")
print("3. Gunakan hasil sebagai ranking statistik, bukan kepastian.")
Fokus pada same slot untuk bbfs 5D dan 2D terkuat
Saya sudah fokuskan ke SAME_SLOT saja dan menghitung:
p1p2, p1p3, p1p4, p2p3, p2p4, p3p4
2026-06-01 06:30
recent_window=288, alpha=0.5, weight_name=transition_strong
Bobot:
recent 0.25
transition 0.50
conditional 0.15
global 0.10
Catatan evaluasi: exact Top 5 historis masih 0% pada final stride 1, dan BBFS cover sekitar 5.995%, mendekati baseline acak BBFS 5 digit yaitu 6.25%. Jadi ini lebih cocok dipakai sebagai ranking statistik, bukan sinyal pasti.
| Slot | Last result | BBFS 5D | Top 5 4D dari BBFS | 2D terkuat overall |
|---|---|---|---|---|
| 1 | 8553 | 01489 | 8014, 9014, 4014, 0014, 8011 | p3p4:14, p2p3:21, p2p3:01, p3p4:11, p2p4:24 |
| 2 | 0197 | 13568 | 1518, 6518, 8518, 3518, 5518 | p2p3:51, p1p2:45, p2p4:52, p1p3:41, p3p4:12 |
| 3 | 1070 | 01457 | 1744, 1741, 1444, 1441, 7744 | p1p3:14, p1p4:14, p1p2:17, p1p4:11, p3p4:44 |
File hasil:
Prediksi untuk hasil selanjutnya dengan analisis di atas
Saya pakai config SAME_SLOT sebelumnya: recent_window=288, alpha=0.5, bobot transition_strong. Data terbaru berhenti di 02 Juni 2026 06:30 = 7738, jadi target berikutnya saya anggap 02 Juni 2026 11:30 untuk Prize 2 / same slot. (PRIZE_2_ONLY.txt)
BBFS 5D: 02357
Top 5 4D dari BBFS:
| Rank | 4D |
|---|---|
| 1 | 5302 |
| 2 | 5303 |
| 3 | 5322 |
| 4 | 5323 |
| 5 | 5307 |
Cadangan kuat dari BBFS: 5702, 5703, 5502, 5327, 5722
| Rank | Posisi | 2D |
|---|---|---|
| 1 | p1p2 / depan | 53 |
| 2 | p1p3 / silang | 50 |
| 3 | p1p3 / silang | 52 |
| 4 | p1p2 / depan | 57 |
| 5 | p1p2 / depan | 55 |
| 6 | p2p3 / tengah | 30 |
| 7 | p1p2 / depan | 33 |
| 8 | p1p4 / silang | 52 |
| 9 | p1p4 / silang | 53 |
| 10 | p1p3 / silang | 30 |
| Posisi | Digit kuat |
|---|---|
| p1 | 5, 3, 8, 6, 4 |
| p2 | 3, 7, 5, 2, 0 |
| p3 | 0, 9, 2, 7, 8 |
| p4 | 9, 2, 3, 7, 6 |
Kesimpulan singkat: arah utama saya ambil 5302 / 5303 / 5322 / 5323 / 5307, dengan BBFS 02357 dan 2D paling kuat 53, 50, 52, 57, 55. Tetap anggap ini ranking statistik historis, bukan kepastian hasil.