Prediksi untuk hasil selanjutnya yang akurat dan b...

创建于:2026年6月3日

使用 GPT-5.5 Thinking 回答 Chat01

问题

prize2_features.csv

Buat prediksi yang akurat untuk hasil selanjutnya, dan buatkan skripnya untuk dijalankan di Google Colab.

Python

python
import pandas as pd, os, json, textwrap, numpy as np path="/mnt/data/prize2_features.csv" df=pd.read_csv(path) df.shape, df.head(), df.tail(), df.columns.tolist(), df.dtypes ((668, 85), datetime result ... target_d3_puluhan target_d4_satuan 0 2026-02-11 03:30:00 2992 ... 9 2 1 2026-02-11 06:30:00 9443 ... 4 3 2 2026-02-11 11:30:00 8459 ... 5 9 3 2026-02-11 13:30:00 6407 ... 0 7 4 2026-02-11 17:30:00 3041 ... 4 1 [5 rows x 85 columns], datetime result ... target_d3_puluhan target_d4_satuan 663 2026-06-02 13:30:00 685 ... 8 5 664 2026-06-02 17:30:00 7146 ... 4 6 665 2026-06-02 21:30:00 3942 ... 4 2 666 2026-06-03 03:30:00 3020 ... 2 0 667 2026-06-03 06:30:00 4880 ... 8 0 [5 rows x 85 columns], ['datetime', 'result', 'idx', 'hour', 'minute', 'day', 'month', 'dayofweek', 'draw_slot', 'lag1_d1_ribuan', 'lag1_d2_ratusan', 'lag1_d3_puluhan', 'lag1_d4_satuan', 'lag1_digit_sum', 'lag1_odd_count', 'lag1_unique_digit_count', 'lag1_missing_count', 'lag2_d1_ribuan', 'lag2_d2_ratusan', 'lag2_d3_puluhan', 'lag2_d4_satuan', 'lag2_digit_sum', 'lag2_odd_count', 'lag2_unique_digit_count', 'lag2_missing_count', 'lag3_d1_ribuan', 'lag3_d2_ratusan', 'lag3_d3_puluhan', 'lag3_d4_satuan', 'lag3_digit_sum', 'lag3_odd_count', 'lag3_unique_digit_count', 'lag3_missing_count', 'lag5_d1_ribuan', 'lag5_d2_ratusan', 'lag5_d3_puluhan', 'lag5_d4_satuan', 'lag5_digit_sum', 'lag5_odd_count', 'lag5_unique_digit_count', 'lag5_missing_count', 'lag10_d1_ribuan', 'lag10_d2_ratusan', 'lag10_d3_puluhan', 'lag10_d4_satuan', 'lag10_digit_sum', 'lag10_odd_count', 'lag10_unique_digit_count', 'lag10_missing_count', 'roll5_d1_ribuan_mean', 'roll5_d1_ribuan_std', 'roll5_d2_ratusan_mean', 'roll5_d2_ratusan_std', 'roll5_d3_puluhan_mean', 'roll5_d3_puluhan_std', 'roll5_d4_satuan_mean', 'roll5_d4_satuan_std', 'roll10_d1_ribuan_mean', 'roll10_d1_ribuan_std', 'roll10_d2_ratusan_mean', 'roll10_d2_ratusan_std', 'roll10_d3_puluhan_mean', 'roll10_d3_puluhan_std', 'roll10_d4_satuan_mean', 'roll10_d4_satuan_std', 
'roll20_d1_ribuan_mean', 'roll20_d1_ribuan_std', 'roll20_d2_ratusan_mean', 'roll20_d2_ratusan_std', 'roll20_d3_puluhan_mean', 'roll20_d3_puluhan_std', 'roll20_d4_satuan_mean', 'roll20_d4_satuan_std', 'roll50_d1_ribuan_mean', 'roll50_d1_ribuan_std', 'roll50_d2_ratusan_mean', 'roll50_d2_ratusan_std', 'roll50_d3_puluhan_mean', 'roll50_d3_puluhan_std', 'roll50_d4_satuan_mean', 'roll50_d4_satuan_std', 'target_d1_ribuan', 'target_d2_ratusan', 'target_d3_puluhan', 'target_d4_satuan'], datetime object result int64 idx int64 hour float64 minute float64 ... roll50_d4_satuan_std float64 target_d1_ribuan int64 target_d2_ratusan int64 target_d3_puluhan int64 target_d4_satuan int64 Length: 85, dtype: object)

Python

python
# Sanity-check the draw schedule, the slot encoding and missing values
# before any modelling is attempted.
df['datetime_dt'] = pd.to_datetime(df['datetime'])

overview_cols = [
    'datetime', 'idx', 'hour', 'minute', 'draw_slot', 'result',
    'target_d1_ribuan', 'target_d2_ratusan', 'target_d3_puluhan', 'target_d4_satuan',
]
recent_draws = df[overview_cols].tail(12)
print(recent_draws.to_string())

# Number of rows for every (hour, minute, draw_slot) combination.
slot_counts = df.groupby(['hour', 'minute', 'draw_slot']).size()
print(slot_counts)

# Most frequent gaps between consecutive draw timestamps.
gaps = df['datetime_dt'].diff()
print(gaps.value_counts().head(20))

print(df[['datetime_dt', 'draw_slot']].tail(20))

# The ten columns with the most missing values.
nan_per_column = df.isna().sum()
print(nan_per_column.sort_values(ascending=False).head(10))

print(df[['idx']].agg(['min', 'max']))

Python

python
# Compare the precomputed lag/rolling features of the most recent row with
# statistics recomputed from the target digits, to see whether the rolling
# window of a row includes that row's own target or only the rows before it.
n_rows = len(df)
last_label = df.index[-1]  # 667 for the 668-row file inspected here

for col in ['roll5_d1_ribuan_mean', 'roll5_d1_ribuan_std', 'roll10_d1_ribuan_mean',
            'lag1_d1_ribuan', 'lag5_d1_ribuan']:
    print(col, df.loc[last_label, col])

# previous target d1 values
targets = df[['target_d1_ribuan', 'target_d2_ratusan', 'target_d3_puluhan', 'target_d4_satuan']]
d1 = targets['target_d1_ribuan']

# The slice holds eight values, so the label says 8 (it used to say 7).
print("last 8 d1:", d1.iloc[n_rows - 8:n_rows].tolist())

# Window made of the five rows strictly before the last row.
prev5 = d1.iloc[n_rows - 6:n_rows - 1]
print(f"prev5 before row{last_label} d1", prev5.tolist(), np.mean(prev5), np.std(prev5, ddof=1))

# Window made of the last five rows, the last row included.
last5 = d1.iloc[n_rows - 5:n_rows]
print(f"including row{last_label} last5", last5.tolist(), np.mean(last5), np.std(last5, ddof=1))

print(df[['datetime', 'target_d1_ribuan', 'lag1_d1_ribuan', 'lag2_d1_ribuan', 'lag3_d1_ribuan',
          'lag5_d1_ribuan', 'roll5_d1_ribuan_mean', 'roll5_d1_ribuan_std']].tail(8).to_string())

Python

python
from sklearn.ensemble import (
    RandomForestClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier,
    HistGradientBoostingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.model_selection import TimeSeriesSplit
from sklearn.base import clone
import numpy as np, pandas as pd, warnings, math, os, textwrap, json

warnings.filterwarnings("ignore")

# Chronological hold-out comparison of several classifiers, one model per
# digit position. Relies on `df` (with the helper column `datetime_dt`) built
# by the earlier cells.
target_cols = ['target_d1_ribuan', 'target_d2_ratusan', 'target_d3_puluhan', 'target_d4_satuan']
# The raw result and the target digits are excluded from the features.
drop_cols = ['datetime', 'datetime_dt', 'result'] + target_cols
feature_cols = [c for c in df.columns if c not in drop_cols]
X = df[feature_cols].astype(float)
Y = df[target_cols].astype(int)

# First 80% of the rows train the models, the remaining 20% evaluate them.
n = len(df)
split = int(n * 0.8)
X_train, X_test = X.iloc[:split], X.iloc[split:]
Y_train, Y_test = Y.iloc[:split], Y.iloc[split:]

models = {
    "ExtraTrees": ExtraTreesClassifier(
        n_estimators=500, random_state=42, min_samples_leaf=2,
        max_features='sqrt', class_weight='balanced'),
    "RandomForest": RandomForestClassifier(
        n_estimators=500, random_state=42, min_samples_leaf=2,
        max_features='sqrt', class_weight='balanced'),
    "HistGB": HistGradientBoostingClassifier(
        random_state=42, max_iter=150, learning_rate=0.05, l2_regularization=0.1),
    "GradBoost": GradientBoostingClassifier(
        random_state=42, n_estimators=150, learning_rate=0.05, max_depth=2),
    # `multi_class` is deprecated in recent scikit-learn and 'auto' was the
    # default anyway, so the argument is simply omitted.
    "LogReg": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, C=0.3)),
}

results = []
preds_by_model = {}
probas_by_model = {}
for name, base in models.items():
    digit_preds = []
    digit_probas = []
    accs = []
    for col in target_cols:
        m = clone(base)
        m.fit(X_train, Y_train[col])
        pred = m.predict(X_test)
        acc = accuracy_score(Y_test[col], pred)
        accs.append(acc)
        digit_preds.append(pred)
        if hasattr(m, "predict_proba"):
            proba = m.predict_proba(X_test)
            # Align to a fixed 0-9 layout: a digit absent from the training
            # labels has no column in `proba` and keeps probability 0.
            full = np.zeros((len(X_test), 10))
            for j, cl in enumerate(m.classes_):
                full[:, int(cl)] = proba[:, j]
            digit_probas.append(full)
    # One row per test draw, one column per digit position.
    pred_arr = np.vstack(digit_preds).T
    # Share of test draws where all four digits are right at the same time.
    exact = (pred_arr == Y_test.values).all(axis=1).mean()
    results.append([name, np.mean(accs), exact] + accs)
    preds_by_model[name] = pred_arr
    probas_by_model[name] = digit_probas

res = pd.DataFrame(
    results, columns=["model", "avg_digit_acc", "exact4_acc"] + target_cols
).sort_values("avg_digit_acc", ascending=False)
res

Python

python
import pandas as pd, numpy as np, warnings, os, math, json, textwrap from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier from sklearn.linear_model import LogisticRegression from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.metrics import accuracy_score from sklearn.base import clone warnings.filterwarnings("ignore") path="/mnt/data/prize2_features.csv" df=pd.read_csv(path) df['datetime_dt']=pd.to_datetime(df['datetime']) target_cols = ['target_d1_ribuan','target_d2_ratusan','target_d3_puluhan','target_d4_satuan'] drop_cols

Python

python
import json
import math
import os
import textwrap
import warnings

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings("ignore")

# Lighter rerun of the model comparison: fewer estimators, one classifier per
# digit position, evaluated on the chronologically last 20% of the rows.
path = "/mnt/data/prize2_features.csv"
df = pd.read_csv(path)
df['datetime_dt'] = pd.to_datetime(df['datetime'])

target_cols = ['target_d1_ribuan', 'target_d2_ratusan', 'target_d3_puluhan', 'target_d4_satuan']
drop_cols = ['datetime', 'datetime_dt', 'result'] + target_cols
feature_cols = [c for c in df.columns if c not in drop_cols]
X = df[feature_cols].astype(float)
Y = df[target_cols].astype(int)

n = len(df)
split = int(n * 0.8)
X_train, Y_train = X.iloc[:split], Y.iloc[:split]
X_test, Y_test = X.iloc[split:], Y.iloc[split:]

models = {
    "ExtraTrees": ExtraTreesClassifier(
        n_estimators=120, random_state=42, min_samples_leaf=2,
        max_features='sqrt', class_weight='balanced', n_jobs=-1),
    "RandomForest": RandomForestClassifier(
        n_estimators=120, random_state=42, min_samples_leaf=2,
        max_features='sqrt', class_weight='balanced', n_jobs=-1),
    "GradBoost": GradientBoostingClassifier(
        random_state=42, n_estimators=60, learning_rate=0.05, max_depth=2),
    "LogReg": make_pipeline(StandardScaler(), LogisticRegression(max_iter=500, C=0.3)),
}


def _backtest(label, estimator):
    """Fit one clone of `estimator` per digit and return its result row."""
    per_digit_pred = []
    per_digit_acc = []
    for target in target_cols:
        fitted = clone(estimator)
        fitted.fit(X_train, Y_train[target])
        guess = fitted.predict(X_test)
        per_digit_pred.append(guess)
        per_digit_acc.append(accuracy_score(Y_test[target], guess))
    # Rows are test draws, columns are digit positions.
    stacked = np.vstack(per_digit_pred).T
    all_four_right = (stacked == Y_test.values).all(axis=1).mean()
    return [label, np.mean(per_digit_acc), all_four_right] + per_digit_acc


results = [_backtest(label, estimator) for label, estimator in models.items()]

res = pd.DataFrame(results, columns=["model", "avg_digit_acc", "exact4_acc"] + target_cols)
res = res.sort_values("avg_digit_acc", ascending=False)
print(res.to_string(index=False))
print("test size", len(X_test), "split idx", split)

Python

python
import pandas as pd, numpy as np, warnings, math, itertools, os from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier from sklearn.linear_model import LogisticRegression from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.base import clone from sklearn.metrics import accuracy_score warnings.filterwarnings("ignore") path="/mnt/data/prize2_features.csv" df=pd.read_csv(path) df['datetime_dt']=pd.to_datetime(df['datetime']) target_cols = ['target_d1_ribuan','target_d2_ratusan','target_d3_puluhan','target_d4_satuan'] drop_cols = ['datetime','datetime_dt','result'] + target_cols feature

思考

Saya sudah membaca datanya: ada 668 baris, dari 2026-02-11 sampai 2026-06-03 06:30, dengan target 4 digit. Dari backtest awal, model belum menunjukkan sinyal kuat; akurasi per digit hanya sekitar 10–11%, jadi hasil harus dianggap kandidat probabilistik, bukan kepastian.

Thought for 4m 3s

Python

python
import json
import os
import pathlib
import textwrap

# Source of the deliverable. It is kept as a raw string so that escapes such
# as "\n" inside the generated script's print() calls are written verbatim.
# The same text is saved as a .py file and embedded as the single code cell of
# a Colab notebook.
#
# Fixes relative to the first draft:
#   * `display` is imported from IPython with a print() fallback, so the .py
#     file also runs with a plain Python interpreter.
#   * build_next_feature_row() computes its medians from the history it is
#     given instead of the global `X`, and reports a too-short history with a
#     clear error instead of a bare IndexError.
script = r'''
# ============================================================
# Prediksi hasil Prize2 berikutnya dari prize2_features.csv
# Cocok dijalankan di Google Colab
# ============================================================

# 1) Upload file CSV
# Jalankan cell ini di Colab, lalu pilih file: prize2_features.csv
try:
    from google.colab import files
    uploaded = files.upload()
    CSV_PATH = list(uploaded.keys())[0]
except Exception:
    # Untuk lokal/Jupyter biasa
    CSV_PATH = "prize2_features.csv"

# 2) Import library
import warnings
warnings.filterwarnings("ignore")

import itertools
import numpy as np
import pandas as pd

from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# display() hanya otomatis tersedia di Colab/Jupyter.
# Jika script dijalankan dengan Python biasa, tabel dicetak sebagai teks.
try:
    from IPython.display import display
except ImportError:
    def display(obj):
        print(obj.to_string(index=False) if hasattr(obj, "to_string") else obj)

# 3) Load data
df = pd.read_csv(CSV_PATH)
df["datetime"] = pd.to_datetime(df["datetime"])
df = df.sort_values("datetime").reset_index(drop=True)

target_cols = [
    "target_d1_ribuan",
    "target_d2_ratusan",
    "target_d3_puluhan",
    "target_d4_satuan",
]

required = ["datetime", "result", "idx"] + target_cols
missing = [c for c in required if c not in df.columns]
if missing:
    raise ValueError(f"Kolom wajib tidak ditemukan: {missing}")

# Pisahkan fitur dan target.
# Hindari leakage: result dan kolom target tidak dipakai sebagai fitur.
drop_cols = ["datetime", "result"] + target_cols
feature_cols = [c for c in df.columns if c not in drop_cols]

X = df[feature_cols].astype(float)
Y = df[target_cols].astype(int)

print("Jumlah data:", len(df))
print("Periode data:", df["datetime"].min(), "sampai", df["datetime"].max())
print("Hasil terakhir:", f"{int(df['result'].iloc[-1]):04d}", "pada", df["datetime"].iloc[-1])

# 4) Model kandidat
models = {
    "ExtraTrees": ExtraTreesClassifier(
        n_estimators=200,
        random_state=42,
        min_samples_leaf=2,
        max_features="sqrt",
        class_weight="balanced",
        n_jobs=-1,
    ),
    "RandomForest": RandomForestClassifier(
        n_estimators=200,
        random_state=42,
        min_samples_leaf=2,
        max_features="sqrt",
        class_weight="balanced",
        n_jobs=-1,
    ),
    "GradBoost": GradientBoostingClassifier(
        random_state=42,
        n_estimators=80,
        learning_rate=0.05,
        max_depth=2,
    ),
    "LogReg": make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=500, C=0.3),
    ),
}

# 5) Backtest sederhana: train 80% awal, test 20% akhir
split = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split], X.iloc[split:]
Y_train, Y_test = Y.iloc[:split], Y.iloc[split:]

rows = []
for model_name, base_model in models.items():
    digit_preds = []
    digit_accs = []
    for col in target_cols:
        model = clone(base_model)
        model.fit(X_train, Y_train[col])
        pred = model.predict(X_test)
        digit_preds.append(pred)
        digit_accs.append(accuracy_score(Y_test[col], pred))

    pred_arr = np.vstack(digit_preds).T
    exact4_acc = (pred_arr == Y_test.values).all(axis=1).mean()
    rows.append({
        "model": model_name,
        "avg_digit_acc": np.mean(digit_accs),
        "exact_4_digit_acc": exact4_acc,
        "acc_d1": digit_accs[0],
        "acc_d2": digit_accs[1],
        "acc_d3": digit_accs[2],
        "acc_d4": digit_accs[3],
    })

backtest = pd.DataFrame(rows).sort_values("avg_digit_acc", ascending=False)
print("\nBacktest 20% data terakhir:")
display(backtest)

# Catatan:
# Untuk angka 0-9, tebakan acak per digit kira-kira 10%.
# Jika avg_digit_acc hanya sekitar 10%-12%, sinyal historisnya lemah.

# 6) Buat fitur untuk jadwal draw berikutnya
slot_times = [(3,30), (6,30), (11,30), (13,30), (17,30), (21,30)]

def infer_next_time(last_dt):
    """Ambil jadwal berikutnya dari pola 03:30, 06:30, 11:30, 13:30, 17:30, 21:30."""
    last_dt = pd.Timestamp(last_dt)
    for h, m in slot_times:
        candidate = last_dt.normalize() + pd.Timedelta(hours=h, minutes=m)
        if candidate > last_dt:
            return candidate
    h, m = slot_times[0]
    return last_dt.normalize() + pd.Timedelta(days=1, hours=h, minutes=m)

def build_next_feature_row(history_df, feature_cols):
    hist = history_df.copy()
    hist["datetime"] = pd.to_datetime(hist["datetime"])
    hist = hist.sort_values("datetime").reset_index(drop=True)

    lags = [1, 2, 3, 5, 10]
    if len(hist) < max(lags):
        raise ValueError(
            f"Butuh minimal {max(lags)} baris histori untuk fitur lag, hanya ada {len(hist)}."
        )

    next_dt = infer_next_time(hist["datetime"].iloc[-1])
    row = {}

    # Fitur waktu
    row["idx"] = float(hist["idx"].max() + 1) if "idx" in hist.columns else float(len(hist))
    row["hour"] = float(next_dt.hour)
    row["minute"] = float(next_dt.minute)
    row["day"] = float(next_dt.day)
    row["month"] = float(next_dt.month)
    row["dayofweek"] = float(next_dt.dayofweek)

    slot_map = {t: i for i, t in enumerate(slot_times)}
    row["draw_slot"] = float(slot_map[(next_dt.hour, next_dt.minute)])

    # Fitur lag digit
    for lag in lags:
        vals = hist[target_cols].iloc[-lag].astype(int).to_dict()
        digits = [
            vals["target_d1_ribuan"],
            vals["target_d2_ratusan"],
            vals["target_d3_puluhan"],
            vals["target_d4_satuan"],
        ]
        row[f"lag{lag}_d1_ribuan"] = float(digits[0])
        row[f"lag{lag}_d2_ratusan"] = float(digits[1])
        row[f"lag{lag}_d3_puluhan"] = float(digits[2])
        row[f"lag{lag}_d4_satuan"] = float(digits[3])
        row[f"lag{lag}_digit_sum"] = float(sum(digits))
        row[f"lag{lag}_odd_count"] = float(sum(d % 2 == 1 for d in digits))
        row[f"lag{lag}_unique_digit_count"] = float(len(set(digits)))
        row[f"lag{lag}_missing_count"] = float(10 - len(set(digits)))

    # Fitur rolling mean/std
    for w in [5, 10, 20, 50]:
        part = hist[target_cols].tail(w)
        for col, short in zip(target_cols, ["d1_ribuan", "d2_ratusan", "d3_puluhan", "d4_satuan"]):
            row[f"roll{w}_{short}_mean"] = float(part[col].mean())
            row[f"roll{w}_{short}_std"] = float(part[col].std(ddof=1))

    next_X = pd.DataFrame([row])

    # Samakan kolom dengan data training.
    # Jika ada fitur tambahan yang belum dihitung, isi median training agar script tetap jalan.
    medians = hist[feature_cols].astype(float).median(numeric_only=True)
    for c in feature_cols:
        if c not in next_X.columns:
            next_X[c] = medians.get(c, 0)

    next_X = next_X[feature_cols].astype(float)
    next_X = next_X.fillna(medians)
    return next_dt, next_X

next_dt, X_next = build_next_feature_row(df, feature_cols)

# 7) Train ulang semua model pada seluruh data, lalu prediksi draw berikutnya
per_digit_probs = []
model_vote_table = []

for col in target_cols:
    probs = []
    votes = {"posisi": col}
    for model_name, base_model in models.items():
        model = clone(base_model)
        model.fit(X, Y[col])
        pred_digit = int(model.predict(X_next)[0])
        votes[model_name] = pred_digit

        # Ambil probabilitas 0-9
        proba = model.predict_proba(X_next)[0]
        full = np.zeros(10)
        for idx_class, class_label in enumerate(model.classes_):
            full[int(class_label)] = proba[idx_class]
        probs.append(full)

    avg_prob = np.mean(probs, axis=0)
    avg_prob = avg_prob / avg_prob.sum()
    per_digit_probs.append(avg_prob)
    model_vote_table.append(votes)

votes_df = pd.DataFrame(model_vote_table)
print("\nVote prediksi per posisi dari tiap model:")
display(votes_df)

# 8) Tampilkan digit teratas per posisi
top_digits = []
for col, p in zip(target_cols, per_digit_probs):
    order = np.argsort(p)[::-1]
    top = [(int(d), float(p[d])) for d in order[:5]]
    top_digits.append(top)

top_digit_df = pd.DataFrame({
    "posisi": target_cols,
    "top_1": [f"{x[0][0]} ({x[0][1]:.2%})" for x in top_digits],
    "top_2": [f"{x[1][0]} ({x[1][1]:.2%})" for x in top_digits],
    "top_3": [f"{x[2][0]} ({x[2][1]:.2%})" for x in top_digits],
    "top_4": [f"{x[3][0]} ({x[3][1]:.2%})" for x in top_digits],
    "top_5": [f"{x[4][0]} ({x[4][1]:.2%})" for x in top_digits],
})

print("\nTop digit per posisi:")
display(top_digit_df)

# 9) Kombinasi kandidat 4D teratas
TOP_N_PER_DIGIT = 5
candidates = []
for digits in itertools.product(*[[d for d, prob in td[:TOP_N_PER_DIGIT]] for td in top_digits]):
    score = 1.0
    for i, d in enumerate(digits):
        score *= per_digit_probs[i][d]
    candidates.append(("".join(map(str, digits)), score))

candidates = sorted(candidates, key=lambda x: x[1], reverse=True)
pred_df = pd.DataFrame(candidates[:20], columns=["kandidat_4d", "skor_probabilitas_model"])
pred_df["skor_probabilitas_model"] = pred_df["skor_probabilitas_model"].map(lambda x: f"{x:.4%}")

print(f"\nPrediksi untuk draw berikutnya: {next_dt}")
print("Kandidat utama:", pred_df.iloc[0]["kandidat_4d"])
print("\nTop 20 kandidat:")
display(pred_df)

# 10) Simpan hasil prediksi
out_path = "prediksi_next_prize2.csv"
pred_df.to_csv(out_path, index=False)
print(f"\nFile hasil prediksi disimpan sebagai: {out_path}")

# PENTING:
# Ini model statistik/ML berbasis data historis, bukan jaminan.
# Untuk data undian/acak, akurasi biasanya sangat rendah dan exact 4D sangat sulit.
'''

# Create .py file
py_path = "/mnt/data/prediksi_prize2_colab.py"
# Make sure the output directory exists before anything is written into it.
pathlib.Path(py_path).parent.mkdir(parents=True, exist_ok=True)
with open(py_path, "w", encoding="utf-8") as f:
    f.write(script.strip() + "\n")

# Create notebook
# The notebook declares nbformat 4.5 below, and that schema requires every
# cell to carry a unique string `id`.
cells = [
    {
        "cell_type": "markdown",
        "id": "intro",
        "metadata": {},
        "source": [
            "# Prediksi Hasil Prize2 Berikutnya\n",
            "\n",
            "Notebook ini membaca `prize2_features.csv`, melakukan backtest sederhana, lalu membuat kandidat prediksi untuk draw berikutnya.\n",
            "\n",
            "**Catatan penting:** model ini bersifat probabilistik. Untuk data undian/acak, tidak ada prediksi yang bisa dijamin akurat.\n",
        ],
    },
    {
        "cell_type": "code",
        "id": "prediksi-prize2",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        # keepends=True: notebook sources are lists of newline-terminated lines.
        "source": script.strip().splitlines(True),
    },
]

nb = {
    "cells": cells,
    "metadata": {
        "colab": {"provenance": []},
        "kernelspec": {"display_name": "Python 3", "name": "python3"},
        "language_info": {"name": "python"},
    },
    "nbformat": 4,
    "nbformat_minor": 5,
}

ipynb_path = "/mnt/data/prediksi_prize2_colab.ipynb"
with open(ipynb_path, "w", encoding="utf-8") as f:
    json.dump(nb, f, ensure_ascii=False, indent=2)

print(py_path)
print(ipynb_path)

回答

Berdasarkan file prize2_features.csv, draw terakhir adalah 4880 pada 2026-06-03 06:30, jadi prediksi berikutnya saya asumsikan untuk 2026-06-03 11:30.

Hasil prediksi kandidat utama: 3880

Alternatif top kandidat:

  1. 3880
  2. 3886
  3. 7880
  4. 3820
  5. 3080
  6. 3830
  7. 7886
  8. 3826
  9. 7820
  10. 3086

Catatan penting: dari backtest 20% data terakhir, akurasi per digit hanya sekitar 10–11%, setara dengan tebakan acak (sekitar 10% per digit untuk angka 0–9), dan akurasi tepat 4 digit 0% pada test set. Jadi ini model probabilistik, bukan jaminan hasil pasti.

Saya sudah buatkan script Google Colab:

Download Notebook Colab (.ipynb)
Download Script Python (.py)

Di Colab, upload notebook-nya, jalankan, lalu pilih file prize2_features.csv saat diminta.

分享这个问答