# ===============================================...

Question

# ============================================================
# SCRIPT PREDIKSI 4D - ADVANCED VERSION (PRO) - FIXED VERSION
# Perbaikan: Fixed Indexing Error & Optimized Pattern Calculation
# ============================================================

import os
import numpy as np
import pandas as pd

# -----------------------------
# 1) LOAD CSV
# -----------------------------

# Choose the input file: when google.colab is importable, ask the user to
# upload a CSV and use the first uploaded name; if anything in that path
# fails, fall back to the default file name.
try:
    from google.colab import files

    print('Silakan upload file CSV Anda...')
    uploaded = files.upload()
    CSV_PATH = list(uploaded)[0]
except Exception:
    CSV_PATH = 'PRIZE_LEVEL_4_ELITE.csv'

print('Membaca file:', CSV_PATH)
# raw_number is loaded as text so leading zeros are not lost on read.
df = pd.read_csv(CSV_PATH, dtype={'raw_number': str})

# -----------------------------
# 2) CLEANING & FEATURE ENGINEERING
# -----------------------------

def prepare_data(data):
    """Clean the raw draw table and derive per-digit features.

    Args:
        data: DataFrame with at least the columns ``timestamp`` and
            ``raw_number``. It is not modified.

    Returns:
        A new DataFrame sorted by parsed timestamp (ties keep their original
        row order) with a fresh 0..n-1 index and these added columns:
        ``_orig_order``, ``ts``, ``hour`` (``HH:MM``), ``d1``..``d4``,
        ``sum_digits``, ``odd_count``, ``high_count`` (digits >= 5),
        ``is_double`` (some digit repeats), ``is_triple`` (some digit occurs
        at least three times) and ``is_quad`` (all four digits equal).
        ``raw_number`` is normalised to exactly four digit characters.

    Raises:
        ValueError: if ``timestamp`` or ``raw_number`` is missing.
    """
    if 'timestamp' not in data.columns or 'raw_number' not in data.columns:
        raise ValueError("CSV wajib punya kolom 'timestamp' dan 'raw_number'.")

    data = data.copy()
    data['_orig_order'] = np.arange(len(data))

    # Strip everything that is not a digit. Rows left with no digit at all
    # (missing or blank values) are dropped: zero-padding them would invent
    # a draw of "0000" that never happened.
    raw = data['raw_number']
    digits_only = raw.astype(str).str.replace(r'\D', '', regex=True)
    has_digits = raw.notna() & (digits_only.str.len() > 0)
    data = data.loc[has_digits].copy()
    data['raw_number'] = digits_only[has_digits].str.zfill(4).str[-4:]

    data['ts'] = pd.to_datetime(data['timestamp'])
    # mergesort is stable, so equal timestamps keep their file order.
    data = data.sort_values(['ts', '_orig_order'], kind='mergesort').reset_index(drop=True)
    data['hour'] = data['ts'].dt.strftime('%H:%M')

    digit_cols = ['d1', 'd2', 'd3', 'd4']
    for pos, col in enumerate(digit_cols):
        data[col] = data['raw_number'].str[pos].astype(int)

    data['sum_digits'] = data[digit_cols].sum(axis=1)
    data['odd_count'] = (data[digit_cols] % 2).sum(axis=1)
    data['high_count'] = (data[digit_cols] >= 5).sum(axis=1)

    distinct = data['raw_number'].apply(lambda s: len(set(s)))
    # Highest multiplicity of any single digit. Counting distinct digits is
    # not enough for "triple": 1122 has two distinct digits but no digit
    # occurs three times.
    max_repeat = data['raw_number'].apply(lambda s: max(s.count(c) for c in s))
    data['is_double'] = (distinct < 4).astype(int)
    data['is_triple'] = (max_repeat >= 3).astype(int)
    data['is_quad'] = (distinct == 1).astype(int)

    return data

# Swap the raw frame for its cleaned, feature-augmented version.
df = prepare_data(df)

# -----------------------------
# 3) PRE-COMPUTING MATRICES (OPTIMIZED)
# -----------------------------

# Lookup tables over all 10,000 candidates. NUMS[k] is k as a zero-padded
# 4-character string, so the integer value of a number is also its row index
# in every table below.
NUMS = np.array([f'{i:04d}' for i in range(10000)])
# DIGITS[k, j] is digit j of candidate k (j = 0 is the leftmost digit).
DIGITS = (np.arange(10000)[:, None] // np.array([1000, 100, 10, 1]) % 10).astype(np.int16)
SUMS = DIGITS.sum(axis=1)          # digit sum, 0..36
ODD = (DIGITS % 2).sum(axis=1)     # how many of the four digits are odd
HIGH = (DIGITS >= 5).sum(axis=1)   # how many of the four digits are >= 5

# Number of DISTINCT digits per candidate, precomputed once for scoring:
#   4 = all digits different, 3 = exactly one pair,
#   2 = two pairs or a triple, 1 = all four digits equal.
# After sorting each row, every change between neighbours starts a new digit.
UNIQUE_COUNTS_ARRAY = 1 + (np.diff(np.sort(DIGITS, axis=1), axis=1) != 0).sum(axis=1)

# Transition matrices ("correlation" feature).
# For j = 1..3, TRANSITION_PROBS[j-1][a, b] is the smoothed, row-normalised
# frequency with which digit a at position j of one draw is followed by
# digit b at position j+1 of the NEXT draw in df.
#
# NOTE(review): score_candidates indexes these matrices with two adjacent
# digits of the SAME candidate, which is not the quantity counted here --
# confirm which of the two meanings is intended.
# NOTE(review): the matrices are built from the whole of df, so the backtest
# scores past draws with statistics that include later draws.
TRANSITION_PROBS = []
for j in range(1, 4):
    matrix = np.zeros((10, 10))
    # Pair every row with its successor in one shot instead of fetching
    # rows one at a time through df.iloc.
    prev_digits = df[f'd{j}'].to_numpy()[:-1].astype(int)
    curr_digits = df[f'd{j+1}'].to_numpy()[1:].astype(int)
    # np.add.at accumulates correctly when the same (a, b) pair repeats.
    np.add.at(matrix, (prev_digits, curr_digits), 1)
    matrix += 0.1  # additive smoothing so no transition has probability 0
    matrix /= matrix.sum(axis=1, keepdims=True)
    TRANSITION_PROBS.append(matrix)

def get_gap_matrix(hist):
    """Return how many draws ago each digit last appeared at each position.

    Args:
        hist: DataFrame with digit columns ``d1``..``d4``, oldest row first.

    Returns:
        A ``(4, 10)`` float array ``g`` where ``g[j, d]`` is the number of
        rows since digit ``d`` last occurred in column ``d{j+1}`` (1 means
        it is in the most recent row). A digit that never occurred gets
        ``len(hist)``.
    """
    n_rows = len(hist)
    # Default every cell to "never seen".
    gap_matrix = np.full((4, 10), float(n_rows))
    for j in range(4):
        values = hist[f'd{j+1}'].to_numpy()
        for digit in range(10):
            # Row POSITIONS, not index labels: the result must not depend
            # on how the caller's frame happens to be indexed.
            hits = np.flatnonzero(values == digit)
            if hits.size:
                gap_matrix[j, digit] = n_rows - hits[-1]
    return gap_matrix

# -----------------------------
# 4) CORE SCORING ENGINE (FIXED)
# -----------------------------

def score_candidates(hist, prev_row, target_hour, weights):
    """Score all 10,000 candidate numbers against the draw history.

    Each enabled feature adds ``weight * log(value)`` to a running score, so
    the result is a weighted sum of log terms (higher is better).

    Args:
        hist: prepared history with columns ``d1``..``d4``, ``sum_digits``
            and ``odd_count``, oldest row first.
        prev_row: accepted for interface compatibility; not read here.
        target_hour: accepted for interface compatibility; not read here.
        weights: mapping with optional keys ``freq``, ``gap``, ``corr``,
            ``sum``, ``odd_even``, ``pattern`` (a falsy or missing weight
            disables that feature) and ``recent_n`` (size of the recent
            window, default 300).

    Returns:
        A float64 array of length ``len(NUMS)``; entry ``k`` is the score of
        the number whose integer value is ``k``.
    """
    logscore = np.zeros(len(NUMS), dtype=np.float64)
    pos_cols = ['d1', 'd2', 'd3', 'd4']
    recent_n = int(weights.get('recent_n', 300))
    n_hist = len(hist)

    # 1. Positional frequency, over the full history and a recent window.
    if weights.get('freq', 0):
        # The window does not depend on the position, so slice it once.
        h_recent = hist.tail(recent_n)
        # Normalise by the rows actually in the window; it is shorter than
        # recent_n whenever the history itself is.
        n_recent = len(h_recent)
        for j, col in enumerate(pos_cols):
            candidate_digit = DIGITS[:, j]

            counts_g = np.bincount(hist[col].to_numpy(), minlength=10)
            logscore += weights['freq'] * np.log((counts_g[candidate_digit] + 1) / (n_hist + 10))

            counts_r = np.bincount(h_recent[col].to_numpy(), minlength=10)
            logscore += weights['freq'] * np.log((counts_r[candidate_digit] + 1) / (n_recent + 10))

    # 2. Gap analysis: the longer a digit has been absent from a position,
    #    the larger its contribution.
    if weights.get('gap', 0):
        gaps = get_gap_matrix(hist)
        for j in range(4):
            logscore += weights['gap'] * np.log1p(gaps[j, DIGITS[:, j]])

    # 3. Correlation: look up each pair of adjacent candidate digits in the
    #    precomputed transition matrices.
    if weights.get('corr', 0):
        for j in range(3):
            trans_p = TRANSITION_PROBS[j][DIGITS[:, j], DIGITS[:, j + 1]]
            logscore += weights['corr'] * np.log(trans_p + 1e-9)

    # 4. Digit-sum and odd-count frequencies with add-one smoothing.
    if weights.get('sum', 0):
        counts = np.bincount(hist['sum_digits'].to_numpy(), minlength=37)
        logscore += weights['sum'] * np.log((counts[SUMS] + 1) / (n_hist + 37))

    if weights.get('odd_even', 0):
        counts = np.bincount(hist['odd_count'].to_numpy(), minlength=5)
        logscore += weights['odd_even'] * np.log((counts[ODD] + 1) / (n_hist + 5))

    # 5. Pattern: fixed factor 0.1 for candidates with at most two distinct
    #    digits and 0.5 for all others, independent of the history.
    if weights.get('pattern', 0):
        logscore += weights['pattern'] * np.log(np.where(UNIQUE_COUNTS_ARRAY <= 2, 0.1, 0.5))

    return logscore

# -----------------------------
# 5) CONFIGURATIONS & BACKTEST
# -----------------------------

# Named weight presets for the scoring engine. Each preset gives one weight
# per feature plus `recent_n`, the size of the recent-frequency window.
WEIGHT_SETS = {
    'balanced_pro': dict(
        freq=0.8, gap=1.2, corr=1.5, sum=0.5,
        odd_even=0.3, pattern=0.4, recent_n=300,
    ),
    'gap_hunter': dict(
        freq=0.5, gap=2.5, corr=0.5, sum=0.2,
        odd_even=0.2, pattern=0.2, recent_n=200,
    ),
    'pattern_master': dict(
        freq=0.6, gap=0.5, corr=2.0, sum=0.8,
        odd_even=0.5, pattern=1.5, recent_n=400,
    ),
}

def infer_next_timestamp(data):
    """Guess the timestamp of the draw that follows the last row of ``data``.

    If the last draw's ``HH:MM`` is one of the known daily slots, the next
    slot is returned (rolling over to the first slot of the following day
    after the last one). Otherwise the median spacing between the distinct
    timestamps in ``data`` is added to the last timestamp.

    Args:
        data: DataFrame with a datetime column ``ts``, oldest row first.

    Returns:
        ``(next_ts, next_hour)`` where ``next_ts`` is a ``pd.Timestamp`` and
        ``next_hour`` its ``HH:MM`` string.

    Raises:
        ValueError: if the last draw is not on a known slot and ``data``
            holds fewer than two distinct timestamps, so no spacing can be
            estimated.
    """
    slot_order = ['03:30', '06:30', '11:30', '13:30', '17:30', '21:30']
    last_ts = data['ts'].iloc[-1]
    last_hour = last_ts.strftime('%H:%M')

    if last_hour in slot_order:
        idx = slot_order.index(last_hour)
        wraps = idx == len(slot_order) - 1
        next_hour = slot_order[0] if wraps else slot_order[idx + 1]
        # Only the last slot of the day pushes the date forward.
        next_date = (last_ts + pd.Timedelta(days=1)).date() if wraps else last_ts.date()
        return pd.Timestamp(str(next_date) + ' ' + next_hour + ':00'), next_hour

    unique_ts = data['ts'].drop_duplicates().sort_values()
    gap = unique_ts.diff().dropna().median()
    if pd.isna(gap):
        # With a single distinct timestamp the median is NaT; fail with a
        # clear message instead of an obscure NaT formatting error.
        raise ValueError(
            'Tidak bisa menentukan timestamp berikutnya: '
            'butuh minimal dua timestamp berbeda.'
        )
    next_ts = last_ts + gap
    return next_ts, next_ts.strftime('%H:%M')

def evaluate_weights(data, weights, last_n=300, min_history=100):
    """Walk-forward backtest of one weight preset.

    For each of the last ``last_n`` rows (but never before ``min_history``
    rows of history exist), all candidates are scored using only the rows
    before it, and the rank of the number actually drawn is recorded.

    Args:
        data: prepared draw history, oldest row first.
        weights: weight preset passed through to ``score_candidates``.
        last_n: how many of the most recent rows to evaluate.
        min_history: minimum number of earlier rows required before a row is
            evaluated.

    Returns:
        ``{'top10': share of evaluated draws ranked in the top 10,
        'med_rank': median rank of the drawn number}``. Both are ``nan``
        when ``data`` is too short for any row to be evaluated.
    """
    # At least one earlier row is always needed to act as the previous draw.
    start = max(1, min_history, len(data) - last_n)
    ranks = []
    for i in range(start, len(data)):
        hist = data.iloc[:i]
        target = data.iloc[i]
        prev = data.iloc[i - 1]
        scores = score_candidates(hist, prev, target['hour'], weights)
        actual = int(target['raw_number'])
        # Rank = 1 + number of candidates that scored strictly higher.
        ranks.append(int(1 + np.sum(scores > scores[actual])))

    if not ranks:
        # Nothing evaluated: report "unknown" rather than dividing by zero
        # (or by a negative count) as a plain len(data) - start would.
        return {'top10': float('nan'), 'med_rank': float('nan')}

    ranks = np.asarray(ranks)
    return {'top10': float(np.mean(ranks <= 10)), 'med_rank': float(np.median(ranks))}

# Backtest every preset and keep the best one.
print('\n===== BACKTESTING MODEL PRO =====')
results = []
for name, w in WEIGHT_SETS.items():
    res = evaluate_weights(df, w)
    results.append({'model': name, **res})
    print(f"Model {name}: Top10 Accuracy: {res['top10']:.2%}, Median Rank: {res['med_rank']}")

# Highest top-10 hit rate wins. Equal hit rates are common when hits are
# rare, so the lower (better) median rank decides ties instead of leaving
# the choice to the sort's internal ordering.
ranking = pd.DataFrame(results).sort_values(['top10', 'med_rank'], ascending=[False, True])
best_model_name = ranking.iloc[0]['model']
best_weights = WEIGHT_SETS[best_model_name]
print(f'\n>>> Model Terpilih: {best_model_name}')

# -----------------------------
# 6) PREDICTION EXECUTION
# -----------------------------

next_ts, next_hour = infer_next_timestamp(df)
print(f'\nTarget Timestamp: {next_ts} ({next_hour})')

hist = df.copy()
prev = hist.iloc[-1]
main_predictions = []
all_results = []

# Three rounds: each round scores every candidate, records the top 20, takes
# the best one as that round's prediction, then appends it to the history as
# if it had been drawn so the next round is scored against the extended
# history.
for step in range(1, 4):
    scores = score_candidates(hist, prev, next_hour, best_weights)

    # Numbers already picked in an earlier round must not win again.
    for p in main_predictions:
        scores[int(p)] = -1e18

    # A stable sort makes equal scores resolve the same way on every run
    # (lowest number first).
    top_indices = np.argsort(-scores, kind='stable')[:20]

    step_rows = [
        {'step': step, 'rank': rank, 'number': NUMS[idx], 'score': round(scores[idx], 4)}
        for rank, idx in enumerate(top_indices, 1)
    ]
    current_step_df = pd.DataFrame(step_rows)
    all_results.append(current_step_df)

    winner = NUMS[top_indices[0]]
    main_predictions.append(winner)

    # Build a simulated history row for the winner. Columns not set below
    # stay NaN.
    winner_digits = [int(c) for c in winner]
    distinct = len(set(winner_digits))
    new_row = {col: np.nan for col in hist.columns}
    new_row.update({'timestamp': str(next_ts), 'raw_number': winner, 'ts': next_ts, 'hour': next_hour})
    for j, digit in enumerate(winner_digits):
        new_row[f'd{j+1}'] = digit
    new_row['sum_digits'] = sum(winner_digits)
    new_row['odd_count'] = sum(digit % 2 for digit in winner_digits)
    new_row['high_count'] = sum(1 for digit in winner_digits if digit >= 5)
    new_row['is_double'] = 1 if distinct < 4 else 0
    # A triple needs one digit at least three times; "two distinct digits"
    # alone would also flag two-pair numbers such as 1122.
    new_row['is_triple'] = 1 if max(winner_digits.count(d) for d in winner_digits) >= 3 else 0
    new_row['is_quad'] = 1 if distinct == 1 else 0

    hist = pd.concat([hist, pd.DataFrame([new_row])], ignore_index=True)
    prev = hist.iloc[-1]

# Report the three main picks, then the full top-20 table of every round.
print('\n===== HASIL PREDIKSI UTAMA =====')
print(f"Urutan 1: {main_predictions[0]}")
print(f"Urutan 2: {main_predictions[1]}")
print(f"Urutan 3: {main_predictions[2]}")

print('\n===== DAFTAR KANDIDAT TOP 20 =====')
full_output = pd.concat(all_results)
print(full_output.to_string(index=False))

full_output.to_csv('prediksi_pro_4d_fixed.csv', index=False)
try:
    from google.colab import files
    files.download('prediksi_pro_4d_fixed.csv')
except Exception:
    # Best effort: if google.colab is unavailable or the download fails,
    # the CSV written above stays on disk. Catching Exception rather than
    # everything lets Ctrl-C and SystemExit through.
    pass

Answer 1

Sudah saya upgrade dan test memakai file CSV yang Anda upload.

File hasil upgrade:

Download script upgrade .py
Download output prediksi CSV
Download ringkasan backtest CSV

Perubahan utama yang saya masukkan:

Backtest tanpa data leakage
Transition matrix sekarang dihitung dari histori saat itu saja, bukan dari full dataset masa depan.
target_hour dan prev_row sekarang benar-benar dipakai
Versi lama menerima parameter itu, tapi efeknya hampir tidak ada.
Scoring lebih stabil
Semua fitur utama dinormalisasi dengan robust z-score supaya bobot tidak berat sebelah karena beda skala.
Fitur tambahan
- global positional frequency
- recent frequency
- exponential decay frequency
- gap analysis capped
- intra-number transition
- sequential transition dari row sebelumnya
- hour/slot pattern
- odd/even, high/low, sum pattern
- repeat penalty angka yang baru muncul
Output lebih lengkap
CSV prediksi sekarang berisi target_timestamp, step, rank, number, score, sum_digits, odd_count, high_count, dan unique_count.

Hasil test pada data Anda:

Model	Top10	Top20	Top50	Median Rank
balanced_v2	0.33%	0.33%	1.00%	4068
gap_hunter_v2	0.67%	1.33%	1.67%	4364
transition_v2	0.33%	0.67%	0.67%	4164
hour_pattern_v2	0.00%	0.33%	0.67%	3875

Model terpilih oleh script: gap_hunter_v2.

Output utama dari run test:

Urutan	Angka
1	4851
2	1609
3	7540

Catatan penting: ini tetap analisis pola historis; hasil undian bersifat acak, sehingga prediksi ini bukan jaminan angka yang akan keluar. Jalankan di Colab atau lokal seperti ini:

```bash
python prediksi_4d_pro_upgrade.py --csv PRIZE_LEVEL_4_ELITE.csv
```

# ===============================================...

प्रश्न

============================================================

SCRIPT PREDIKSI 4D - ADVANCED VERSION (PRO) - FIXED VERSION

Perbaikan: Fixed Indexing Error & Optimized Pattern Calculation

============================================================

-----------------------------

1) LOAD CSV

-----------------------------

-----------------------------

2) CLEANING & FEATURE ENGINEERING

-----------------------------

-----------------------------

3) PRE-COMPUTING MATRICES (OPTIMIZED)

-----------------------------

Precompute Unique Counts for 10,000 numbers to speed up scoring

1 = 4 digits unique, 2 = double, 3 = triple/quad

Transition Matrix (Correlation)

-----------------------------

4) CORE SCORING ENGINE (FIXED)

-----------------------------

-----------------------------

5) CONFIGURATIONS & BACKTEST

-----------------------------

-----------------------------

6) PREDICTION EXECUTION

-----------------------------

Python

Python

Python

Python

Python

विचार

उत्तर

इस प्रश्नोत्तर को साझा करें