give me jupyter notebook code for this given quest...
Создано: 30 марта 2026 г.
Отвечено с использованием GPT-5.4 Thinking от Chat01
Создано: 30 марта 2026 г.
Отвечено с использованием GPT-5.4 Thinking от Chat01
Give me Jupyter notebook code for this given question. It should predict value_19 for the given task with the specified accuracy, writing predictions to my output.csv file. Now give me the final code.
Give me Jupyter notebook code for this given question. It should predict value_19 for the given task with the specified accuracy, writing predictions to my output.csv file. Now give me the final code.
# NOTE(review): chat-transcript artifact, not runnable as-is. Three REPL
# statements ("from PIL import Image", "img = Image.open('/mnt/data/image.png')",
# "img.size") were fused onto one line together with the "python" code-fence
# tag, and the cell output "(864, 1152)" was appended to the code text.
pythonfrom PIL import Image img=Image.open('/mnt/data/image.png') img.size (864, 1152)
# NOTE(review): chat-transcript artifact, not runnable as-is. Several REPL
# statements (matplotlib/numpy imports, "img = np.array(img)", "plt.figure(...)",
# "plt.imshow(img)", "plt.axis('off')") were fused onto one line together with
# the "python" code-fence tag during export.
pythonimport matplotlib.pyplot as plt, numpy as np img=np.array(img) plt.figure(figsize=(8,10)) plt.imshow(img) plt.axis('off')
Use this as your final Jupyter notebook code.
It is built to: read train.csv and test.csv, convert the time column into numeric seconds, and write output.csv.
# =========================================================================
# Predict ``value_19`` from train.csv / test.csv and write output.csv.
#
# The script is built to:
#   1. Read train.csv and test.csv.
#   2. Preprocess: convert the HH:MM:SS ``time`` column into numeric
#      seconds, extract a numeric id, and derive epoch and row-wise
#      sensor features.
#   3. Cross-validate five regressors (5-fold, R^2) and refit each on
#      the full training data.
#   4. Blend the models whose CV score is within 0.03 of the best one,
#      weighted by CV score.
#   5. Save predictions to output.csv (columns: ID, value_19) plus a
#      lowercase-id backup file.
# =========================================================================
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd


def time_to_seconds(x):
    """Convert a clock string to total seconds.

    Accepts "H:MM:SS", "MM:SS" or "SS" (e.g. "0:00:45" -> 45.0,
    "6:15:30" -> 22530.0). Numeric inputs are returned unchanged as
    floats; missing or unparseable values become NaN.
    """
    if pd.isna(x):
        return np.nan
    # Already numeric -> nothing to parse.
    if isinstance(x, (int, float, np.integer, np.floating)):
        return float(x)
    text = str(x).strip()
    if text == "":
        return np.nan
    try:
        parts = [float(p) for p in text.split(":")]
    except (TypeError, ValueError):  # narrow catch instead of a bare except
        return np.nan
    if len(parts) == 3:
        h, m, s = parts
        return h * 3600 + m * 60 + s
    if len(parts) == 2:
        m, s = parts
        return m * 60 + s
    if len(parts) == 1:
        return parts[0]
    return np.nan


def preprocess(df, is_train=True):
    """Feature-engineer one dataframe.

    Returns ``(processed_df, original_ids)`` where ``original_ids`` is the
    untouched id/ID column (or a 0..n-1 range when neither exists).
    ``is_train`` is kept for interface compatibility; both splits are
    currently transformed identically.
    """
    df = df.copy()

    # Detect the identifier column ("id" preferred over "ID").
    if "id" in df.columns:
        id_col = "id"
    elif "ID" in df.columns:
        id_col = "ID"
    else:
        id_col = None

    # Preserve the original identifiers for the submission file.
    if id_col is not None:
        original_id = df[id_col].copy()
        # The numeric part of the id may carry ordering information.
        df["id_num"] = (
            df[id_col]
            .astype(str)
            .str.extract(r"(\d+)", expand=False)
            .astype(float)
        )
    else:
        original_id = pd.Series(range(len(df)))

    # Parse the HH:MM:SS ``time`` column into numeric features.
    if "time" in df.columns:
        df["time_seconds"] = df["time"].apply(time_to_seconds)
        df["time_hours"] = df["time_seconds"] // 3600
        df["time_minutes"] = (df["time_seconds"] % 3600) // 60
        df["time_only_seconds"] = df["time_seconds"] % 60
        df.drop(columns=["time"], inplace=True)

    # Force every non-id column to numeric (bad values -> NaN, imputed later).
    for col in df.columns:
        if col not in ("id", "ID"):
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Epoch-derived features.
    if "end_epoch" in df.columns and "start_epoch" in df.columns:
        df["epoch_diff"] = df["end_epoch"] - df["start_epoch"]
        df["start_epoch_mod_day"] = df["start_epoch"] % 86400  # seconds per day
        df["end_epoch_mod_day"] = df["end_epoch"] % 86400

    # Row-wise summary statistics over the sensor columns (target excluded).
    sensor_cols = [
        c for c in df.columns if c.startswith("value_") and c != "value_19"
    ]
    if sensor_cols:
        df["sensor_mean"] = df[sensor_cols].mean(axis=1)
        df["sensor_std"] = df[sensor_cols].std(axis=1)
        df["sensor_min"] = df[sensor_cols].min(axis=1)
        df["sensor_max"] = df[sensor_cols].max(axis=1)
        df["sensor_median"] = df[sensor_cols].median(axis=1)

    return df, original_id


def build_models():
    """Return the name -> sklearn Pipeline dictionary used for CV/blending."""
    # sklearn is imported lazily so the parsing helpers above remain usable
    # without it.
    from sklearn.ensemble import (
        ExtraTreesRegressor,
        GradientBoostingRegressor,
        HistGradientBoostingRegressor,
        RandomForestRegressor,
    )
    from sklearn.impute import SimpleImputer
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    return {
        "extra_trees": Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("model", ExtraTreesRegressor(
                n_estimators=1200,
                max_features="sqrt",
                min_samples_leaf=1,
                random_state=42,
                n_jobs=-1,
            )),
        ]),
        "random_forest": Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("model", RandomForestRegressor(
                n_estimators=1000,
                max_features=0.8,
                min_samples_leaf=1,
                random_state=42,
                n_jobs=-1,
            )),
        ]),
        "gradient_boosting": Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("model", GradientBoostingRegressor(
                n_estimators=500,
                learning_rate=0.03,
                max_depth=3,
                subsample=0.9,
                random_state=42,
            )),
        ]),
        "hist_gb": Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("model", HistGradientBoostingRegressor(
                learning_rate=0.03,
                max_depth=8,
                max_iter=600,
                l2_regularization=0.05,
                random_state=42,
            )),
        ]),
        "ridge": Pipeline([
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
            ("model", Ridge(alpha=3.0)),
        ]),
    }


def main():
    """Run the end-to-end train -> cross-validate -> blend -> output.csv flow."""
    from sklearn.model_selection import KFold, cross_val_score

    train = pd.read_csv("train.csv")
    test = pd.read_csv("test.csv")

    print("Train shape:", train.shape)
    print("Test shape :", test.shape)
    print("\nTrain columns:\n", list(train.columns))
    print("\nTest columns:\n", list(test.columns))

    # Clean stray whitespace from headers.
    train.columns = train.columns.str.strip()
    test.columns = test.columns.str.strip()

    train_processed, _ = preprocess(train, is_train=True)
    test_processed, test_ids = preprocess(test, is_train=False)

    target_col = "value_19"
    if target_col not in train_processed.columns:
        raise ValueError("value_19 column not found in train.csv")
    y = train_processed[target_col].copy()

    # Model features: drop the target and the raw ID columns, then keep
    # only the columns present in both splits (aligned, sorted).
    X = train_processed.drop(columns=[target_col], errors="ignore")
    X = X.drop(columns=["id", "ID"], errors="ignore")
    X_test = test_processed.drop(columns=["id", "ID"], errors="ignore")
    common_cols = sorted(set(X.columns) & set(X_test.columns))
    X = X[common_cols].copy()
    X_test = X_test[common_cols].copy()

    print("\nFinal training features:", X.shape)
    print("Final testing features :", X_test.shape)

    cv = KFold(n_splits=5, shuffle=True, random_state=42)
    models = build_models()

    results = []
    test_predictions = {}
    print("\nModel CV scores (R2):\n")
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=cv, scoring="r2", n_jobs=-1)
        mean_score = scores.mean()
        print(f"{name:18s} -> Mean R2: {mean_score:.5f} | Std: {scores.std():.5f}")
        # Refit on the full training data and predict the test split.
        model.fit(X, y)
        test_predictions[name] = model.predict(X_test)
        results.append((name, mean_score))

    # Leaderboard: best CV score first.
    results.sort(key=lambda item: item[1], reverse=True)
    print("\nSorted leaderboard:")
    for entry in results:
        print(entry)

    # Blend every model within 0.03 R2 of the best; fall back to the single
    # best model if the filter removes everything.
    best_score = results[0][1]
    selected = [(n, s) for n, s in results if s >= best_score - 0.03]
    if not selected:
        selected = [results[0]]
    print("\nSelected models for blending:")
    for name, score in selected:
        print(f"{name}: {score:.5f}")

    # Convert negative/very small scores into safe positive weights.
    weights = np.array([max(s, 0.0001) for _, s in selected], dtype=float)
    weights /= weights.sum()
    final_pred = np.zeros(len(X_test))
    for (name, _), w in zip(selected, weights):
        final_pred += w * test_predictions[name]
    # Optional clipping if the target is sensor-like and extreme noise appears:
    # final_pred = np.clip(final_pred, y.min(), y.max())

    # Screenshot note: output columns should be exactly ID and value_19.
    submission = pd.DataFrame({"ID": test_ids, "value_19": final_pred})
    submission.to_csv("output.csv", index=False)
    # Backup file in case the platform actually expects a lowercase id header.
    submission.rename(columns={"ID": "id"}).to_csv(
        "output_lowercase_id.csv", index=False
    )

    print("\nDone.")
    print("Saved: output.csv")
    print("Also saved backup: output_lowercase_id.csv")
    print("\nPreview:")
    print(submission.head())


if __name__ == "__main__":
    main()
A couple of important points from your screenshots:
It predicts value_19, converts time from HH:MM:SS to numeric, and writes output.csv. It uses ID as the header in output.csv because your screenshot note says the exact columns should be ID and value_19, and it also saves output_lowercase_id.csv as a backup because the sample image shows a lowercase id. If your platform rejects output.csv because of the header name, change only this part:
# Alternative submission header: lowercase "id" instead of "ID".
submission = pd.DataFrame(
    {
        "id": test_ids,
        "value_19": final_pred,
    }
)
and save again.