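# NOTE: "Spaces: Sleeping / Sleeping" was page-status text captured together
# with this file when it was copied from a web view; it is not part of the
# script.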
from pathlib import Path

import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.multioutput import MultiOutputRegressor

from utils.metrics import mae, bias
# ---------------------------------------------------------
# CONFIG
# ---------------------------------------------------------
# Number of forecast steps scored per series
# (FreshRetailNet setup: 14-step forecast).
HORIZON = 14

# Inputs used to fit the model: feature windows and their target windows.
TRAIN_DATA_PATH = Path("data/processed/lgbm_ready/train.csv")
TARGET_DATA_PATH = Path("data/processed/lgbm_ready/target.csv")

# Outputs written at the end of the run.
METRICS_PATH = Path("metrics/lgbm_metrics.csv")
PREDICTIONS_PATH = Path("metrics/lgbm_predictions.csv")

# Held-out data: the windows the model predicts from, and the actuals the
# resulting forecasts are scored against.
INFERENCE_PATH = Path("data/processed/lgbm_ready/inference/inference_train.csv")
VALIDATION_PATH = Path("data/processed/lgbm_ready/inference/inference_target.csv")
# ---------------------------------------------------------
# Base single-target LGBM
# ---------------------------------------------------------
base_lgbm = LGBMRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=-1,  # no explicit depth limit
    subsample=0.8,
    # LightGBM only performs row subsampling when the bagging frequency is
    # non-zero; with the default of 0 the `subsample` value above is ignored.
    subsample_freq=1,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    verbose=0,
)

# Wrap the single-target regressor so that one copy of it is fitted per
# target column (i.e. per forecast step).
model = MultiOutputRegressor(base_lgbm)
def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* without its ``id`` column.

    For LGBM multi-output here, each row is:

        id | t1 | t2 | ... | tN

    The ``id`` column is dropped and the remaining time columns are used as
    features/targets.

    Args:
        df: Wide frame containing an ``id`` column plus the time columns.

    Returns:
        A new DataFrame holding every column of *df* except ``id``; *df*
        itself is left unchanged.

    Raises:
        KeyError: If *df* has no ``id`` column.
    """
    return df.drop(columns=["id"])
# ---------------------------------------------------------
# Load data
# ---------------------------------------------------------
train_data = pd.read_csv(TRAIN_DATA_PATH)
target_data = pd.read_csv(TARGET_DATA_PATH)
inference_data = pd.read_csv(INFERENCE_PATH)
validation_data = pd.read_csv(VALIDATION_PATH)

# Sanity
# print(train_data.head())
# print(target_data.head())
# print(inference_data.head())
# print(validation_data.head())

# Drop the 'id' column from every frame so only the time columns remain.
# The raw *_data frames are kept because the ids are needed again later
# when forecasts and actuals are put into long format.
train_df = build_features(train_data)
target_df = build_features(target_data)
inference_df = build_features(inference_data)
validation_df = build_features(validation_data)
# ---------------------------------------------------------
# Fit model
# ---------------------------------------------------------
# Features are the training windows; targets are the matching target
# windows (one output per target column).
model.fit(train_df, target_df)

# Predict for inference windows
preds = model.predict(inference_df)

# If you want integer-unit forecasts, uncomment:
# preds = np.round(preds)
# ---------------------------------------------------------
# Build long-format prediction DataFrame
# ---------------------------------------------------------
# One row per inference row, one column per predicted step
# (expected shape: [n_rows, HORIZON]).
preds_df = pd.DataFrame(preds)

# Long form: one row per (row_index, horizon_step)
preds_df_long = preds_df.stack().reset_index()
preds_df_long.columns = ["id_index", "h_raw", "forecast"]

# Row i of `preds` was predicted from row i of `inference_data`, so the ids
# are taken from that frame. Looking them up by position in
# `validation_data` would mislabel forecasts (or fail) whenever the two
# files differ in row order or length.
preds_df_long["id"] = inference_data["id"].iloc[preds_df_long["id_index"]].values
preds_df_long["h"] = preds_df_long["h_raw"].astype(int) + 1  # 1..HORIZON
preds_df_long["model"] = "lightgbm"

preds_df_final = preds_df_long[["id", "model", "h", "forecast"]]
print(preds_df_final.head(HORIZON))
# ---------------------------------------------------------
# Build long-format truth for validation horizon
# ---------------------------------------------------------
# Unpivot the wide validation frame into one row per (id, step): the former
# column name becomes 'h' and the cell value becomes 'sales'.
long = validation_data.melt(
    id_vars=["id"],
    var_name="h",
    value_name="sales",
)
# Column names come back as labels from the CSV header; cast them so 'h' can
# be joined against the integer steps of the forecasts.
# NOTE(review): assumes the validation headers are integer-like ("1".."14")
# - confirm against the file.
long["h"] = long["h"].astype(int)
# ---------------------------------------------------------
# Compute metrics per id
# ---------------------------------------------------------
metrics_rows = []

# Split the actuals by id once up front instead of re-filtering the whole
# `long` frame for every forecasted series.
truth_by_id = {key: frame for key, frame in long.groupby("id")}

for (sku_id, model_name), g_fore in preds_df_final.groupby(["id", "model"]):
    g_test = truth_by_id.get(sku_id)
    if g_test is None:
        # no actuals for this id, so there is nothing to score
        continue

    if g_test["h"].max() < HORIZON:
        # safety check - skip if somehow shorter
        continue

    # Pair each forecast with the actual value for the same (id, step).
    merged = pd.merge(
        g_test[["id", "h", "sales"]],
        g_fore[["id", "h", "forecast"]],
        on=["id", "h"],
        how="inner",
    )
    if merged.empty:
        continue

    y_true = merged["sales"].values
    y_pred = merged["forecast"].values

    m = mae(y_true, y_pred)
    b = bias(y_true, y_pred)
    # Combined score: error magnitude plus the size of the bias.
    s = m + abs(b)

    metrics_rows.append(
        {
            "id": sku_id,
            "model": model_name,
            "mae": float(m),
            "bias": float(b),
            "score": float(s),
        }
    )
# Fix the column order explicitly so the metrics file still gets a header
# row when no series could be scored.
metrics_df = pd.DataFrame(
    metrics_rows,
    columns=["id", "model", "mae", "bias", "score"],
)

# Make sure the output directories exist before writing.
for out_path in (METRICS_PATH, PREDICTIONS_PATH):
    out_path.parent.mkdir(parents=True, exist_ok=True)

metrics_df.to_csv(METRICS_PATH, index=False)
preds_df_final.to_csv(PREDICTIONS_PATH, index=False)

print("Saved:")
print(f" - forecasts -> {PREDICTIONS_PATH}")
print(f" - metrics -> {METRICS_PATH}")