import pandas as pd
import numpy as np
from chronos import Chronos2Pipeline
from pathlib import Path
from utils.metrics import mae, bias
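# Assumption: the project-local utils.metrics module exposes mae(y_true, y_pred)
# and bias(y_true, y_pred), each taking array-likes and returning a scalar, as
# they are called in the per-id metrics loop further down.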
# ---------------------------------------------------------
# CONFIG
# ---------------------------------------------------------
HORIZON = 14
INFERENCE_INPUT_PATH = Path("data/processed/lgbm_ready/inference/inference_train.csv")
INFERENCE_TARGET_PATH = Path("data/processed/lgbm_ready/inference/inference_target.csv")
CHRONOS_OUTPUT_PATH = Path("metrics/chronos_predictions.csv")
CHRONOS_METRICS_PATH = Path("metrics/chronos_metrics.csv")
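# Both output files live under metrics/; creating the folder up front keeps the
# later to_csv calls from failing if it does not exist yet.
CHRONOS_OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)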
# ---------------------------------------------------------
# LOAD CHRONOS MODEL
# ---------------------------------------------------------
pipeline = Chronos2Pipeline.from_pretrained(
    "amazon/chronos-2",
    device_map="cpu",  # switch to "cuda" once your environment is ready
)
# ---------------------------------------------------------
# LOAD DATA (wide: id, 1..T)
# ---------------------------------------------------------
df_infer = pd.read_csv(INFERENCE_INPUT_PATH)
df_true = pd.read_csv(INFERENCE_TARGET_PATH)
# ---------------------------------------------------------
# Helper: wide -> Chronos long format
# ---------------------------------------------------------
def reshape_for_chronos(df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert the wide format (id, 1..T) to the long format Chronos expects:
        id | timestamp | target
    A synthetic daily calendar starting at 2024-01-01 is used for the timestamps.
    """
    melted = df.melt(id_vars=["id"], var_name="day", value_name="target")
    melted["day"] = melted["day"].astype(int)
    melted["timestamp"] = pd.to_datetime("2024-01-01") + pd.to_timedelta(
        melted["day"], unit="D"
    )
    return melted[["id", "timestamp", "target"]]
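# Illustration (hypothetical 2-column wide frame) of what reshape_for_chronos does:
#   id  1  2            id   timestamp    target
#   A   5  7    --->    A    2024-01-02   5
#                       A    2024-01-03   7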
context_df = reshape_for_chronos(df_infer)
truth_df = reshape_for_chronos(df_true)
# ---------------------------------------------------------
# CHRONOS INFERENCE (univariate, per-id)
# ---------------------------------------------------------
prediction_rows = []
for id_val, df_series in context_df.groupby("id"):
    df_series_sorted = df_series.sort_values("timestamp")
    pred_df = pipeline.predict_df(
        df_series_sorted,
        future_df=None,
        prediction_length=HORIZON,
        quantile_levels=[0.1, 0.5, 0.9],
        id_column="id",
        timestamp_column="timestamp",
        target="target",
    )
    # Chronos returns columns like: timestamp, target_name, predictions, 0.1, 0.5, 0.9
    pred_df["id"] = id_val
    pred_df["h"] = np.arange(1, HORIZON + 1)  # horizon index 1..HORIZON
    prediction_rows.append(pred_df)
chronos_preds = pd.concat(prediction_rows, ignore_index=True)
print(chronos_preds.head())
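# Note: predict_df may also accept the full multi-id context_df in a single call;
# the per-id loop above is kept for clarity and predictable memory use.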
# ---------------------------------------------------------
# Save predictions (rename quantile columns)
# ---------------------------------------------------------
chronos_preds.rename(
    columns={
        "0.1": "q10",
        "0.5": "q50",
        "0.9": "q90",
    },
    inplace=True,
)
chronos_preds.to_csv(CHRONOS_OUTPUT_PATH, index=False)
print(f"Saved Chronos forecasts β {CHRONOS_OUTPUT_PATH}")
# ---------------------------------------------------------
# METRICS: align truth with horizon index h
# ---------------------------------------------------------
# Take last HORIZON days per id from truth_df, ordered by timestamp
true_filtered = (
    truth_df.sort_values(["id", "timestamp"])
    .groupby("id", group_keys=False)
    .tail(HORIZON)
    .copy()  # copy so the "h" column can be added without a SettingWithCopyWarning
)
# Assign horizon index 1..H per id (aligned with the Chronos h index)
true_filtered["h"] = (
    true_filtered.sort_values(["id", "timestamp"])
    .groupby("id")
    .cumcount() + 1
)
print(true_filtered.head())
# Merge on id + h, use q50 as point forecast
merged = pd.merge(
    true_filtered[["id", "h", "target"]],
    chronos_preds[["id", "h", "q50"]],
    on=["id", "h"],
    how="inner",
)
merged.rename(columns={"q50": "forecast"}, inplace=True)
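# Optional sanity check (assumes the target file covers all HORIZON days per id):
# assert len(merged) == chronos_preds["id"].nunique() * HORIZON, "truth/forecast misaligned"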
# ---------------------------------------------------------
# Compute metrics per id
# ---------------------------------------------------------
metric_rows = []
for id_val, g in merged.groupby("id"):
    y_true = g["target"].values
    y_pred = g["forecast"].values
    m = mae(y_true, y_pred)
    b = bias(y_true, y_pred)
    s = m + abs(b)  # combined score: MAE penalized by the absolute bias
    metric_rows.append(
        {
            "id": id_val,
            "model": "chronos2",
            "mae": float(m),
            "bias": float(b),
            "score": float(s),
        }
    )
metrics_df = pd.DataFrame(metric_rows)
metrics_df.to_csv(CHRONOS_METRICS_PATH, index=False)
print(f"Saved metrics β {CHRONOS_METRICS_PATH}")