from __future__ import annotations
from dataclasses import dataclass, field
from html import escape
from pathlib import Path
from typing import Dict, List
import pandas as pd
from .splits import TimeSplit
# Fill colors for the well-known segment names. Lookups elsewhere in this
# module use ``.get(name, "#64748b")``, so any segment not listed here is
# drawn in a neutral gray.
SEGMENT_COLORS: Dict[str, str] = {
    "train": "#1d4ed8",  # blue
    "validation": "#d97706",  # amber
    "test": "#059669",  # green
}
@dataclass(frozen=True)
class SimulationChartData:
    """Immutable, plot-oriented view of a temporal simulation timeline.

    Attributes:
        title: Heading shown on the report.
        time_col: Label of the dataset column that holds the timestamps.
        dataset_start: Minimum timestamp found in the dataset.
        dataset_end: Maximum timestamp found in the dataset.
        total_rows: Row count of the source dataset.
        total_folds: How many folds the simulation produced.
        strategy: Name of the split strategy behind the simulation.
        size_kind: Unit family in which the partition sizes are expressed.
        segment_order: Segment names in display order.
        segment_colors: Mapping from segment name to its fill color.
        segment_stats: Per-segment row statistics aggregated over all folds.
        folds: One plot-ready payload per fold.
    """

    title: str
    time_col: str
    dataset_start: pd.Timestamp
    dataset_end: pd.Timestamp
    total_rows: int
    total_folds: int
    strategy: str
    size_kind: str
    segment_order: List[str]
    segment_colors: Dict[str, str]
    segment_stats: Dict[str, Dict[str, object]]
    folds: List[Dict[str, object]]

    def to_dict(self) -> Dict[str, object]:
        """Return every field in a plain dict keyed by field name.

        Values are handed back exactly as stored: nothing is copied or
        converted, so timestamps stay ``pd.Timestamp`` objects and the nested
        lists/dicts are the same objects held by this instance.
        """
        # Listed explicitly so the key order of the result is fixed.
        exported = (
            "title",
            "time_col",
            "dataset_start",
            "dataset_end",
            "total_rows",
            "total_folds",
            "strategy",
            "size_kind",
            "segment_order",
            "segment_colors",
            "segment_stats",
            "folds",
        )
        return {key: getattr(self, key) for key in exported}
@dataclass(frozen=True)
class SimulationSummary:
    """Structured description of a temporal simulation over a dataset.

    Attributes:
        title: Report title.
        time_col: Name of the timestamp column used in the dataset.
        dataset_start: Earliest timestamp present in the dataset.
        dataset_end: Latest timestamp present in the dataset.
        total_rows: Number of rows in the source dataset.
        total_folds: Number of simulated folds.
        strategy: Split strategy used to build the simulation.
        size_kind: Unit family used by the partition sizes.
        folds: Fold-by-fold segment metadata. Each entry is read by
            ``to_frame`` through the keys ``fold``, ``simulation_start``,
            ``simulation_end`` and ``segments`` (segment name mapped to a
            dict with ``start``, ``end`` and ``rows``).
        segment_order: Ordered list of segment names.
        chart_data: Plot-ready representation of the same simulation.
        html: Rendered HTML report (left out of ``repr`` and ``to_dict``).
    """

    title: str
    time_col: str
    dataset_start: pd.Timestamp
    dataset_end: pd.Timestamp
    total_rows: int
    total_folds: int
    strategy: str
    size_kind: str
    folds: List[Dict[str, object]]
    segment_order: List[str]
    chart_data: SimulationChartData
    html: str = field(repr=False)

    def to_dict(self) -> Dict[str, object]:
        """Return the summary as a plain dictionary.

        The rendered ``html`` is not included. ``chart_data`` is expanded via
        its own ``to_dict``; all other values are returned as stored.
        """
        return {
            "title": self.title,
            "time_col": self.time_col,
            "dataset_start": self.dataset_start,
            "dataset_end": self.dataset_end,
            "total_rows": self.total_rows,
            "total_folds": self.total_folds,
            "strategy": self.strategy,
            "size_kind": self.size_kind,
            "segment_order": self.segment_order,
            "folds": self.folds,
            "chart_data": self.chart_data.to_dict(),
        }

    def to_frame(self) -> pd.DataFrame:
        """Convert fold summaries into a tabular pandas DataFrame.

        Returns:
            One row per fold with ``fold``, ``simulation_start`` and
            ``simulation_end`` columns, followed by ``<segment>_start``,
            ``<segment>_end`` and ``<segment>_rows`` for every segment the
            fold contains.
        """
        records = []
        for fold in self.folds:
            record = {
                "fold": fold["fold"],
                "simulation_start": fold["simulation_start"],
                "simulation_end": fold["simulation_end"],
            }
            # Flatten the nested per-segment dicts into prefixed columns.
            for segment_name, segment_info in fold["segments"].items():
                record[f"{segment_name}_start"] = segment_info["start"]
                record[f"{segment_name}_end"] = segment_info["end"]
                record[f"{segment_name}_rows"] = segment_info["rows"]
            records.append(record)
        return pd.DataFrame(records)

    def write_html(self, path: str | Path) -> Path:
        """Write the rendered HTML report to ``path`` as UTF-8.

        Missing parent directories are created first, so a report can be
        written straight into a fresh output folder.

        Args:
            path: Destination file; an existing file is overwritten.

        Returns:
            The destination as a ``Path``.
        """
        destination = Path(path)
        # Without this, write_text raises FileNotFoundError for a new folder.
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(self.html, encoding="utf-8")
        return destination
def build_simulation_summary(
    splits: List[TimeSplit],
    frame: pd.DataFrame,
    time_col: object,
    title: str,
) -> SimulationSummary:
    """Summarize a temporal simulation and render its HTML report.

    Args:
        splits: Simulated folds. The segment order, ``strategy`` and
            ``size_kind`` are all read from the first split; with an empty
            list the summary has no folds, no segments and ``"unknown"`` for
            both metadata values.
        frame: Source dataset the splits were drawn from.
        time_col: Key of the timestamp column in ``frame``. Its ``str()``
            form is used as the display label.
        title: Report title.

    Returns:
        A ``SimulationSummary`` holding the per-fold metadata, the
        plot-ready chart data and the rendered HTML.
    """
    # Parse the timestamp column once and reuse it for both bounds.
    timestamps = pd.to_datetime(frame[time_col])
    dataset_start = timestamps.min()
    dataset_end = timestamps.max()
    time_col_label = str(time_col)
    total_rows = len(frame)
    total_folds = len(splits)

    if splits:
        # The first split is treated as representative of the whole run.
        reference = splits[0]
        segment_order = list(reference.segments.keys())
        strategy = reference.metadata.get("strategy", "unknown")
        size_kind = reference.metadata.get("size_kind", "unknown")
    else:
        segment_order = []
        strategy = "unknown"
        size_kind = "unknown"

    fold_rows = [
        _build_fold_summary(split=split, segment_order=segment_order) for split in splits
    ]
    chart_data = _build_chart_data(
        title=title,
        time_col=time_col_label,
        dataset_start=dataset_start,
        dataset_end=dataset_end,
        total_rows=total_rows,
        total_folds=total_folds,
        strategy=strategy,
        size_kind=size_kind,
        segment_order=segment_order,
        folds=fold_rows,
    )
    html = _render_simulation_html(chart_data)
    return SimulationSummary(
        title=title,
        time_col=time_col_label,
        dataset_start=dataset_start,
        dataset_end=dataset_end,
        total_rows=total_rows,
        total_folds=total_folds,
        strategy=strategy,
        size_kind=size_kind,
        folds=fold_rows,
        segment_order=segment_order,
        chart_data=chart_data,
        html=html,
    )
def _build_fold_summary(split: TimeSplit, segment_order: List[str]) -> Dict[str, object]:
    """Describe one fold as a plain dictionary.

    Args:
        split: Fold to describe. Its ``fold``, ``boundaries`` and
            ``segments`` attributes are read.
        segment_order: Segment names to include, in output order. The first
            and last names define the fold's overall start and end.

    Returns:
        A dict with ``fold``, ``simulation_start``, ``simulation_end`` and
        ``segments``, where ``segments`` maps each name to its ``start``,
        ``end`` and ``rows`` (the length of that segment).
    """
    boundaries = split.boundaries
    per_segment = {
        name: {
            "start": boundaries[name].start,
            "end": boundaries[name].end,
            "rows": int(len(split.segments[name])),
        }
        for name in segment_order
    }
    return {
        "fold": split.fold,
        "simulation_start": boundaries[segment_order[0]].start,
        "simulation_end": boundaries[segment_order[-1]].end,
        "segments": per_segment,
    }
def _build_chart_data(
    title: str,
    time_col: str,
    dataset_start: pd.Timestamp,
    dataset_end: pd.Timestamp,
    total_rows: int,
    total_folds: int,
    strategy: str,
    size_kind: str,
    segment_order: List[str],
    folds: List[Dict[str, object]],
) -> SimulationChartData:
    """Turn fold summaries into the plot-ready ``SimulationChartData``.

    Each segment gains ``offset_pct`` and ``width_pct`` (its position and
    extent as percentages of the dataset's time span, rounded to four
    decimals) plus a ``color``. Each fold gains ``simulation_span``.
    Per-segment row statistics are aggregated across all folds.

    Args:
        title: Report title, passed through.
        time_col: Timestamp column label, passed through.
        dataset_start: Earliest dataset timestamp; the zero point of offsets.
        dataset_end: Latest dataset timestamp.
        total_rows: Dataset row count, passed through.
        total_folds: Fold count, passed through.
        strategy: Split strategy name, passed through.
        size_kind: Partition size unit family, passed through.
        segment_order: Segment names in display order.
        folds: Fold summaries with ``simulation_start``, ``simulation_end``
            and a ``segments`` mapping of ``start``/``end``/``rows`` dicts.

    Returns:
        The assembled ``SimulationChartData``.
    """
    fallback_color = "#64748b"
    # Floored at one second so a zero-length dataset span never divides by zero.
    span_seconds = max((dataset_end - dataset_start).total_seconds(), 1.0)
    palette = {name: SEGMENT_COLORS.get(name, fallback_color) for name in segment_order}

    # NOTE: assumes ``folds`` is non-empty whenever ``segment_order`` is;
    # otherwise the average below divides by zero.
    stats_by_segment = {}
    for name in segment_order:
        counts = [int(fold["segments"][name]["rows"]) for fold in folds]
        total = sum(counts)
        stats_by_segment[name] = {
            "color": palette[name],
            "total_rows": int(total),
            "avg_rows": float(total / len(counts)),
            "min_rows": int(min(counts)),
            "max_rows": int(max(counts)),
        }

    def plot_segment(name: str, info: Dict[str, object]) -> Dict[str, object]:
        """Add percentage geometry and a color to one segment's metadata."""
        begin = (info["start"] - dataset_start).total_seconds()
        finish = (info["end"] - dataset_start).total_seconds()
        left = max((begin / span_seconds) * 100, 0.0)
        # 0.8% minimum width; presumably so very short segments stay visible.
        extent = max(((finish - begin) / span_seconds) * 100, 0.8)
        return {
            **info,
            "offset_pct": round(left, 4),
            "width_pct": round(extent, 4),
            "color": palette.get(name, fallback_color),
        }

    plotted_folds = [
        {
            **fold,
            "simulation_span": fold["simulation_end"] - fold["simulation_start"],
            "segments": {
                name: plot_segment(name, info) for name, info in fold["segments"].items()
            },
        }
        for fold in folds
    ]

    return SimulationChartData(
        title=title,
        time_col=time_col,
        dataset_start=dataset_start,
        dataset_end=dataset_end,
        total_rows=total_rows,
        total_folds=total_folds,
        strategy=strategy,
        size_kind=size_kind,
        segment_order=segment_order,
        segment_colors=palette,
        segment_stats=stats_by_segment,
        folds=plotted_folds,
    )
def _render_simulation_html(chart_data: SimulationChartData) -> str:
    """Render the complete, self-contained HTML report for ``chart_data``.

    The page has three parts: a header with dataset metadata, a "Segment
    profile" section (legend plus one statistics card per segment) and a
    "Timeline" section with one card per fold. Every dynamic value that is
    not a number produced by this module is passed through ``html.escape``,
    including fold identifiers and color values.

    Args:
        chart_data: Plot-ready simulation description.

    Returns:
        The HTML document as a string.
    """
    legend_items = []
    for name in chart_data.segment_order:
        color = escape(str(chart_data.segment_colors.get(name, "#64748b")))
        legend_items.append(
            '<div class="legend-item"><span class="legend-chip" '
            f'style="background:{color}"></span>'
            f"{escape(name.title())}</div>"
        )
    legend = "".join(legend_items)

    stat_cards = "".join(
        (
            '<div class="stat-card">'
            f'<div class="stat-label">{escape(name.title())}</div>'
            f'<div class="stat-value">{stats["avg_rows"]:.1f}</div>'
            f'<div class="stat-meta">avg rows per fold · min {stats["min_rows"]} · '
            f'max {stats["max_rows"]}</div>'
            "</div>"
        )
        for name, stats in chart_data.segment_stats.items()
    )

    rows_html = [_render_fold_card(fold) for fold in chart_data.folds]

    # The template lines are flush-left on purpose: they are the literal
    # bytes of the emitted document.
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{escape(chart_data.title)}</title>
<style>
:root {{
--bg-top: #e0f2fe;
--bg-bottom: #f8fafc;
--panel: rgba(255, 255, 255, 0.92);
--panel-strong: #ffffff;
--text: #0f172a;
--muted: #475569;
--soft: #64748b;
--track: #dbeafe;
--track-border: #bfdbfe;
--border: rgba(148, 163, 184, 0.28);
--shadow: 0 24px 80px rgba(15, 23, 42, 0.12);
}}
* {{
box-sizing: border-box;
}}
body {{
margin: 0;
font-family: "Avenir Next", "Segoe UI", "Helvetica Neue", sans-serif;
background:
radial-gradient(circle at top left, rgba(29, 78, 216, 0.14), transparent 32%),
radial-gradient(circle at top right, rgba(5, 150, 105, 0.12), transparent 24%),
linear-gradient(180deg, var(--bg-top) 0%, var(--bg-bottom) 46%);
color: var(--text);
}}
.page {{
max-width: 1180px;
margin: 0 auto;
padding: 40px 24px 56px;
}}
.hero {{
background: linear-gradient(135deg, rgba(255, 255, 255, 0.96), rgba(239, 246, 255, 0.92));
border: 1px solid var(--border);
border-radius: 28px;
padding: 32px;
box-shadow: var(--shadow);
backdrop-filter: blur(10px);
}}
h1 {{
margin: 0 0 8px;
font-size: clamp(2rem, 4vw, 3.25rem);
line-height: 1;
letter-spacing: -0.04em;
}}
p {{
margin: 0;
color: var(--muted);
}}
code {{
font-family: "SFMono-Regular", Consolas, "Liberation Mono", monospace;
}}
.hero-subtitle {{
max-width: 760px;
font-size: 1rem;
line-height: 1.6;
}}
.meta-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
gap: 14px;
margin-top: 26px;
}}
.meta-item {{
padding: 14px 16px;
border-radius: 18px;
background: rgba(255, 255, 255, 0.82);
border: 1px solid var(--border);
}}
.meta-label {{
color: var(--soft);
font-size: 0.82rem;
text-transform: uppercase;
letter-spacing: 0.08em;
}}
.meta-value {{
margin-top: 6px;
font-size: 1.05rem;
font-weight: 600;
}}
.section {{
margin-top: 26px;
background: var(--panel);
border: 1px solid var(--border);
border-radius: 24px;
padding: 24px;
box-shadow: var(--shadow);
}}
.section-header {{
display: flex;
justify-content: space-between;
gap: 16px;
align-items: end;
margin-bottom: 18px;
}}
.section-header h2 {{
margin: 0;
font-size: 1.35rem;
}}
.legend {{
display: flex;
flex-wrap: wrap;
gap: 10px;
}}
.legend-item, .metric-chip {{
display: inline-flex;
align-items: center;
gap: 8px;
padding: 8px 12px;
border-radius: 999px;
background: rgba(248, 250, 252, 0.9);
border: 1px solid var(--border);
font-size: 0.92rem;
}}
.legend-chip, .metric-chip-dot {{
width: 11px;
height: 11px;
border-radius: 999px;
flex: 0 0 auto;
}}
.axis {{
display: flex;
justify-content: space-between;
gap: 16px;
margin-bottom: 18px;
color: var(--muted);
font-size: 0.92rem;
}}
.stat-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
gap: 14px;
}}
.stat-card {{
padding: 18px;
border-radius: 20px;
background: linear-gradient(180deg, rgba(255, 255, 255, 0.98), rgba(248, 250, 252, 0.95));
border: 1px solid var(--border);
}}
.stat-label {{
color: var(--soft);
text-transform: uppercase;
letter-spacing: 0.08em;
font-size: 0.76rem;
}}
.stat-value {{
margin-top: 8px;
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.04em;
}}
.stat-meta {{
margin-top: 6px;
color: var(--muted);
line-height: 1.5;
}}
.timeline {{
display: grid;
gap: 14px;
}}
.fold-card {{
padding: 18px;
border-radius: 20px;
background: var(--panel-strong);
border: 1px solid var(--border);
}}
.fold-header {{
display: flex;
justify-content: space-between;
gap: 12px;
align-items: baseline;
margin-bottom: 14px;
}}
.fold-header h3 {{
margin: 0 0 4px;
font-size: 1.08rem;
}}
.fold-span {{
color: var(--muted);
font-size: 0.92rem;
white-space: nowrap;
}}
.track-shell {{
position: relative;
height: 34px;
border-radius: 999px;
background:
linear-gradient(90deg, rgba(191, 219, 254, 0.55), rgba(219, 234, 254, 0.8)),
var(--track);
border: 1px solid var(--track-border);
overflow: hidden;
}}
.segment {{
position: absolute;
top: 4px;
bottom: 4px;
border-radius: 999px;
min-width: 6px;
box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.38);
}}
.segment span {{
position: absolute;
inset: 0;
display: flex;
align-items: center;
justify-content: center;
padding: 0 10px;
color: white;
font-size: 0.78rem;
font-weight: 600;
white-space: nowrap;
mix-blend-mode: screen;
}}
.fold-metrics {{
display: flex;
flex-wrap: wrap;
gap: 10px;
margin-top: 14px;
}}
@media (max-width: 720px) {{
.page {{
padding: 24px 14px 40px;
}}
.hero,
.section {{
padding: 18px;
border-radius: 20px;
}}
.fold-header,
.section-header {{
flex-direction: column;
align-items: start;
}}
.track-shell {{
height: 28px;
}}
.segment span {{
font-size: 0.7rem;
}}
}}
</style>
</head>
<body>
<main class="page">
<section class="hero">
<p class="meta-label">Temporal partition simulation overview</p>
<h1>{escape(chart_data.title)}</h1>
<p class="hero-subtitle">
Fold-by-fold visualization for a {escape(chart_data.strategy)} strategy over
{chart_data.total_rows} rows using the <code>{escape(chart_data.time_col)}</code> timeline.
</p>
<div class="meta-grid">
<div class="meta-item"><div class="meta-label">Dataset window</div><div class="meta-value">{escape(str(chart_data.dataset_start))} to {escape(str(chart_data.dataset_end))}</div></div>
<div class="meta-item"><div class="meta-label">Total folds</div><div class="meta-value">{chart_data.total_folds}</div></div>
<div class="meta-item"><div class="meta-label">Strategy</div><div class="meta-value">{escape(chart_data.strategy.title())}</div></div>
<div class="meta-item"><div class="meta-label">Sizing mode</div><div class="meta-value">{escape(chart_data.size_kind.title())}</div></div>
</div>
</section>
<section class="section">
<div class="section-header">
<div>
<h2>Segment profile</h2>
<p>Average and range of rows allocated to each segment across all folds.</p>
</div>
<div class="legend">{legend}</div>
</div>
<div class="stat-grid">
{stat_cards}
</div>
</section>
<section class="section">
<div class="section-header">
<div>
<h2>Timeline</h2>
<p>Each fold is plotted over the full dataset span so you can inspect overlap, drift and coverage at a glance.</p>
</div>
</div>
<div class="axis">
<span>{escape(str(chart_data.dataset_start))}</span>
<span>{escape(str(chart_data.dataset_end))}</span>
</div>
<div class="timeline">
{''.join(rows_html)}
</div>
</section>
</main>
</body>
</html>
"""


def _render_fold_card(fold: Dict[str, object]) -> str:
    """Render one fold as a ``<section class="fold-card">`` element.

    Args:
        fold: Plot-ready fold payload. The keys ``fold``,
            ``simulation_start``, ``simulation_end``, ``simulation_span``
            and ``segments`` are read; each segment supplies ``start``,
            ``end``, ``rows``, ``offset_pct``, ``width_pct`` and ``color``.

    Returns:
        The HTML fragment holding the fold header, its timeline track with
        one bar per segment, and one row-count chip per segment.
    """
    bars = []
    chips = []
    for segment_name, segment_info in fold["segments"].items():
        label = escape(segment_name.title())
        # Colors land inside a style attribute, so they are escaped as well.
        color = escape(str(segment_info["color"]))
        tooltip = escape(
            f"{segment_name}: {segment_info['start']} -> {segment_info['end']} "
            f"({segment_info['rows']} rows)"
        )
        bars.append(
            f'<div class="segment segment-{escape(segment_name)}" '
            f'style="left:{segment_info["offset_pct"]:.4f}%;'
            f'width:{segment_info["width_pct"]:.4f}%;'
            f'background:{color}" '
            f'title="{tooltip}">'
            f"<span>{label}</span>"
            "</div>"
        )
        chips.append(
            '<div class="metric-chip">'
            f'<span class="metric-chip-dot" style="background:{color}"></span>'
            f'{label}: {segment_info["rows"]} rows'
            "</div>"
        )
    return (
        '<section class="fold-card">'
        '<div class="fold-header">'
        f'<div><h3>Fold {escape(str(fold["fold"]))}</h3>'
        f'<p>{escape(str(fold["simulation_start"]))} '
        f'to {escape(str(fold["simulation_end"]))}</p></div>'
        f'<div class="fold-span">{escape(str(fold["simulation_span"]))}</div>'
        "</div>"
        '<div class="fold-track">'
        '<div class="track-shell">'
        f'{"".join(bars)}'
        "</div>"
        "</div>"
        f'<div class="fold-metrics">{"".join(chips)}</div>'
        "</section>"
    )