"""Parse tegrastats output stream → per-sample CSV rows.

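tegrastats emits one line per sample. Each line (optionally prefixed by a
timestamp, then e.g. "RAM 2345/7858MB ...") includes RAM, GPU MHz, GPU load,
CPU load per-core, and thermal zone readings. The parser does not read a
timestamp from the line; each row is stamped with the UTC time at parse time.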

This parser is intentionally tolerant of unknown fields — JetPack 6.2 and
6.3 differ in which tags they emit. Fields it does not recognize are ignored,
and the full raw line is stored in the ``extras_json`` column so downstream
analysis can still inspect it.

Schema (CSV columns):
    timestamp_utc_iso, ram_used_mb, ram_total_mb, gpu_load_pct,
    gpu_freq_mhz, cpu_load_avg_pct, soc_temp_c, gpu_temp_c, extras_json

Usage:
    tegrastats --interval 200 | python3 tegrastats_parser.py --out out.csv
"""

from __future__ import annotations

import argparse
import csv
import json
import re
import sys
from datetime import datetime, timezone

# Shorthand for the UTC tzinfo; parse_line uses it to timestamp each row.
UTC = timezone.utc
from pathlib import Path
from typing import IO


# Column order of the CSV header written by ``stream_to_csv``. The names are
# the same keys that ``parse_line`` puts into each row dict.
CSV_COLUMNS = (
    # sample time
    "timestamp_utc_iso",
    # memory
    "ram_used_mb", "ram_total_mb",
    # GPU
    "gpu_load_pct", "gpu_freq_mhz",
    # CPU
    "cpu_load_avg_pct",
    # thermal zones
    "soc_temp_c", "gpu_temp_c",
    # JSON blob carrying the raw input line
    "extras_json",
)

# Field patterns. Capture groups hold the numeric text only (no units).

# RAM usage, e.g. "RAM 2345/7858MB" -> (used_mb, total_mb).
_RAM_RE = re.compile(r"RAM\s+(\d+)/(\d+)MB")

# GPU load with an optional frequency -> (load_pct, freq_mhz or None).
# Accepts "GR3D_FREQ 12%", "GR3D_FREQ 12%@305" and the bracketed form that
# some tegrastats builds print, "GR3D_FREQ 12%@[305]" / "12%@[305,305]"
# (only the first bracketed value is captured). Without the optional "[" the
# frequency of the bracketed form would be silently dropped.
_GR3D_RE = re.compile(r"GR3D_FREQ\s+(\d+)%@?\[?(\d+)?")

# Per-core CPU field, e.g. "CPU [67%@1190,off,...]" -> text between brackets.
_CPU_RE = re.compile(r"CPU\s+\[([^\]]+)\]")

# Thermal zones, matched case-insensitively, e.g. "cpu@45.5C" / "GPU@46C".
# NOTE(review): the SoC pattern requires "@" directly after "SOC", so numbered
# zones such as "soc0@44.5C" are not matched — confirm whether that is intended.
_SOC_TEMP_RE = re.compile(r"(?:SOC|cpu)@(\d+(?:\.\d+)?)C", re.IGNORECASE)
_GPU_TEMP_RE = re.compile(r"GPU@(\d+(?:\.\d+)?)C", re.IGNORECASE)


def parse_line(line: str) -> dict[str, object] | None:
    """Turn one tegrastats line into a CSV row dict.

    Args:
        line: One raw line of tegrastats output; surrounding whitespace is
            stripped before matching.

    Returns:
        ``None`` when the stripped line is empty. Otherwise a dict with the
        CSV column names as keys. ``timestamp_utc_iso`` is the UTC wall-clock
        time at which the line was parsed (millisecond precision), not a
        value read from the line. Metric columns hold the matched text, or
        ``""`` when the field was not found. ``extras_json`` is a compact
        JSON object whose ``raw`` member is the whole stripped line.
    """
    text = line.strip()
    if text == "":
        return None

    stamp = datetime.now(UTC).isoformat(timespec="milliseconds")
    row: dict[str, object] = {"timestamp_utc_iso": stamp}
    # Every remaining column starts blank; fields found below overwrite it.
    row.update(
        dict.fromkeys(
            (
                "ram_used_mb",
                "ram_total_mb",
                "gpu_load_pct",
                "gpu_freq_mhz",
                "cpu_load_avg_pct",
                "soc_temp_c",
                "gpu_temp_c",
                "extras_json",
            ),
            "",
        )
    )

    ram = _RAM_RE.search(text)
    if ram is not None:
        row["ram_used_mb"] = ram.group(1)
        row["ram_total_mb"] = ram.group(2)

    gr3d = _GR3D_RE.search(text)
    if gr3d is not None:
        row["gpu_load_pct"] = gr3d.group(1)
        freq = gr3d.group(2)
        if freq:
            row["gpu_freq_mhz"] = freq

    cores = _CPU_RE.search(text)
    if cores is not None:
        # Entries look like "67%@1190"; cores reported as "off" have no
        # numeric load and are left out of the average.
        samples: list[float] = []
        for entry in cores.group(1).split(","):
            pct_text = entry.strip().partition("%")[0]
            try:
                samples.append(float(pct_text))
            except ValueError:
                pass
        if samples:
            row["cpu_load_avg_pct"] = format(sum(samples) / len(samples), ".1f")

    for column, pattern in (
        ("soc_temp_c", _SOC_TEMP_RE),
        ("gpu_temp_c", _GPU_TEMP_RE),
    ):
        hit = pattern.search(text)
        if hit is not None:
            row[column] = hit.group(1)

    # Keep the complete stripped line so that fields this parser does not
    # recognize remain available to downstream analysis.
    row["extras_json"] = json.dumps({"raw": text}, separators=(",", ":"))
    return row


def stream_to_csv(source: IO[str], out_path: Path) -> int:
    """Parse every line of ``source`` and write the rows to ``out_path`` as CSV.

    Missing parent directories of ``out_path`` are created, and the file is
    opened in write mode, replacing any existing content. A header row is
    written first. Lines for which ``parse_line`` returns ``None`` are
    skipped. The file is flushed after each data row.

    Args:
        source: Iterable text stream that yields tegrastats lines.
        out_path: Destination CSV path.

    Returns:
        The number of data rows written (the header is not counted).
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    count = 0
    with out_path.open("w", newline="", encoding="utf-8") as sink:
        writer = csv.DictWriter(sink, fieldnames=list(CSV_COLUMNS))
        writer.writeheader()
        for parsed in map(parse_line, source):
            if parsed is not None:
                writer.writerow(parsed)
                # Flush per row so the file on disk tracks the input stream.
                sink.flush()
                count += 1
    return count


def main() -> int:
    """Command-line entry point: read tegrastats lines from stdin, write CSV.

    Requires ``--out PATH``. After stdin is exhausted, a one-line summary
    with the row count is printed to stderr.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(description="Parse tegrastats to CSV.")
    cli.add_argument("--out", type=Path, required=True)
    options = cli.parse_args()

    written = stream_to_csv(sys.stdin, options.out)
    summary = f"tegrastats_parser: wrote {written} rows to {options.out}"
    print(summary, file=sys.stderr)
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())