#!/usr/bin/env python3
"""
pr-velocity — derive "actioned" throughput from actionability snapshots.

A PR is "actioned" on day D if it was in the actionable set on day D-1 and is
no longer in it on day D (moved to not-our-move, or merged/closed).

Reads .actionability.prs from snapshot files (populated by aggregate.yml's jq
patch). Writes per-day .actionability.actioned into each snapshot, and rollup
.summary.actioned_{7d,30d} into actionability.json.

--backfill: one-time population of .actionability.prs in past snapshots from
git history of actionability.json. Run locally; CI never needs this.
"""

import argparse
import json
import subprocess
import sys
from datetime import date, timedelta
from pathlib import Path

# Root of the per-repo data tree, relative to the current working directory.
# main() looks for actionability.json files two directory levels below it.
DATA = Path("data/repos")


def _snap_dates(repo_dir: Path) -> list[tuple[date, Path]]:
    """Return ``(date, path)`` for every dated snapshot file of *repo_dir*.

    Snapshots are the files ``<repo_dir>/snapshots/YYYY-MM-DD.json``. Results
    are ordered by file name, which for ISO dates is chronological.

    The glob only checks the *shape* of the name, so a file whose stem is not a
    real calendar date (e.g. ``2024-13-40.json`` or ``abcd-ef-gh.json``) is
    skipped rather than aborting the whole run with ``ValueError``.
    """
    out = []
    for p in sorted((repo_dir / "snapshots").glob("????-??-??.json")):
        try:
            day = date.fromisoformat(p.stem)
        except ValueError:
            continue  # matched the glob but is not a valid date — not a snapshot
        out.append((day, p))
    return out


def _actionable_prs(act_doc: dict) -> list[int]:
    """Collect the ``number`` of every PR listed under ``tiers[].states[].prs[]``.

    Numbers are returned in document order and are not de-duplicated. Missing
    ``tiers`` / ``states`` / ``prs`` keys are treated as empty lists; a PR entry
    without ``number`` raises ``KeyError``.
    """
    numbers: list[int] = []
    for tier in act_doc.get("tiers", []):
        for state in tier.get("states", []):
            numbers.extend(entry["number"] for entry in state.get("prs", []))
    return numbers


def backfill(repo_dir: Path) -> int:
    """Populate .actionability.prs in snapshots that lack it, from git history.

    Phase 1: for every snapshot whose ``actionability`` object is non-empty but
    has no ``prs``, take the newest commit of ``actionability.json`` made on the
    snapshot's UTC date, read the file at that commit with ``git show`` and
    store its actionable PR numbers (sorted, de-duplicated) as ``prs``.
    Snapshots with no commit on their date, or whose historical blob is missing
    or not valid JSON, are left untouched.

    Phase 2: for each snapshot that has ``prs`` but no ``actioned`` yet, seed
    ``actioned`` with the number of PRs present in the previous ``prs``-bearing
    snapshot and absent from this one. Pairs more than 3 days apart are skipped.

    Paths are handed to git exactly as built from *repo_dir*.
    NOTE(review): this presumably requires running from the git repository
    root — confirm against how the script is invoked.

    Returns the number of snapshots that received ``prs`` in phase 1.
    Raises ``subprocess.CalledProcessError`` if ``git log`` itself fails.
    """
    from datetime import datetime, timezone  # only this one-time path needs them

    rel = str(repo_dir / "actionability.json")
    # %ct is the committer date as a Unix timestamp. Converting it to UTC gives
    # a true UTC calendar day; slicing an ISO string with an offset would give
    # the day in the committer's own timezone instead.
    log = subprocess.check_output(
        ["git", "log", "--format=%H %ct", "--", rel], text=True
    ).splitlines()
    # latest commit per UTC date (git log is newest-first, so first wins)
    by_date: dict[date, str] = {}
    for line in log:
        commit_hash, epoch = line.split(" ", 1)
        day = datetime.fromtimestamp(int(epoch), tz=timezone.utc).date()
        by_date.setdefault(day, commit_hash)

    # Load every snapshot once; both phases work on these in-memory documents.
    snaps = _snap_dates(repo_dir)
    paths = dict(snaps)
    docs = {d: json.loads(p.read_text()) for d, p in snaps}

    patched = 0
    for d, doc in docs.items():
        act = doc.get("actionability")
        if not act or "prs" in act:
            continue
        commit = by_date.get(d)
        if not commit:
            continue
        try:
            blob = subprocess.check_output(
                ["git", "show", f"{commit}:{rel}"], text=True, stderr=subprocess.DEVNULL
            )
            historical = json.loads(blob)
        except (subprocess.CalledProcessError, json.JSONDecodeError):
            # Best effort: file absent or unparsable at that commit — skip the day.
            continue
        # Same normalised form compute() writes: sorted and free of duplicates.
        act["prs"] = sorted(set(_actionable_prs(historical)))
        paths[d].write_text(json.dumps(doc, indent=2) + "\n")
        patched += 1

    # Seed historical per-day actioned via day-diff (best available granularity for the
    # past). compute() only accumulates today; this is the one-time reconstruction.
    have_prs = sorted(
        d for d, doc in docs.items() if "prs" in (doc.get("actionability") or {})
    )
    for prev, curr in zip(have_prs, have_prs[1:]):
        if (curr - prev).days > 3:
            continue  # gap too long to attribute the difference to one day
        act = docs[curr]["actionability"]
        if "actioned" in act:
            continue  # never overwrite a count that was already recorded
        act["actioned"] = len(set(docs[prev]["actionability"]["prs"]) - set(act["prs"]))
        paths[curr].write_text(json.dumps(docs[curr], indent=2) + "\n")
    return patched


def compute(repo_dir: Path) -> dict | None:
    """Accumulate today's actioned count per run; roll up 7d/30d.

    Each run: diff fresh actionability.json against the prs set stored from the
    PREVIOUS run (today's snapshot if present, else the most recent earlier
    snapshot that has prs, provided it is at most 3 days older), add the delta
    to today's running total, then write the fresh prs. "Today" is the newest
    snapshot file. Past days' actioned are never recomputed — they were
    accumulated when current.

    Side effects: rewrites the newest snapshot (``prs`` and possibly
    ``actioned``) and rewrites actionability.json with
    ``summary.actioned_7d`` / ``summary.actioned_30d``.

    Returns ``None`` when the repo has no snapshots; otherwise a dict with keys
    ``today`` (``None`` if no count has been recorded yet), ``actioned_7d`` and
    ``actioned_30d``.
    """
    act_path = repo_dir / "actionability.json"
    act_doc = json.loads(act_path.read_text())
    new_prs = set(_actionable_prs(act_doc))

    snaps = _snap_dates(repo_dir)
    if not snaps:
        return None
    today_d, today_p = snaps[-1]
    today = json.loads(today_p.read_text())
    # Treat a missing or null "actionability" as an empty object.
    today_act = today.get("actionability") or {}
    today["actionability"] = today_act

    # Reference set: previous run's prs (today's if present, else most recent prior day's).
    if "prs" in today_act:
        ref = set(today_act["prs"])
    else:
        ref = None
        for d, p in reversed(snaps[:-1]):
            prev_act = json.loads(p.read_text()).get("actionability") or {}
            if "prs" in prev_act:
                if (today_d - d).days <= 3:  # gap guard — don't credit long silence
                    ref = set(prev_act["prs"])
                break  # only the nearest prs-bearing snapshot is ever considered

    if ref is not None:
        today_act["actioned"] = today_act.get("actioned", 0) + len(ref - new_prs)
    today_act["prs"] = sorted(new_prs)
    today_p.write_text(json.dumps(today, indent=2) + "\n")

    # Per-day actioned across all snapshots (today's was just written above).
    per_day = {}
    for d, p in snaps:
        a = json.loads(p.read_text()).get("actionability") or {}
        if "actioned" in a:
            per_day[d] = a["actioned"]

    def window_total(days: int) -> int:
        """Sum per-day counts over the last *days* days, today included."""
        cutoff = today_d - timedelta(days=days)
        return sum(v for d, v in per_day.items() if d > cutoff)

    actioned_7d = window_total(7)
    actioned_30d = window_total(30)

    # setdefault: a missing "summary" must not raise after the snapshot above
    # has already been rewritten, or the two files end up out of step.
    summary = act_doc.setdefault("summary", {})
    summary["actioned_7d"] = actioned_7d
    summary["actioned_30d"] = actioned_30d
    # No trailing newline — match pr-actionable's _emit() so the two scripts
    # don't flip-flop.
    act_path.write_text(json.dumps(act_doc, indent=2))

    return {
        "today": today_act.get("actioned"),
        "actioned_7d": actioned_7d,
        "actioned_30d": actioned_30d,
    }


def main() -> None:
    """CLI entry point: update actioned counts for every repo under ``DATA``.

    For each ``DATA/*/*/actionability.json`` the containing directory is
    treated as one repo. With ``--backfill`` the one-time ``backfill()`` runs
    first; ``compute()`` always runs. One progress line per step is written to
    stderr.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--backfill", action="store_true")
    args = ap.parse_args()

    # sorted(): stable log order, and the directory listing is fully consumed
    # before compute() starts rewriting the files it found.
    for act in sorted(DATA.glob("*/*/actionability.json")):
        repo_dir = act.parent
        label = f"{repo_dir.parent.name}/{repo_dir.name}"
        if args.backfill:
            n = backfill(repo_dir)
            print(f"{label}: backfilled prs into {n} snapshots", file=sys.stderr)
        r = compute(repo_dir)
        if r:
            print(f"{label}: today={r['today']} 7d={r['actioned_7d']} 30d={r['actioned_30d']}", file=sys.stderr)
        else:
            # compute() returns None only when the repo has no snapshot files.
            print(f"{label}: no snapshots — skipped", file=sys.stderr)


# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()
