#!/usr/bin/env python3
"""vibe-parity-audit — prove vibestack's skills still mirror their upstream source.

Brand-neutral by design: this script names no upstream project. The upstream
repo, skill-name remaps, and the brand/path substitutions all live in a LOCAL,
UNTRACKED config file so the repo stays free of any upstream brand reference.

Config resolution (first found wins):
  1. --config <path>
  2. $VIBE_PARITY_CONF
  3. $VIBESTACK_HOME/parity.conf   (defaults to ~/.vibestack/parity.conf)

Run `vibe-parity-audit --init` to write a starter config, then fill it in.

How it works: for every local skill (minus the configured skips), it fetches the
upstream skill's generated SKILL.md, renders the local skill (expanding
{{include}} snippets), normalizes away the agreed adaptations (slim preamble,
When-to-invoke catalog block, brand/path substitutions, stub lines), and reports
the fraction of the local skill's workflow lines that appear in the upstream
version (exact or fuzzy). High coverage = the local skill faithfully mirrors
upstream; the gap is the adaptations.

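Exit 0 if median coverage >= threshold (default 0.85), 1 if it is below, and 2
when the audit cannot run (no usable config, no skills/ directory, or nothing
to compare) — usable in CI.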
"""
import argparse, base64, difflib, json, os, re, subprocess, sys, tempfile, statistics

# Current user's home directory.
HOME = os.path.expanduser("~")
# Repository root: two directory levels above this script's symlink-resolved path.
REPO = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
# Directory holding the local parity.conf; $VIBESTACK_HOME overrides the
# ~/.vibestack default.
VIBESTACK_HOME = os.environ.get("VIBESTACK_HOME", os.path.join(HOME, ".vibestack"))

# Template written verbatim by `--init` to VIBESTACK_HOME/parity.conf. Only the
# `upstream_repo` and `threshold` lines are live; every remap/skip/sub example
# is commented out, so the user has to add their own.
STARTER = """\
# vibe-parity-audit config — LOCAL ONLY. Do NOT commit. Keep the upstream
# identity and brand map here so the repo stays brand-neutral.

# Upstream repository to compare against, as owner/repo (for `gh api`).
upstream_repo = OWNER/REPO

# Optional: coverage threshold for a passing run (0.0–1.0). Default 0.85.
threshold = 0.85

# Skill-name remaps where the local name differs from the upstream dir name.
#   remap <local-skill> -> <upstream-dir>
# remap open-browser -> open-UPSTREAM-browser
# remap setup-memory -> setup-UPSTREAMBRAIN

# Local-only skills with no upstream counterpart (skipped).
# skip my-local-skill

# Brand/path substitutions applied to the UPSTREAM text before comparison so it
# reads in local terms. Order matters (longest/most-specific first).
#   sub <from> -> <to>
# sub ~/.claude/skills/UPSTREAM -> ~/.claude/skills/vibestack
# sub UPSTREAMBRAIN -> secondbrain
# sub ~/.UPSTREAM -> ~/.vibestack
# sub UPSTREAM- -> vibe-
# sub UPSTREAM -> vibestack
"""


def load_config(path):
    """Parse a parity config file into a settings dict.

    Blank lines and lines starting with ``#`` are ignored. Recognized lines:

      remap <local> -> <upstream>   stored in ``cfg["remap"][local]``
      skip <skill>                  added to ``cfg["skip"]``
      sub <from> -> <to>            appended to ``cfg["subs"]`` in file order
      upstream_repo = <value>       stored in ``cfg["upstream_repo"]``
      threshold = <float>           stored in ``cfg["threshold"]``

    Any other line (including unknown ``key = value`` pairs) is silently
    ignored.

    Args:
        path: Path of the config file to read (decoded as UTF-8).

    Returns:
        A dict with keys ``upstream_repo`` (str or None), ``threshold``
        (float, default 0.85), ``remap`` (dict), ``skip`` (set) and ``subs``
        (list of ``(from, to)`` tuples).

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a ``remap``/``sub`` line lacks ``->`` or has an empty
            left-hand side, or if ``threshold`` is not a number. The message
            is prefixed with ``path:line`` so the offending line is easy to
            find.
    """
    cfg = {"upstream_repo": None, "threshold": 0.85, "remap": {}, "skip": set(), "subs": []}

    def split_arrow(body, lineno, directive):
        # Split "<from> -> <to>" and reject lines that would otherwise fail
        # with an opaque unpacking error or yield an empty search string
        # (str.replace("", x) would insert x between every character).
        left, arrow, right = body.partition("->")
        left, right = left.strip(), right.strip()
        if not arrow or not left:
            raise ValueError(f"{path}:{lineno}: expected `{directive} <from> -> <to>`")
        return left, right

    with open(path, encoding="utf-8") as f:
        for lineno, raw in enumerate(f, 1):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("remap "):
                local, upstream = split_arrow(line[6:], lineno, "remap")
                cfg["remap"][local] = upstream
            elif line.startswith("skip "):
                cfg["skip"].add(line[5:].strip())
            elif line.startswith("sub "):
                cfg["subs"].append(split_arrow(line[4:], lineno, "sub"))
            elif "=" in line:
                key, _, value = line.partition("=")
                key, value = key.strip(), value.strip()
                if key == "upstream_repo":
                    cfg["upstream_repo"] = value
                elif key == "threshold":
                    try:
                        cfg["threshold"] = float(value)
                    except ValueError:
                        raise ValueError(
                            f"{path}:{lineno}: threshold must be a number, got {value!r}"
                        ) from None
    return cfg


def resolve_config_path(arg):
    """Return the first config path that exists on disk, or None.

    Candidates are tried in priority order: the explicit ``arg`` (the
    ``--config`` value), the ``$VIBE_PARITY_CONF`` environment variable, then
    ``parity.conf`` inside ``VIBESTACK_HOME``. Unset/empty candidates and
    paths that do not exist are skipped.
    """
    candidates = [
        arg,
        os.environ.get("VIBE_PARITY_CONF"),
        os.path.join(VIBESTACK_HOME, "parity.conf"),
    ]
    return next((path for path in candidates if path and os.path.exists(path)), None)


def fetch_upstream(repo, skill_dir):
    """Fetch ``<skill_dir>/SKILL.md`` from the upstream repo via ``gh api``.

    Runs ``gh api repos/<repo>/contents/<skill_dir>/SKILL.md --jq .content``
    with a 60-second timeout and base64-decodes the output, decoding the bytes
    as UTF-8 with undecodable sequences replaced.

    Returns:
        The file text, or None when ``gh`` exits non-zero, prints nothing, or
        anything raises along the way (best-effort: failures are not reported).
    """
    command = ["gh", "api", f"repos/{repo}/contents/{skill_dir}/SKILL.md", "--jq", ".content"]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=60)
        encoded = proc.stdout
        if proc.returncode == 0 and encoded.strip():
            return base64.b64decode(encoded).decode("utf-8", "replace")
    except Exception:
        # Deliberately swallowed: callers treat None as "no upstream copy".
        pass
    return None


def render_local(skill):
    """Return the rendered text of the local skill's SKILL.md.

    Runs ``bin/vibe-render-skill <src> <tmpfile>`` (30-second timeout) and
    returns what it wrote to the temp file. Falls back to the raw, unrendered
    ``skills/<skill>/SKILL.md`` when the renderer cannot be started, times
    out, exits non-zero, or produces no output. Without that check a failed
    render would come back as an empty string, which downstream scoring
    treats as a skill with nothing to verify (100% coverage).

    Both files are read as UTF-8 with undecodable bytes replaced, matching
    how the upstream text is decoded.

    Args:
        skill: Name of the skill directory under ``skills/``.

    Raises:
        OSError: if the fallback read of the raw SKILL.md fails.
    """
    src = os.path.join(REPO, "skills", skill, "SKILL.md")
    renderer = os.path.join(REPO, "bin", "vibe-render-skill")
    # Only the path is needed; the renderer writes the file itself.
    tmp = tempfile.NamedTemporaryFile("w+", suffix=".md", delete=False)
    tmp.close()
    try:
        try:
            proc = subprocess.run([renderer, src, tmp.name], capture_output=True, timeout=30)
            if proc.returncode == 0:
                with open(tmp.name, encoding="utf-8", errors="replace") as f:
                    rendered = f.read()
                if rendered.strip():
                    return rendered
        except (OSError, subprocess.SubprocessError):
            # Renderer missing, not executable, or timed out: use the raw source.
            pass
        with open(src, encoding="utf-8", errors="replace") as f:
            return f.read()
    finally:
        try:
            os.unlink(tmp.name)
        except OSError:
            pass


def strip_frontmatter(t):
    """Remove a leading ``---`` ... ``---`` front-matter block from ``t``.

    Front matter is recognized only when the first non-blank line is exactly
    ``---`` (surrounding whitespace ignored) and a later line is also exactly
    ``---``. Matching whole delimiter lines, rather than any ``---``
    substring, keeps markdown table separators (``| --- | --- |``) and
    ``---`` inside front-matter values from being mistaken for delimiters.

    Returns:
        The text after the closing delimiter line, or ``t`` unchanged when
        there is no front matter or it is never closed.
    """
    lines = t.split("\n")
    start = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    if start is None or lines[start].strip() != "---":
        return t
    for end in range(start + 1, len(lines)):
        if lines[end].strip() == "---":
            return "\n".join(lines[end + 1:])
    return t


def norm_lines(text, subs=None):
    """Reduce skill text to the list of comparable workflow lines.

    Steps, in order: drop the front matter, apply each ``(from, to)``
    substitution in ``subs``, then walk the lines and discard

      * everything under a ``## Preamble`` or ``## When to invoke`` heading,
        up to the next ``## `` heading;
      * blank lines and bare code fences;
      * template/placeholder lines (``{{``, ``${ctx``, ``{PREAMBLE}``);
      * stub lines (starting with ``true #`` or containing ``NOT_AVAILABLE``);
      * lines mentioning ``vibe-slug`` or ``vibe-learnings-search``, or
        starting with ``LEARNINGS:``.

    Each surviving line is stripped and has whitespace runs collapsed to a
    single space.
    """
    body = strip_frontmatter(text)
    for old, new in subs or ():
        body = body.replace(old, new)

    skipped_headings = ("## Preamble", "## When to invoke")
    dropped_anywhere = ("{{", "${ctx", "{PREAMBLE}", "NOT_AVAILABLE",
                        "vibe-slug", "vibe-learnings-search")
    dropped_prefixes = ("true #", "LEARNINGS:")

    kept = []
    in_skipped_section = False
    for raw in body.split("\n"):
        stripped = raw.strip()
        if stripped.startswith("## "):
            # Each level-2 heading decides whether its whole section is ignored.
            in_skipped_section = stripped.startswith(skipped_headings)
        if in_skipped_section:
            continue
        if stripped in ("", "```"):
            continue
        if any(token in stripped for token in dropped_anywhere):
            continue
        if stripped.startswith(dropped_prefixes):
            continue
        kept.append(re.sub(r"\s+", " ", stripped))
    return kept


def coverage(local_lines, up_lines):
    """Measure how much of the local skill is present upstream.

    Only local lines longer than 15 characters are scored. A line counts as
    found when it equals an upstream line or has a difflib close match
    (cutoff 0.82) among them.

    Returns:
        ``(fraction_found, scored_line_count)``; ``(1.0, 0)`` when no local
        line is long enough to score.
    """
    significant = [line for line in local_lines if len(line) > 15]
    total = len(significant)
    if total == 0:
        return 1.0, 0

    exact = set(up_lines)

    def present_upstream(line):
        # Cheap exact lookup first; fall back to the fuzzy comparison.
        if line in exact:
            return True
        return bool(difflib.get_close_matches(line, up_lines, n=1, cutoff=0.82))

    hits = sum(1 for line in significant if present_upstream(line))
    return hits / total, total


def main():
    """Run the parity audit CLI and return the process exit status.

    Modes:
      * ``--init`` writes the starter config to ``VIBESTACK_HOME/parity.conf``
        unless it already exists.
      * ``--show-unmatched SKILL`` prints the scored local lines of one skill
        that have no exact or fuzzy match upstream.
      * default: scores every local skill (optionally narrowed by
        ``--skill``, minus configured skips) and prints a coverage table,
        summary and verdict.

    Returns:
        0 on a passing audit, after ``--init``, or after ``--show-unmatched``;
        1 when the median coverage is below the threshold;
        2 when the audit cannot run: no config found, config unreadable or
        malformed, ``upstream_repo`` unset or still the starter placeholder,
        no ``skills/`` directory, upstream skill not found for
        ``--show-unmatched``, or no skill could be compared.
    """
    ap = argparse.ArgumentParser(description="Prove vibestack skills mirror their upstream source.")
    ap.add_argument("--config")
    ap.add_argument("--init", action="store_true", help="write a starter config and exit")
    ap.add_argument("--skill", help="audit only this skill")
    ap.add_argument("--show-unmatched", metavar="SKILL", help="list local lines not found upstream")
    ap.add_argument("--threshold", type=float, help="override pass threshold")
    args = ap.parse_args()

    if args.init:
        os.makedirs(VIBESTACK_HOME, exist_ok=True)
        dest = os.path.join(VIBESTACK_HOME, "parity.conf")
        if os.path.exists(dest):
            print(f"Config already exists: {dest}")
            return 0
        # The starter text contains non-ASCII punctuation, so pin the encoding
        # instead of relying on the locale default.
        with open(dest, "w", encoding="utf-8") as f:
            f.write(STARTER)
        print(f"Wrote starter config: {dest}\nFill in upstream_repo + remaps + subs, then re-run.")
        return 0

    cfg_path = resolve_config_path(args.config)
    if not cfg_path:
        print("No parity config found. Run `vibe-parity-audit --init`, then fill it in.\n"
              f"Looked at: --config, $VIBE_PARITY_CONF, {VIBESTACK_HOME}/parity.conf", file=sys.stderr)
        return 2
    try:
        cfg = load_config(cfg_path)
    except (OSError, ValueError) as exc:
        # Report a broken config as a setup error instead of a traceback.
        print(f"Could not load config {cfg_path}: {exc}", file=sys.stderr)
        return 2
    repo = cfg["upstream_repo"]
    # "OWNER/REPO" is the untouched starter placeholder; it contains a slash,
    # so it has to be rejected explicitly.
    if not repo or "/" not in repo or repo == "OWNER/REPO":
        print(f"Set `upstream_repo = owner/repo` in {cfg_path}", file=sys.stderr)
        return 2
    threshold = args.threshold if args.threshold is not None else cfg["threshold"]
    # Format rather than int(): int(0.57 * 100) truncates to 56.
    threshold_pct = f"{threshold * 100:.0f}"

    skills_dir = os.path.join(REPO, "skills")
    if not os.path.isdir(skills_dir):
        print(f"No skills/ dir at {REPO}. Run this from the vibestack repo checkout "
              "(bin/vibe-parity-audit).", file=sys.stderr)
        return 2
    skills = sorted(d for d in os.listdir(skills_dir)
                    if os.path.isfile(os.path.join(skills_dir, d, "SKILL.md")))
    if args.skill:
        skills = [s for s in skills if s == args.skill]
    skills = [s for s in skills if s not in cfg["skip"]]

    if args.show_unmatched:
        s = args.show_unmatched
        up = fetch_upstream(repo, cfg["remap"].get(s, s))
        if up is None:
            print(f"No upstream skill for {s}", file=sys.stderr)
            return 2
        up_lines = norm_lines(up, cfg["subs"])
        up_set = set(up_lines)  # built once, not per local line
        local_lines = [l for l in norm_lines(render_local(s)) if len(l) > 15]
        for l in local_lines:
            if l not in up_set and not difflib.get_close_matches(l, up_lines, n=1, cutoff=0.82):
                print("  - " + l[:110])
        return 0

    rows, missing = [], []
    for s in skills:
        up = fetch_upstream(repo, cfg["remap"].get(s, s))
        if up is None:
            missing.append(s)
            continue
        cov, n = coverage(norm_lines(render_local(s)), norm_lines(up, cfg["subs"]))
        rows.append((cov, s, n))
    rows.sort()  # lowest coverage first

    print(f"{'skill':26}{'coverage':>9}")
    print("-" * 35)
    for cov, s, n in rows:
        flag = "  <-- review" if cov < threshold else ""
        print(f"{s:26}{cov * 100:7.0f}%{flag}")
    if missing:
        print(f"\nno upstream counterpart (local-only): {', '.join(missing)}")
    if not rows:
        print("no skills compared", file=sys.stderr)
        return 2
    covs = [r[0] for r in rows]
    med = statistics.median(covs)
    print(f"\ncompared {len(rows)} | median {med*100:.0f}% | mean {statistics.mean(covs)*100:.0f}% | "
          f">={threshold_pct}%: {sum(1 for c in covs if c >= threshold)}/{len(rows)}")
    verdict = "PASS" if med >= threshold else "FAIL"
    print(f"VERDICT: {verdict} (median {med*100:.0f}% vs threshold {threshold_pct}%)")
    print("Low scorers are expected for the most-adapted skills; run "
          "`--show-unmatched <skill>` to confirm each gap is a documented adaptation.")
    return 0 if med >= threshold else 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
