#!/usr/bin/env bash
# PreToolUse hook (Bash, gated `git commit*`):
# Scans staged diff for high-confidence secret patterns. Emits permissionDecision=deny
# with hookSpecificOutput JSON when a match is found; exits 0 otherwise.
#
# userConfig: prevent_secret_commit (boolean, default true). Disable by setting
# CLAUDE_PLUGIN_OPTION_PREVENT_SECRET_COMMIT=false.
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails. errexit (-e) is deliberately not enabled.
set -u
set -o pipefail

# Opt-out switch: only the literal value "false" turns the hook off. An unset
# or empty variable counts as "true"; any other value leaves the hook enabled.
case "${CLAUDE_PLUGIN_OPTION_PREVENT_SECRET_COMMIT:-true}" in
  false) exit 0 ;;
esac

# Tool payload: the TOOL_INPUT environment variable if set and non-empty,
# otherwise the first positional argument, otherwise the empty string.
# NOTE(review): Claude Code's hook protocol appears to deliver the tool call as
# JSON on stdin — confirm the hook wiring really populates TOOL_INPUT or $1,
# because an empty INPUT makes the gate below exit 0 without scanning anything.
INPUT="${TOOL_INPUT:-${1:-}}"

# Confirm this is a git commit (defense in depth — matcher should already gate).
# grep reads from a here-string rather than `echo ... | grep -q`: with
# `set -o pipefail` active, a grep -q that exits at the first match can leave
# the writer killed by SIGPIPE, making the pipeline report failure despite a
# match on a large payload; echo can also swallow values such as "-n".
if ! grep -Eq 'git[[:space:]]+commit' <<<"$INPUT"; then
  exit 0
fi

# Preconditions: git must be on PATH and `git rev-parse --is-inside-work-tree`
# must succeed in the current directory. If either check fails there is
# nothing to scan, so the hook exits 0.
if ! command -v git >/dev/null 2>&1; then
  exit 0
fi
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  exit 0
fi

# Capture the staged diff (cwd-relative), capped at 512 KiB to bound scan time.
# The output format is pinned so user/repo git config cannot change it:
#   --no-ext-diff              ignore any configured external diff driver
#   --src-prefix / --dst-prefix keep the "a/" and "b/" path prefixes even when
#                              diff.noprefix or diff.mnemonicPrefix is set
#   --unified=0                drop context lines, which are never scanned and
#                              would otherwise eat into the size cap
# `|| true` absorbs the non-zero pipeline status (pipefail) that results when
# head stops reading before git has finished writing.
FULL_DIFF="$(
  git diff --cached --no-color --no-ext-diff --unified=0 \
    --src-prefix=a/ --dst-prefix=b/ 2>/dev/null \
    | head -c 524288 \
    || true
)"
[[ -z "$FULL_DIFF" ]] && exit 0

# DIFF: every line that starts with "+", i.e. added lines plus the "+++ b/<path>"
# file headers (kept for file attribution). Removed lines, "---" headers and
# hunk headers are dropped, so deleting a secret never triggers the hook.
DIFF="$(grep -E '^\+' <<<"$FULL_DIFF" || true)"
[[ -z "$DIFF" ]] && exit 0

# ADDED_LINES: DIFF without the "+++" header lines — the text that is scanned.
ADDED_LINES="$(grep -v '^+++' <<<"$DIFF" || true)"
[[ -z "$ADDED_LINES" ]] && exit 0

# Detection table. Each element is "label|regex": the text before the first "|"
# is the human-readable label, everything after it is a POSIX ERE that must
# also work with BSD grep (so no \d, no \b, no lookarounds).
PATTERNS=()

# Stripe secret keys.
PATTERNS+=(
  "Stripe live key|sk_live_[a-zA-Z0-9]{24,}"
  "Stripe test key|sk_test_[a-zA-Z0-9]{24,}"
)

# AWS access key IDs.
PATTERNS+=(
  "AWS access key|AKIA[0-9A-Z]{16}"
)

# GitHub tokens.
PATTERNS+=(
  "GitHub personal token|ghp_[A-Za-z0-9]{36}"
  "GitHub OAuth token|gho_[A-Za-z0-9]{36}"
  "GitHub app server token|ghs_[A-Za-z0-9]{36}"
)

# Slack tokens.
PATTERNS+=(
  "Slack bot token|xoxb-[A-Za-z0-9-]+"
  "Slack app token|xoxa-[A-Za-z0-9-]+"
  "Slack user token|xoxp-[A-Za-z0-9-]+"
)

# JSON Web Tokens: two base64url segments that both start with "eyJ",
# optionally followed by a third segment.
PATTERNS+=(
  "JWT|eyJ[A-Za-z0-9_=-]+\\.eyJ[A-Za-z0-9_=-]+\\.?[A-Za-z0-9_.+/=-]*"
)

# API keys assigned directly to their conventional environment variable names.
PATTERNS+=(
  "Anthropic API key|ANTHROPIC_API_KEY=sk-ant-"
  "OpenAI API key|OPENAI_API_KEY=sk-"
)

# Quoted literal of 8+ characters assigned to a name that is exactly
# "password" (not preceded by a letter, "_" or "-").
PATTERNS+=(
  "Hardcoded password|(^|[^a-zA-Z_-])password[[:space:]]*=[[:space:]]*['\"][^'\"]{8,}['\"]"
)

# Scan the added lines against every pattern and collect one finding per
# matching pattern. Reads PATTERNS ("label|regex" entries), ADDED_LINES and
# DIFF; writes HITS (entries of the form "<label> in <file>").
HITS=()
for entry in "${PATTERNS[@]}"; do
  label="${entry%%|*}"   # text before the first "|"
  regex="${entry#*|}"    # everything after the first "|" (may itself contain "|")
  # grep is fed from a here-string, not `echo ... | grep -q`: with pipefail on,
  # grep -q exiting at the first match can kill the writer with SIGPIPE, the
  # pipeline then reports failure, and a real match in a large diff is missed.
  if grep -Eq -e "$regex" <<<"$ADDED_LINES"; then
    # Best-effort file attribution: keep only "+++ b/" headers and matching
    # lines, take the line preceding each match, keep those that are headers,
    # and report the last one. `|| true` keeps a failed lookup non-fatal.
    file="$(
      grep -E -e "(^\\+\\+\\+ b/|$regex)" <<<"$DIFF" \
        | grep -B1 -E -e "$regex" \
        | grep -E '^\+\+\+ b/' \
        | tail -1 \
        | sed 's|^+++ b/||' \
        || true
    )"
    if [[ -n "$file" ]]; then
      HITS+=("$label in $file")
    else
      HITS+=("$label in staged diff")
    fi
  fi
done

# Deny the tool call when at least one pattern matched.
if (( ${#HITS[@]} > 0 )); then
  # Join the findings with "; ". ("${HITS[*]}" only ever uses the first
  # character of IFS as the separator, which would drop the space.)
  joined="$(printf '%s; ' "${HITS[@]}")"
  joined="${joined%; }"
  reason="Blocked git commit — likely secret(s) in staged diff: ${joined}. Unstage the secret, rotate it, and use an env var or secret manager. To override (NOT RECOMMENDED) set CLAUDE_PLUGIN_OPTION_PREVENT_SECRET_COMMIT=false."
  # Emit the decision as JSON on stdout; python3 does the string escaping
  # (file names in the reason may contain quotes or backslashes).
  if ! python3 -c '
import json, sys
reason = sys.argv[1]
print(json.dumps({
  "hookSpecificOutput": {
    "hookEventName": "PreToolUse",
    "permissionDecision": "deny",
    "permissionDecisionReason": reason
  }
}))' "$reason"; then
    # python3 is missing or failed, so no JSON decision was produced. Fail
    # closed instead of silently allowing the commit: write the reason to
    # stderr and exit 2, the hook exit status intended to block the tool call.
    printf '%s\n' "$reason" >&2
    exit 2
  fi
  exit 0
fi

# No findings: exit 0 without emitting a decision.
exit 0
