#!/bin/sh

# ============================================================
# Pre-commit guard: block private files and secrets
# ============================================================

# Paths staged for this commit, one per line. The filter keeps Added, Copied,
# Modified and Renamed entries. Deletions are left out because there is no
# content to inspect. Renames must be included: with git's default rename
# detection, `git mv notes.txt .env` is reported with status R, and filtering
# it out would let the file slip past every check below. For a rename,
# --name-only prints the destination path.
STAGED=$(git diff --cached --name-only --diff-filter=ACMR)

# Nothing relevant is staged, so there is nothing to check.
if [ -z "$STAGED" ]; then
  exit 0
fi

# Set to 1 by any failing check; decides the hook's exit status at the end.
ERROR=0

# --- 1. Block private files ---
# Exceptions to the private-file patterns: a whitespace-separated list of
# exact paths that may be committed even though a pattern matches them.
ALLOWED_FILES=".env.example"

# is_allowed PATH
# Returns 0 when PATH is exactly equal to one of the entries in ALLOWED_FILES,
# and 1 otherwise.
is_allowed() {
  for entry in $ALLOWED_FILES; do
    case "$1" in
      "$entry") return 0 ;;
    esac
  done
  return 1
}

# Patterns identifying files that must never be committed, one per line.
# Each entry is a shell glob that is matched against any part of the staged
# path, so "CLAUDE.md" also covers "docs/CLAUDE.md" and ".claude/" covers
# everything below such a directory.
PRIVATE_PATTERNS="
CLAUDE.md
.claude/
PRD.md
DEVELOPMENT.md
docs/PRD.md
docs/DEVELOPMENT.md
scripts/debug-
.env
.env.local
.env.*.local
openclaw*.md
"

# is_private_path PATH
# Succeeds when PATH contains a match for any entry of PRIVATE_PATTERNS and
# fails otherwise. The body is a subshell so that `set -f` and the loop
# variable do not leak into the rest of the hook.
is_private_path() (
  # Without -f the shell would expand entries such as "openclaw*.md" against
  # the files in the current directory while splitting the list.
  set -f
  for glob in $PRIVATE_PATTERNS; do
    # $glob is deliberately unquoted so that "*" acts as a wildcard.
    # shellcheck disable=SC2254
    case "$1" in
      *$glob*) exit 0 ;;
    esac
  done
  exit 1
)

# Read the staged paths line by line so that names containing spaces stay
# intact. Each path is reported at most once, even if several patterns match.
while IFS= read -r file; do
  [ -n "$file" ] || continue
  if is_private_path "$file" && ! is_allowed "$file"; then
    echo "❌ BLOCKED: private file staged for commit:"
    echo "   $file"
    echo "   This file is for local development only and must not be pushed."
    echo ""
    ERROR=1
  fi
done <<EOF
$STAGED
EOF

# --- 2. Scan for hardcoded secrets ---
# Match only actual secret *values* — not identifier/env var names like
# `OPENAI_API_KEY` in docs, `process.env.X_API_KEY` code, or
# `envVarName: 'Y_API_KEY'` declarations. We look for:
#   - Provider-specific prefixed keys: sk-..., sk_live_..., sk_test_...,
#     ghp_/ghs_/gho_..., xoxb-..., AKIA...
#   - PEM private keys (RSA/DSA/EC/OPENSSH)
#   - JWT tokens (three base64 segments joined by dots)
#   - Feishu/DingTalk webhook URLs with embedded tokens
#   - `api_key/apiKey/access_token/password = "<20+ char value>"` style
#     assignments (":" is accepted as well as "=")
SECRETS_REGEX='(sk-[a-zA-Z0-9]{20,}|sk_(live|test)_[a-zA-Z0-9_]{20,}|gh[pso]_[a-zA-Z0-9]{36}|AKIA[0-9A-Z]{16}|xoxb-[0-9]+-[0-9]+-[A-Za-z0-9]{20,}|-----BEGIN[[:space:]]*(RSA|DSA|EC|OPENSSH)?[[:space:]]*PRIVATE[[:space:]]+KEY|eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+|webhook\.feishu\.cn/open-apis/bot/v2/hook/[a-f0-9-]{20,}|oapi\.dingtalk\.com/robot/send\?access_token=[a-zA-Z0-9]{20,}|(api[_-]?key|apiKey|access[_-]?token|password)[[:space:]]*[:=][[:space:]]*["'"'"'][a-zA-Z0-9+/=_-]{20,}["'"'"'])'

# added_secret_lines
# Reads the unified diff of a single file on stdin and prints at most three
# added lines that match SECRETS_REGEX (case-insensitively).
# Everything before the first "@@" hunk header is discarded first: the
# "+++ b/<path>" file header starts with "+" too and must not be mistaken for
# added content. `|| true` keeps the status at 0 when nothing matches.
added_secret_lines() {
  sed -n '/^@@ /,$p' \
    | grep -E '^\+' \
    | grep -iE -e "$SECRETS_REGEX" \
    | head -3 || true
}

# Read the staged paths line by line: word-splitting $STAGED would break a
# name containing spaces into pieces that fail the -f test below, and the
# file would then never be scanned.
while IFS= read -r file; do
  [ -n "$file" ] || continue
  # Skip hook scripts under .husky/ to avoid self-detection (the patterns
  # above would otherwise be flagged as secrets).
  case "$file" in .husky/*) continue ;; esac
  # Only regular files present in the working tree are scanned.
  [ -f "$file" ] || continue
  MATCH=$(git diff --cached -- "$file" | added_secret_lines)
  if [ -n "$MATCH" ]; then
    echo "❌ BLOCKED: possible secret in $file:"
    printf '%s\n' "$MATCH"
    echo ""
    ERROR=1
  fi
done <<EOF
$STAGED
EOF

# --- 3. Check for hardcoded Chinese in components (i18n rule) ---
# Bracket expression listing the CJK character ranges that count as
# hardcoded Chinese text.
CJK_REGEX='[⺀-⿿㇀-㇯㐀-䶵一-鿿豈-頻並-龎]'

# added_chinese_lines
# Reads the unified diff of a single file on stdin and prints at most three
# added lines that contain a character from CJK_REGEX. Lines containing "// "
# or "import " are left out. Everything before the first "@@" hunk header is
# discarded first, so a "+++ b/<path>" header with a Chinese file name is not
# mistaken for added content. `|| true` keeps the status at 0 when nothing
# matches.
added_chinese_lines() {
  sed -n '/^@@ /,$p' \
    | grep -E '^\+' \
    | LC_ALL=en_US.UTF-8 grep -E -e "$CJK_REGEX" \
    | grep -v "// " \
    | grep -v "import " \
    | head -3 || true
}

# Read the staged paths line by line so that names containing spaces are
# checked instead of being split into pieces that fail the -f test.
while IFS= read -r file; do
  # Only files under src/components/ and src/app/ are subject to this rule.
  case "$file" in
    src/components/*|src/app/*) ;;
    *) continue ;;
  esac
  [ -f "$file" ] || continue
  CHINESE=$(git diff --cached -- "$file" | added_chinese_lines)
  if [ -n "$CHINESE" ]; then
    echo "⚠️  WARNING: hardcoded Chinese text in $file:"
    printf '%s\n' "$CHINESE"
    echo "   Use t('key') from useT() instead."
    echo ""
    # Despite the WARNING label, this marks the commit as blocked.
    ERROR=1
  fi
done <<EOF
$STAGED
EOF

# Final verdict: if any check above set ERROR, print the summary banner and
# abort the commit with a non-zero exit status.
if [ "$ERROR" -ne 0 ]; then
  printf '%s\n' \
    "===========================================" \
    "Commit blocked. Fix the issues above." \
    "If intentional, use: git commit --no-verify" \
    "==========================================="
  exit 1
fi
