#!/bin/bash
# Pre-commit hook for anti-pattern detection
# Per TESTING_GUIDELINES.md and NT_TEST_ANTI_PATTERN_TRIAGE_DEC_17_2025.md
#
# Installation:
#   git config core.hooksPath .githooks
#   chmod +x .githooks/pre-commit

# Stop at the first command whose failure is not explicitly handled.
set -e

# Opening banner: a heavy rule above and below the hook title.
_BANNER_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf '%s\n' \
    "$_BANNER_RULE" \
    "🛡️  PRE-COMMIT: Anti-Pattern Detection" \
    "$_BANNER_RULE"

# Number of blocking violations found so far; each failing check adds one.
VIOLATIONS=0

# ========================================
# CHECK 1: NULL-TESTING Anti-Pattern Detection
# ========================================
echo ""
echo "🔍 Checking for NULL-TESTING anti-patterns..."

# grep_staged_files PATTERN
#   Reads newline-separated file paths on stdin and prints every line of those
#   files that matches PATTERN, formatted as "path:line:text".
#   Paths are passed to xargs NUL-delimited: plain xargs splits its input on
#   blanks and interprets quotes/backslashes, so a path such as
#   "pkg/my dir/foo_test.go" would otherwise be silently skipped.
#   Always returns 0; "no match" and unreadable files just produce no output.
grep_staged_files() {
    tr '\n' '\0' | xargs -0 grep -nH -e "$1" -- 2>/dev/null || true
}

# Staged (added, copied or modified) Go test files, one path per line
STAGED_TEST_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '_test\.go$' || true)

if [ -n "$STAGED_TEST_FILES" ]; then
    # Flag every ToNot(BeNil()) assertion; the pattern match alone cannot tell
    # whether a value assertion follows, so all occurrences are reported.
    NULL_NIL=$(printf '%s\n' "$STAGED_TEST_FILES" | grep_staged_files 'ToNot(BeNil())')
    if [ -n "$NULL_NIL" ]; then
        echo "❌ VIOLATION: NULL-TESTING anti-pattern detected (ToNot(BeNil))"
        echo "$NULL_NIL"
        echo ""
        echo "   Per TESTING_GUIDELINES.md: Validate business outcomes, not just nil checks"
        echo "   Example fix: Expect(*event.ActorType).To(Equal(\"service\"))"
        echo ""
        VIOLATIONS=$((VIOLATIONS + 1))
    fi

    # Flag ToNot(BeEmpty()) unless the same line carries an APPROVED EXCEPTION marker
    NULL_EMPTY=$(printf '%s\n' "$STAGED_TEST_FILES" | grep_staged_files 'ToNot(BeEmpty())')
    if [ -n "$NULL_EMPTY" ]; then
        # Exclude approved exceptions
        NULL_EMPTY_FILTERED=$(printf '%s\n' "$NULL_EMPTY" | grep -v 'APPROVED EXCEPTION' || true)
        if [ -n "$NULL_EMPTY_FILTERED" ]; then
            echo "❌ VIOLATION: NULL-TESTING anti-pattern detected (ToNot(BeEmpty))"
            echo "$NULL_EMPTY_FILTERED"
            echo ""
            echo "   Per TESTING_GUIDELINES.md: Validate specific values or counts"
            echo "   Example fix: Expect(events).To(HaveLen(2)) or Expect(phase).To(Equal(NotificationPhaseSent))"
            echo ""
            VIOLATIONS=$((VIOLATIONS + 1))
        fi
    fi
fi

# ========================================
# CHECK 2: Skip() Anti-Pattern Detection
# ========================================
echo ""
echo "🔍 Checking for Skip() in integration tests..."

# Staged (added, copied or modified) integration test files, one path per line
STAGED_INTEGRATION=$(git diff --cached --name-only --diff-filter=ACM | grep 'test/integration/.*_test\.go$' || true)

if [ -n "$STAGED_INTEGRATION" ]; then
    # Paths go to xargs NUL-delimited: plain xargs splits on blanks and
    # interprets quotes, which would silently skip paths containing them.
    # Lines carrying an APPROVED EXCEPTION marker are not reported.
    SKIP_VIOLATIONS=$(printf '%s\n' "$STAGED_INTEGRATION" | tr '\n' '\0' | \
        xargs -0 grep -nH -e '\bSkip(' -- 2>/dev/null | \
        grep -v 'APPROVED EXCEPTION' || true)
    if [ -n "$SKIP_VIOLATIONS" ]; then
        echo "❌ VIOLATION: Skip() forbidden in integration tests with required infrastructure"
        echo "$SKIP_VIOLATIONS"
        echo ""
        echo "   Per DD-AUDIT-003: Integration tests must Fail() when required infrastructure unavailable"
        echo "   Example fix: Use Fail() instead of Skip() for mandatory audit infrastructure"
        echo ""
        VIOLATIONS=$((VIOLATIONS + 1))
    fi
fi

# ========================================
# CHECK 3: time.Sleep() Anti-Pattern Detection
# ========================================
echo ""
echo "🔍 Checking for time.Sleep() without approved exceptions..."

# STAGED_TEST_FILES is the newline-separated list of staged *_test.go files
# computed by CHECK 1.
if [ -n "$STAGED_TEST_FILES" ]; then
    # Report time.Sleep( calls, except in the exempt file and on lines carrying
    # an APPROVED EXCEPTION marker. Paths go to xargs NUL-delimited so names
    # containing blanks or quotes are not word-split and skipped.
    SLEEP_VIOLATIONS=$(printf '%s\n' "$STAGED_TEST_FILES" | tr '\n' '\0' | \
        xargs -0 grep -nH -e 'time\.Sleep(' -- 2>/dev/null | \
        grep -v 'crd_rapid_lifecycle_test\.go' | \
        grep -v 'APPROVED EXCEPTION' || true)

    if [ -n "$SLEEP_VIOLATIONS" ]; then
        echo "⚠️  WARNING: time.Sleep() detected (use Eventually/Consistently preferred)"
        echo "$SLEEP_VIOLATIONS"
        echo ""
        echo "   Per TESTING_GUIDELINES.md: Prefer Eventually/Consistently for async validation"
        echo "   If intentional timing test, add ✅ APPROVED EXCEPTION comment"
        echo ""
        # Don't block commit, just warn (VIOLATIONS is deliberately left untouched)
    fi
fi

# ========================================
# CHECK 4: Sensitive Data Leak Detection
# ========================================
echo ""
echo "🔍 Checking for sensitive data in staged files..."

# find_sensitive_lines ADDED_LINES
#   Scans ADDED_LINES (newline-separated "+" lines of a diff) and prints one
#   finding per output line, grouped in the order 4a, 4b, 4c, 4d:
#     4a/4b/4d print the whole offending line; 4c prints only the matched key.
#   Every grep writes straight to stdout, so findings are always separated by
#   real newlines (no string concatenation of partial results).
#   Always returns 0; no finding means no output.
find_sensitive_lines() {
    local added=$1

    # 4a: Cloud provider API endpoint URLs
    printf '%s\n' "$added" | \
        grep -iE '(aiplatform|bigquery|storage|compute|run)\.googleapis\.com' || true
    printf '%s\n' "$added" | \
        grep -iE '(execute-api|lambda|sagemaker|bedrock)\.[a-z0-9-]+\.amazonaws\.com' || true
    printf '%s\n' "$added" | \
        grep -iE '(cognitiveservices|openai)\.azure\.com' || true

    # 4b: Cloud project/account/subscription identifiers in config fields,
    #     ignoring obvious placeholders. [[:space:]] is the portable spelling
    #     of the GNU-only \s.
    printf '%s\n' "$added" | \
        grep -iE '(project_id|account_id|subscription_id|tenant_id)[[:space:]]*[:=][[:space:]]*"[^"]+' | \
        grep -ivE '(your-|example-|placeholder|REDACTED|<|TODO|""$)' || true

    # 4c: Well-known API key formats (only the key itself is printed)
    printf '%s\n' "$added" | grep -oE 'sk-[A-Za-z0-9_-]{20,}' || true
    printf '%s\n' "$added" | grep -oE 'ghp_[A-Za-z0-9]{36,}' || true
    printf '%s\n' "$added" | grep -oE 'ghs_[A-Za-z0-9]{36,}' || true
    printf '%s\n' "$added" | grep -oE 'AKIA[A-Z0-9]{16}' || true
    printf '%s\n' "$added" | grep -oE 'xox[bpras]-[A-Za-z0-9-]{10,}' || true

    # 4d: Absolute paths to credential files
    printf '%s\n' "$added" | \
        grep -E '(/home/|/Users/|/root/|/etc/|/var/)[^ ]*credentials[^ ]*\.(json|yaml|yml|key|pem)' | \
        grep -v '_test\.go' | \
        grep -v 'GOOGLE_APPLICATION_CREDENTIALS' || true
}

# Non-empty when at least one added/copied/modified file is staged
STAGED_ALL=$(git diff --cached --name-only --diff-filter=ACM || true)

if [ -n "$STAGED_ALL" ]; then
    # Added lines of the staged diff, minus "+++" file headers and lines that
    # opt out with the allow marker. The diff is restricted with --diff-filter
    # rather than by passing the file list back as arguments, so paths with
    # spaces (or paths git prints in quoted form) cannot be mangled.
    STAGED_DIFF=$(git diff --cached -U0 --diff-filter=ACM | \
        grep '^+' | grep -v '^+++' | \
        grep -v 'pre-commit:allow-sensitive' || true)

    LEAK_MATCHES=$(find_sensitive_lines "$STAGED_DIFF")

    if [ -n "$LEAK_MATCHES" ]; then
        echo "❌ VIOLATION: Potential sensitive data detected in staged changes"
        echo ""
        # Cap the report at 20 findings
        printf '%s\n' "$LEAK_MATCHES" | head -20
        echo ""
        echo "   Sensitive data (cloud endpoints, project IDs, API keys, credential paths)"
        echo "   must not be committed to the repository."
        echo ""
        echo "   To fix: move values to Kubernetes Secrets, Helm values files (.gitignored),"
        echo "   or environment variables."
        echo "   To override: add '# pre-commit:allow-sensitive' on the flagged line."
        echo ""
        VIOLATIONS=$((VIOLATIONS + 1))
    fi
fi

# ========================================
# CHECK 5: CRD Docs Drift Detection
# ========================================
echo ""
echo "🔍 Checking if CRD docs are up to date..."

# Staged files that match the API-type / shared-type path patterns
STAGED_TYPES=$(git diff --cached --name-only --diff-filter=ACM | grep -E '(api/.*_types\.go|pkg/shared/types/.*)$' || true)

# Only run when such files are staged and `make` plus a Makefile are available.
if [ -n "$STAGED_TYPES" ] && command -v make &>/dev/null && [ -f Makefile ]; then
    _crd_docs=docs/generated/crds.md
    _crd_snapshot_dir=$(mktemp -d)
    _crd_snapshot="$_crd_snapshot_dir/crds-before.md"

    # Snapshot the current docs (best effort), then regenerate in place.
    cp "$_crd_docs" "$_crd_snapshot" 2>/dev/null || true
    make generate-crd-docs >/dev/null 2>&1 || true

    # A snapshot that differs from the regenerated file means the docs on disk
    # were stale. No snapshot (docs did not exist beforehand) means no verdict.
    if [ -f "$_crd_snapshot" ] && ! diff -q "$_crd_snapshot" "$_crd_docs" >/dev/null 2>&1; then
        printf '%s\n' \
            "❌ VIOLATION: CRD docs are out of date" \
            "   Staged api/*_types.go or pkg/shared/types/ changes require regenerating CRD docs." \
            "   Run: make generate-crd-docs" \
            "   Then stage: git add docs/generated/crds.md" \
            ""
        # Put the pre-regeneration docs back so the hook leaves the file as it found it.
        cp "$_crd_snapshot" "$_crd_docs"
        VIOLATIONS=$((VIOLATIONS + 1))
    fi

    rm -rf "$_crd_snapshot_dir"
fi

# ========================================
# CHECK 6: CRD-to-OpenAPI Enum Drift Detection (Issue #838)
# ========================================
echo ""
echo "🔍 Checking for CRD-to-OpenAPI enum drift..."

# Staged OpenAPI specs or API type files; either can introduce enum drift.
STAGED_ENUM_SOURCES=$(git diff --cached --name-only --diff-filter=ACM | \
    grep -E '(api/openapi/.*\.yaml|api/.*_types\.go)$' || true)

if [ -n "$STAGED_ENUM_SOURCES" ]; then
    # Best effort: refresh the CRD manifests from the Go types first so the
    # comparison below sees the current enums. Failures here are ignored.
    if [ -f Makefile ] && command -v make &>/dev/null; then
        make manifests >/dev/null 2>&1 || true
    fi

    # Run the drift checker when Go is available; any non-zero exit from it is
    # reported as drift (its stderr is discarded).
    if command -v go &>/dev/null &&
        ! go run ./scripts/validation/check-openapi-crd-enum-drift/... . 2>/dev/null; then
        printf '%s\n' \
            "❌ VIOLATION: CRD-to-OpenAPI enum drift detected" \
            "   CRD enums contain values missing from the OpenAPI spec." \
            "   The ogen-generated client will reject these at runtime (Issue #838)." \
            "" \
            "   To fix: add missing values to api/openapi/data-storage-v1.yaml" \
            "   Then run: make generate-datastorage-client" \
            ""
        VIOLATIONS=$((VIOLATIONS + 1))
    fi
fi

# ========================================
# RESULT
# ========================================
# Print the verdict and set the hook's exit status: 1 blocks the commit,
# 0 lets it proceed.
_RESULT_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

echo ""
if [ "$VIOLATIONS" -gt 0 ]; then
    printf '%s\n' \
        "$_RESULT_RULE" \
        "❌ COMMIT BLOCKED: $VIOLATIONS anti-pattern violation(s) detected" \
        "$_RESULT_RULE" \
        "" \
        "📚 References:" \
        "   - docs/development/business-requirements/TESTING_GUIDELINES.md" \
        "   - docs/handoff/NT_TEST_ANTI_PATTERN_TRIAGE_DEC_17_2025.md" \
        "   - .golangci.yml (forbidigo rules)" \
        ""
    exit 1
fi

# No blocking violations were recorded.
printf '%s\n' \
    "$_RESULT_RULE" \
    "✅ PASS: No anti-pattern violations detected" \
    "$_RESULT_RULE" \
    ""
exit 0

