# Caddy Configuration for Tale Platform
# Single entry point for both local development and production
#
# Supports blue-green deployments with automatic health-based routing
#
# TLS Configuration (environment variables):
# - TLS_MODE: Certificate mode
#   - "selfsigned" (default): Self-signed certificates via Caddy's internal CA
#   - "letsencrypt": Free trusted certificates from Let's Encrypt
#
# - TLS_EMAIL: Email for Let's Encrypt notifications (optional but recommended)
#
# Users always access via HTTPS regardless of TLS mode.
#
# Development: https://tale.local (TLS_MODE=selfsigned)
# Production:  https://yourdomain.com (TLS_MODE=letsencrypt)
#
# Blue-Green Deployment:
# - Routes to "platform" DNS alias (resolves to whichever color is running)
# - Only one platform instance can be healthy at a time (DB constraint)
# - Shows loading page during the brief transition window

# Global options
# default_sni: the server name Caddy assumes for TLS handshakes that arrive
# without an SNI value. Read from the HOST environment variable, falling back
# to "tale.local" when HOST is unset.
{
	default_sni {$HOST:tale.local}
}

# Health check endpoint (internal only, not exposed externally)
# Separate port avoids interfering with Caddy's automatic ACME challenge handling on :80
# Requests to /health on port 2020 get the body "OK" with status 200. This
# probes the proxy process itself; it does not reflect upstream health.
:2020 {
	respond /health "OK" 200
}

# ============================================================================
# Docs site — separate host block proxying to services/docs.
# The Caddy entrypoint substitutes the TLS placeholder line below and the
# {$DOCS_ORIGIN} placeholder on the next line.
# The site address defaults to https://docs.tale.local when DOCS_ORIGIN is unset.
# ============================================================================
{$DOCS_ORIGIN:https://docs.tale.local} {
	# TLS_PLACEHOLDER

	# Access log for this site: console format on stdout, INFO and above.
	log {
		output stdout
		format console
		level INFO
	}

	# Health check endpoint for the proxy itself (answered by Caddy, never
	# forwarded to the docs upstream).
	handle /health {
		respond "OK" 200
	}

	# Compress responses with gzip or zstd.
	encode gzip zstd

	# Everything else goes to the docs service.
	reverse_proxy docs:3002 {
		# Active health checking: probe /api/health every 5s, allow 3s per
		# probe, and treat only a 200 response as healthy.
		health_uri /api/health
		health_interval 5s
		health_timeout 3s
		health_status 200

		# Passive health checking (circuit breaker): 2 failures remembered
		# within a 10s window mark the upstream down; the listed 5xx
		# responses count as failures.
		fail_duration 10s
		max_fails 2
		unhealthy_status 500 502 503 504

		# Headers. header_up sets (overwrites) each field, so any
		# X-Forwarded-For value supplied by the client is replaced with the
		# address of the immediate peer.
		header_up Host {host}
		header_up X-Real-IP {remote_host}
		header_up X-Forwarded-For {remote_host}
		header_up X-Forwarded-Proto {scheme}
	}

	# Do not advertise the server software in responses.
	header {
		-Server
	}
}

{$SITE_ORIGIN:https://tale.local} {
	# TLS Configuration - This line is replaced by entrypoint based on TLS_MODE
	# DO NOT MODIFY - entrypoint will replace this placeholder
	# TLS_PLACEHOLDER

	# Access log for this site: console format on stdout, INFO and above.
	# Individual handlers below opt out with `log_skip` where the URL
	# carries a secret.
	log {
		output stdout
		format console
		level INFO
	}

	# Strip base path prefix for subpath deployments (no-op when empty)
	# BASE_PATH_PLACEHOLDER

	# Remove WebSocket compression extension header before proxying
	# This prevents "Invalid frame header" errors caused by permessage-deflate
	# compression negotiation issues between browser, Caddy, and backend
	request_header -Sec-WebSocket-Extensions

	# Health check endpoint for the proxy itself
	handle /health {
		respond "OK" 200
	}

	# ============================================================================
	# Direct Convex Proxies (bypass Express for better WebSocket stability)
	# ============================================================================
	# NOTE: `handle` blocks are mutually exclusive (first match wins). Several
	# blocks below overlap on /api/*, so keep the specific ones above the
	# generic /api/* block when editing.

	# WebSocket: Main app Convex sync (/ws_api/* -> convex:3210)
	# The /ws_api prefix is removed before the request is forwarded.
	handle /ws_api/* {
		uri strip_prefix /ws_api
		reverse_proxy convex:3210
	}

	# WebSocket: Dashboard Convex sync (/api/:version/sync -> convex:3210)
	handle /api/*/sync {
		reverse_proxy convex:3210
	}

	# HTTP: Convex site proxy for actions (/http_api/* -> convex:3211)
	# The /http_api prefix is removed before the request is forwarded.
	handle /http_api/* {
		uri strip_prefix /http_api
		reverse_proxy convex:3211
	}

	# HTTP: Convex Dashboard static assets at root paths (logo, icons)
	# Dashboard HTML references these at root but they're served under /convex-dashboard/
	@convexDashboardAssets {
		path /convex-logo-only.svg /convex-light.svg /apple-touch-icon.png
	}
	handle @convexDashboardAssets {
		# Replace only the first "/" of the URI (limit 1), i.e. prepend the
		# /convex-dashboard prefix: /convex-light.svg -> /convex-dashboard/convex-light.svg
		uri replace / /convex-dashboard/ 1
		reverse_proxy convex:6791
	}

	# Redirect dashboard routes at root to /convex-dashboard prefix
	# Dashboard "Go to Reference" opens new tabs without the prefix
	# Both conditions must hold: one of the listed paths AND a `component`
	# query parameter. The redirect target re-adds BASE_PATH (empty when unset).
	@convexDashboardRedirect {
		path /data /data/* /functions /functions/* /logs /logs/* /history /history/* /settings /settings/* /files /files/* /schedules /schedules/*
		query component=*
	}
	redir @convexDashboardRedirect {$BASE_PATH:}/convex-dashboard{uri} temporary

	# HTTP: Convex Dashboard UI (/convex-dashboard* -> convex:6791)
	handle /convex-dashboard* {
		reverse_proxy convex:6791
	}

	# HTTP: Convex internal action callbacks (/api/actions/* -> convex:3210)
	handle /api/actions/* {
		reverse_proxy convex:3210
	}

	# HTTP: Sandbox callback API (/api/sandbox/* -> convex:3211)
	# Must come BEFORE the generic /api/* block so the more specific path
	# wins. Skips access logging by default since the path itself is HMAC-
	# authenticated and adds nothing diagnostically. The spawner is the
	# only legitimate caller; runtime containers cannot reach the proxy.
	handle /api/sandbox/* {
		log_skip
		reverse_proxy convex:3211
	}

	# HTTP: Convex storage upload/download (/api/storage/* -> convex:3210)
	# `log_skip`: Convex's `generateUploadUrl()` embeds a 1-hour upload
	# token in the URL's query string. Default INFO-level access logs
	# would write that token to stdout (audit finding R2V7). The path
	# itself is auth-bound by the token; access logging adds no security
	# value here.
	handle /api/storage/* {
		log_skip
		reverse_proxy convex:3210
	}

	# HTTP: Convex Dashboard admin API (Authorization: Convex* header -> convex:3210)
	# Matches /api/* only when the Authorization header value starts with
	# "Convex". Keep this block above the generic /api/* handler below;
	# both use the same path, so their relative order decides which one runs.
	@convexAdminApi {
		path /api/*
		header Authorization Convex*
	}
	handle @convexAdminApi {
		reverse_proxy convex:3210
	}

	# HTTP: All other /api/* requests -> Convex HTTP endpoint (convex:3211)
	# Covers auth, SSO, documents, API gateway, workflow triggers, etc.
	handle /api/* {
		reverse_proxy convex:3211
	}

	# ============================================================================
	# Metrics Proxy (enabled when METRICS_BEARER_TOKEN is set)
	# Proxies /metrics/{service} → internal {service}:{port}/metrics
	# ============================================================================

	# All three conditions must hold: a known service path, a non-empty
	# configured token (the expression is false when the env var is unset or
	# empty, which disables the whole proxy), and an Authorization header
	# equal to "Bearer <token>".
	@metricsAuth {
		path /metrics/crawler /metrics/rag /metrics/platform /metrics/convex
		expression `"{$METRICS_BEARER_TOKEN:}" != ""`
		header Authorization "Bearer {$METRICS_BEARER_TOKEN:}"
	}
	handle @metricsAuth {
		handle /metrics/crawler {
			rewrite * /metrics
			reverse_proxy crawler:8002
		}
		handle /metrics/rag {
			rewrite * /metrics
			reverse_proxy rag:8001
		}
		handle /metrics/platform {
			rewrite * /metrics
			reverse_proxy platform:3000
		}
		# Unlike the services above, Convex metrics are requested from the
		# platform upstream under /metrics/convex, not /metrics.
		handle /metrics/convex {
			rewrite * /metrics/convex
			reverse_proxy platform:3000
		}
	}

	# Block all other /metrics requests (no token, wrong token, or unknown service)
	# Returns 401 so scrapers (e.g. Grafana Cloud) know auth is required
	# The regexp is case-insensitive and covers /metrics itself plus anything
	# below it, so these paths never fall through to the default platform proxy.
	# Must stay below the @metricsAuth handler above.
	@metricsBlock {
		path_regexp (?i)^/metrics(/.*)?$
	}
	handle @metricsBlock {
		header WWW-Authenticate "Bearer"
		respond "Unauthorized" 401
	}

	# ============================================================================
	# WebDAV uploads — defense-in-depth body cap (RAM blow-up protection)
	# ============================================================================
	# The platform process enforces a per-method limit (XML methods ~64 KB,
	# PUT default 5 GB) but a malicious client streaming an unbounded body
	# can still tie up Caddy's buffer. Cap at the proxy too. Driven by the
	# same WEBDAV_MAX_PUT_BYTES the platform uses (a raw byte count; Caddy
	# accepts a plain integer as bytes) so the two ceilings stay in sync.
	# Operators raising the platform limit above 5 GB must also set this env
	# on the proxy container, or the proxy stays the binding cap (default 5 GB).
	# NOTE(review): Caddy parses the "5GB" fallback as a decimal size
	# (5,000,000,000 bytes). If the platform's own default is 5 GiB
	# (5,368,709,120 bytes), the proxy default is the lower of the two —
	# confirm against the platform and use "5GiB" here if so.
	handle /dav/* {
		request_body {
			max_size {$WEBDAV_MAX_PUT_BYTES:5GB}
		}
		reverse_proxy platform:3000 {
			# Forward the real client IP. The WebDAV failed-auth throttle
			# keys its per-IP bucket on the first X-Forwarded-For hop, so an
			# attacker is rate-limited by their own source rather than locking
			# out a whole org. Without this the platform sees only Caddy's IP.
			# header_up sets (overwrites) the field, so a client-supplied
			# X-Forwarded-For value is replaced and cannot pick the bucket.
			header_up Host {host}
			header_up X-Real-IP {remote_host}
			header_up X-Forwarded-For {remote_host}
			header_up X-Forwarded-Proto {scheme}
		}
	}

	# ============================================================================
	# Express Server (static files + HTML env injection only)
	# ============================================================================

	# Default: Express server for static files and index.html with env injection
	# Runs for every request not claimed by one of the handle blocks above.
	reverse_proxy platform:3000 {
		# Active health checking. Interval is 5s (not 2s): the platform has a
		# long cold boot (Convex connect + asset load — see its 180s compose
		# start_period), and a 2s probe logged a failed health check every two
		# seconds until it came up, which read as "repeated proxy failures"
		# during normal startup (#1447). Caddy still routes correctly once the
		# backend is up; 5s matches the docs upstream and cuts the boot noise.
		# health_passes 2: two consecutive passing probes are required before
		# the upstream is marked healthy again.
		health_uri /api/health
		health_interval 5s
		health_timeout 2s
		health_status 200
		health_passes 2

		# Passive health checking (circuit breaker)
		# 2 failures remembered within a 10s window mark the upstream down;
		# the listed 5xx responses count as failures.
		fail_duration 10s
		max_fails 2
		unhealthy_status 500 502 503 504

		# Retry on failure
		# Keep trying to find an available upstream for up to 5s, pausing
		# 250ms between attempts, before giving up on the request.
		lb_try_duration 5s
		lb_try_interval 250ms

		# Headers
		header_up Host {host}
		header_up X-Real-IP {remote_host}
		header_up X-Forwarded-For {remote_host}
		header_up X-Forwarded-Proto {scheme}
	}

	# Encode responses, but skip WebSocket endpoints
	# The two `not path` lines are combined: a request is compressed only if
	# it matches neither WebSocket path.
	@notWebSocket {
		not path /api/*/sync
		not path /ws_api/*
	}
	encode @notWebSocket gzip zstd

	# Loading page when platform is unavailable
	# During deployments, there's a brief window when the old platform loses
	# its DB connection and the new one isn't ready yet.
	# Shows a friendly loading page instead of a raw 502/503 error.
	# Applies to 502, 503 and 504 errors raised anywhere in this site block;
	# every such request is answered with /var/www/maintenance.html.
	handle_errors 502 503 504 {
		root * /var/www
		rewrite * /maintenance.html
		file_server
	}

	header {
		-Server
		# Content security headers (CSP, HSTS, X-Frame-Options, X-Content-Type-Options,
		# Referrer-Policy, Permissions-Policy) are emitted by the platform server via
		# Hono `secureHeaders` middleware (see services/platform/server.ts) so the
		# app is the single source of truth. Caddy keeps only transport concerns.
	}

}
