#!/bin/bash
# pre-launch — Launch PRE (Personal Reasoning Engine) from any directory
#
# Connects to Ollama (default port 11434) running Gemma 4 31B.
# Ollama must be running (e.g., via Ollama.app or `ollama serve`).

set -e

# Resolve symlinks so we find binaries even when called via ~/.local/bin symlink
SOURCE="$0"
while [ -L "$SOURCE" ]; do
    DIR="$(cd "$(dirname "$SOURCE")" && pwd)"
    SOURCE="$(readlink "$SOURCE")"
    # Handle relative symlinks
    [[ "$SOURCE" != /* ]] && SOURCE="$DIR/$SOURCE"
done
PRE_DIR="$(cd "$(dirname "$SOURCE")" && pwd)"
PRE_BIN="$PRE_DIR/pre"
PORT="${PRE_PORT:-11434}"

# Read configured context window (written by install.sh based on RAM)
MODEL_CTX=131072
CTX_FILE="$HOME/.pre/context"
if [ -f "$CTX_FILE" ]; then
    read -r _ctx < "$CTX_FILE" 2>/dev/null
    _ctx="${_ctx//[!0-9]/}"
    if [ -n "$_ctx" ] && [ "$_ctx" -ge 2048 ] 2>/dev/null && [ "$_ctx" -le 262144 ] 2>/dev/null; then
        MODEL_CTX="$_ctx"
    fi
fi

# Detect GPU for status display
EGPU_STATUS=""
if [ "$(uname -s)" = "Linux" ]; then
    # Linux: show NVIDIA GPU info if available
    if command -v nvidia-smi &>/dev/null; then
        GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || true)
        [ -n "$GPU_NAME" ] && EGPU_STATUS=" + $GPU_NAME (CUDA)"
    fi
else
    # macOS: detect eGPU (TinyGPU)
    if systemextensionsctl list 2>/dev/null | grep -qi "tinygpu"; then
        EGPU_STATUS=" + eGPU (TinyGPU)"
    fi
fi

# Check if Ollama is running
if ! curl -sf http://localhost:${PORT}/v1/models >/dev/null 2>&1; then
    echo "  Ollama not running. Starting..."
    ollama serve >/dev/null 2>&1 &
    sleep 2
    if ! curl -sf http://localhost:${PORT}/v1/models >/dev/null 2>&1; then
        echo "Error: Cannot connect to Ollama on port $PORT"
        echo "Start Ollama.app or run: ollama serve"
        exit 1
    fi
fi

# Verify the model is available
if ! ollama list 2>/dev/null | grep -q "pre-gemma4"; then
    echo "  Custom model not found. Creating from Modelfile..."
    if [ -f "$PRE_DIR/Modelfile" ]; then
        ollama create pre-gemma4 -f "$PRE_DIR/Modelfile" || {
            echo "Error: Failed to create pre-gemma4 model"
            exit 1
        }
    else
        echo "Error: pre-gemma4 not found and no Modelfile at $PRE_DIR/Modelfile"
        echo "Run: cd $PRE_DIR && ollama create pre-gemma4 -f Modelfile"
        exit 1
    fi
fi

# Pre-warm the model into GPU memory (avoids cold-start TTFT penalty).
# CRITICAL: Send a real message with explicit num_ctx to force full KV cache
# allocation. Ollama loads lazily — empty messages defer KV allocation to the
# first real request. Explicit num_ctx ensures the KV cache is fully allocated
# during warmup, not during the user's first prompt.
if ! ollama ps 2>/dev/null | grep -q "pre-gemma4"; then
    echo "  Loading model into GPU (context: ${MODEL_CTX} tokens)..."
    # Send a trivial real request with matching num_ctx to force full load.
    curl -sf --max-time 300 http://localhost:${PORT}/api/chat \
        -d "{\"model\":\"pre-gemma4\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}],\"keep_alive\":\"24h\",\"options\":{\"num_predict\":1,\"num_ctx\":${MODEL_CTX}}}" \
        >/dev/null 2>&1 || true
    # Verify it loaded
    if ! ollama ps 2>/dev/null | grep -q "pre-gemma4"; then
        echo "  Warning: Model may not have fully loaded. First request may be slow."
    fi
fi

echo "  Ollama running on port $PORT.${EGPU_STATUS}"

if [ ! -x "$PRE_BIN" ]; then
    echo "Error: PRE binary not found at $PRE_BIN"
    echo "Run: cd $PRE_DIR && make pre"
    exit 1
fi

# Start the web GUI server in the background (if node is available)
WEB_DIR="$PRE_DIR/../web"
WEB_PORT="${PRE_WEB_PORT:-7749}"
WEB_PID=""
WEB_PID_FILE="$HOME/.pre/web-gui.pid"
WEB_STARTED_BY_US=false

if [ -f "$WEB_DIR/server.js" ] && command -v node >/dev/null 2>&1; then
    # Check if a server is already running on our port
    EXISTING_PID=$(lsof -ti :"$WEB_PORT" -sTCP:LISTEN 2>/dev/null | head -1 || ss -tlnp "sport = :$WEB_PORT" 2>/dev/null | grep -oP 'pid=\K\d+' | head -1)

    if [ -n "$EXISTING_PID" ]; then
        # Check if it's managed by launchd (PPID=1) — don't touch it
        EXISTING_PPID=$(ps -p "$EXISTING_PID" -o ppid= 2>/dev/null | tr -d ' ')
        if [ "$EXISTING_PPID" = "1" ]; then
            echo "  Web GUI running via LaunchAgent (PID $EXISTING_PID) at http://localhost:${WEB_PORT}"
        else
            # Not launchd-managed — stop it and start fresh
            if ps -p "$EXISTING_PID" -o command= 2>/dev/null | grep -q "node.*server.js"; then
                echo "  Stopping previous web GUI (PID $EXISTING_PID)..."
                kill "$EXISTING_PID" 2>/dev/null
                sleep 1
                kill -0 "$EXISTING_PID" 2>/dev/null && kill -9 "$EXISTING_PID" 2>/dev/null
            fi
            EXISTING_PID=""
        fi
    fi

    # Clean up stale PID file
    if [ -f "$WEB_PID_FILE" ]; then
        OLD_PID=$(cat "$WEB_PID_FILE" 2>/dev/null)
        if [ -n "$OLD_PID" ] && ! kill -0 "$OLD_PID" 2>/dev/null; then
            rm -f "$WEB_PID_FILE"
        fi
    fi

    # Only start a new server if nothing is listening on the port
    if [ -z "$EXISTING_PID" ]; then
        PRE_CWD="$(pwd)" PRE_WEB_PORT="$WEB_PORT" node "$WEB_DIR/server.js" >/dev/null 2>&1 &
        WEB_PID=$!
        echo "$WEB_PID" > "$WEB_PID_FILE"
        sleep 1
        if kill -0 "$WEB_PID" 2>/dev/null; then
            echo "  Web GUI running at http://localhost:${WEB_PORT}"
            WEB_STARTED_BY_US=true
        else
            echo "  Web GUI failed to start (run 'cd $WEB_DIR && npm install' first)"
            WEB_PID=""
            rm -f "$WEB_PID_FILE"
        fi
    fi
fi

# Cleanup: kill the web server when PRE exits (Ctrl+C, normal exit, etc.)
cleanup() {
    if [ -n "$WEB_PID" ] && $WEB_STARTED_BY_US; then
        kill "$WEB_PID" 2>/dev/null
        wait "$WEB_PID" 2>/dev/null
        rm -f "$WEB_PID_FILE"
    fi
}
trap cleanup EXIT
trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM

# Launch PRE with current directory as working dir, forward all args
# Not using exec — we need the trap to fire on exit for cleanup.
"$PRE_BIN" --port "$PORT" --dir "$(pwd)" "$@"
