#!/usr/bin/env bash
set -euo pipefail

# Repository root: the parent of the directory that contains this script,
# resolved to an absolute path.
# CDPATH is cleared for this one lookup: with a relative script directory
# (e.g. when invoked as `scripts/foo.sh`) a user-level CDPATH could otherwise
# redirect `cd` to a different directory and make it echo the resolved path
# into the captured output.
PROJECT_ROOT="$(CDPATH='' cd -- "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Config file location: MORAINE_CONFIG wins when set and non-empty; --config
# on the command line overrides both.
CONFIG_PATH="${MORAINE_CONFIG:-$HOME/.moraine/config.toml}"
# 1 once --write-config has been given; a missing config is only created then.
WRITE_CONFIG=0

# Print the command-line synopsis and option summary to stdout.
# Callers that report a usage error redirect the output to stderr themselves.
usage() {
  printf '%s\n' \
    "usage: $(basename "$0") [--config <path>] [--write-config]" \
    "" \
    "options:" \
    "  --config <path>   config file path (default: MORAINE_CONFIG or ~/.moraine/config.toml)" \
    "  --write-config    explicitly allow writing a missing config by copying config/moraine.toml"
}

# Consume the command line one option at a time.
# Exit codes: 0 after printing help, 2 for a malformed or unknown argument.
while (( $# > 0 )); do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    --write-config)
      WRITE_CONFIG=1
      shift
      ;;
    --config)
      # The path is taken from the following word, so one must be present.
      if (( $# < 2 )); then
        echo "--config requires a value" >&2
        exit 2
      fi
      CONFIG_PATH="$2"
      shift 2
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# Make sure a config file exists before anything reads it. A missing file is
# only created (as a copy of the repository's config/moraine.toml) when the
# caller opted in with --write-config; otherwise the script stops with status 1.
if [[ ! -f "$CONFIG_PATH" ]]; then
  if ! [[ "$WRITE_CONFIG" -eq 1 ]]; then
    printf '%s\n' \
      "config not found: $CONFIG_PATH" \
      "refusing to write config without explicit opt-in" \
      "rerun with --write-config to copy $PROJECT_ROOT/config/moraine.toml" >&2
    exit 1
  fi

  # Create the parent directory first so the copy cannot fail on a fresh setup.
  mkdir -p "$(dirname "$CONFIG_PATH")"
  cp "$PROJECT_ROOT/config/moraine.toml" "$CONFIG_PATH"
  echo "wrote default config to $CONFIG_PATH"
fi

# Look up a single configuration value through the moraine CLI.
# Globals:   PROJECT_ROOT (read), CONFIG_PATH (read)
# Arguments: $1 - configuration key, e.g. "clickhouse.url"
# Outputs:   whatever `moraine config get` prints on stdout
# Returns:   the exit status of the moraine binary
config_get() {
  local -a moraine_cmd=("${PROJECT_ROOT}/bin/moraine" --config "${CONFIG_PATH}")
  "${moraine_cmd[@]}" config get "$1"
}

# Read the ClickHouse connection settings from the moraine config. An empty
# URL or database name falls back to a built-in default; username and password
# have no default.
CLICKHOUSE_URL="$(config_get "clickhouse.url")"
: "${CLICKHOUSE_URL:=http://127.0.0.1:8123}"
CLICKHOUSE_DB="$(config_get "clickhouse.database")"
: "${CLICKHOUSE_DB:=moraine}"
CLICKHOUSE_USER="$(config_get "clickhouse.username")"
CLICKHOUSE_PASSWORD="$(config_get "clickhouse.password")"

# Extra curl arguments carrying the credentials; stays empty when no username
# is configured so requests are sent without authentication.
# NOTE(review): the password ends up on curl's command line, where it is
# visible to other local users via the process list — confirm this is acceptable.
if [[ -z "$CLICKHOUSE_USER" ]]; then
  CURL_AUTH_ARGS=()
else
  CURL_AUTH_ARGS=(--user "${CLICKHOUSE_USER}:${CLICKHOUSE_PASSWORD}")
fi

# Run curl with the flags shared by every ClickHouse request plus the optional
# credentials.
# Globals:   CURL_AUTH_ARGS (read) - extra curl arguments; may be an empty array
# Arguments: any further curl arguments (URL, --data-binary <body>, ...)
# Outputs:   curl's stdout/stderr
# Returns:   curl's exit status; -f makes HTTP error responses a failure,
#            -sS hides the progress meter but still prints error messages
clickhouse_curl() {
  # ${arr[@]+"${arr[@]}"} expands to nothing when the array has no elements.
  # A bare "${CURL_AUTH_ARGS[@]}" aborts with "unbound variable" under
  # `set -u` on bash older than 4.4 (e.g. the bash 3.2 shipped with macOS),
  # which is exactly the no-username case.
  curl -fsS ${CURL_AUTH_ARGS[@]+"${CURL_AUTH_ARGS[@]}"} "$@"
}

# Preflight: send a trivial `SELECT 1` to the configured endpoint and stop with
# status 1 when the request fails. curl's own output is discarded, so any
# failure (connection, HTTP error, credentials) is reported with the same message.
clickhouse_curl "$CLICKHOUSE_URL/?query=SELECT%201" >/dev/null 2>&1 || {
  echo "clickhouse is unavailable at $CLICKHOUSE_URL" >&2
  exit 1
}

# Run the moraine CLI's `db migrate` before touching any table.
# NOTE(review): presumably this creates/updates the search tables used by the
# backfill — confirm against the CLI.
"${PROJECT_ROOT}/bin/moraine" db migrate --config "${CONFIG_PATH}"

# Execute one SQL statement by sending it as the request body to ClickHouse.
# Globals:   CLICKHOUSE_URL (read), CLICKHOUSE_DB (read)
# Arguments: $1 - the SQL statement
# Outputs:   nothing on stdout (the response body is discarded)
# Returns:   the exit status of clickhouse_curl
run_sql() {
  local -r endpoint="${CLICKHOUSE_URL}/?database=${CLICKHOUSE_DB}"
  clickhouse_curl --data-binary "$1" "${endpoint}" >/dev/null
}

echo "backfilling search index tables in $CLICKHOUSE_DB"

# Empty the index tables before repopulating: postings first, then documents.
for index_table in search_postings search_documents; do
  run_sql "TRUNCATE TABLE ${CLICKHOUSE_DB}.${index_table}"
done
unset index_table

# Rebuild search_documents from the events table, mapping event columns onto
# the document columns and skipping events whose text is empty or whitespace only.
run_sql "INSERT INTO ${CLICKHOUSE_DB}.search_documents (doc_version, ingested_at, event_uid, compacted_parent_uid, session_id, session_date, source_name, harness, source_file, source_generation, source_line_no, source_offset, source_ref, record_ts, event_class, payload_type, actor_role, name, phase, text_content, payload_json, token_usage_json) SELECT event_version, ingested_at, event_uid, origin_event_id, session_id, session_date, source_name, harness, source_file, source_generation, source_line_no, source_offset, source_ref, record_ts, event_kind, payload_type, actor_kind, tool_name, if(tool_phase != '', tool_phase, op_status), text_content, payload_json, token_usage_json FROM ${CLICKHOUSE_DB}.events WHERE lengthUTF8(replaceRegexpAll(text_content, '\\\\s+', '')) > 0"

# Run an already URL-encoded query and print the raw response body.
query_scalar() {
  clickhouse_curl "${CLICKHOUSE_URL}/?query=$1"
}

# Collect row counts for the summary below. Only search_documents is written
# directly by this script.
# NOTE(review): postings/term stats/corpus stats are presumably derived by
# ClickHouse from the inserted documents; the two stats tables are not
# truncated here — confirm repeated runs do not double-count.
DOCS="$(query_scalar "SELECT%20count()%20FROM%20${CLICKHOUSE_DB}.search_documents")"
POSTINGS="$(query_scalar "SELECT%20count()%20FROM%20${CLICKHOUSE_DB}.search_postings")"
TERMS="$(query_scalar "SELECT%20count()%20FROM%20${CLICKHOUSE_DB}.search_term_stats")"
CORPUS_DOCS="$(query_scalar "SELECT%20sum(docs)%20FROM%20${CLICKHOUSE_DB}.search_corpus_stats")"

printf '%s\n' \
  "search_documents: $DOCS" \
  "search_postings: $POSTINGS" \
  "search_term_stats (terms): $TERMS" \
  "search_corpus_stats (docs): $CORPUS_DOCS"
