#!/usr/bin/env racket
#lang racket/base

;; beagle-normalize-ms: rewrite legacy cursed (ms "STR-WITH-\n") forms
;; in .bnix files to the canonical ~''…'' reader form.
;;
;; Usage:
;;   bin/beagle-normalize-ms <file.bnix|dir>...
;;     --dry-run    print diffs instead of writing
;;     --check      exit non-zero if any file would change (CI gate)
;;
;; The transform is local: each (ms "…") whose string contains a
;; literal newline becomes a ~''…'' block placed inline at the same
;; position, with body indented relative to the column where `(ms`
;; started. All other source text is preserved byte-for-byte.

(require racket/string
         racket/port
         racket/file
         racket/list
         racket/path
         racket/cmdline)

(define DRY-RUN? (make-parameter #f))
(define CHECK?   (make-parameter #f))
(define VERBOSE? (make-parameter #f))

;; --- string scanning -------------------------------------------------------

;; Read a Racket-style "…" string literal starting at text[start] (which
;; must be #\"). Returns (values end-index raw-unescaped-text).
;; Handles \", \\, \n, \t, \r and 1-char unknown escapes (pass through).
(define (read-racket-string text start)
  (unless (char=? (string-ref text start) #\")
    (error 'read-racket-string "expected opening \" at ~a" start))
  (define n (string-length text))
  (define out (open-output-string))
  (let loop ([i (+ start 1)])
    (cond
      [(>= i n) (error 'read-racket-string "unterminated string from ~a" start)]
      [(char=? (string-ref text i) #\")
       (values (+ i 1) (get-output-string out))]
      [(char=? (string-ref text i) #\\)
       (cond
         [(>= (+ i 1) n) (error 'read-racket-string "trailing backslash at ~a" i)]
         [else
          (define c (string-ref text (+ i 1)))
          (case c
            [(#\n) (write-char #\newline out)]
            [(#\t) (write-char #\tab out)]
            [(#\r) (write-char #\return out)]
            [(#\\) (write-char #\\ out)]
            [(#\") (write-char #\" out)]
            [else  (write-char c out)])
          (loop (+ i 2))])]
      [else
       (write-char (string-ref text i) out)
       (loop (+ i 1))])))

;; Skip whitespace forward; returns new index.
(define (skip-ws text i)
  (define n (string-length text))
  (let loop ([j i])
    (cond
      [(>= j n) j]
      [(memv (string-ref text j) '(#\space #\tab #\newline #\return)) (loop (+ j 1))]
      [else j])))

;; Walk forward through balanced parens/brackets/braces starting at the
;; first char AFTER an opening delimiter. Returns position past the
;; matching close. Strings (single-quoted "" and ~"") and line comments
;; (;) are respected. Used to skip over (ms forms we don't want to
;; rewrite (multi-operand or interp-bearing).
(define (scan-balanced text start)
  (define n (string-length text))
  (let loop ([i start] [depth 1])
    (cond
      [(>= i n) i]
      [(zero? depth) i]
      [else
       (define c (string-ref text i))
       (case c
         [(#\( #\[ #\{) (loop (+ i 1) (+ depth 1))]
         [(#\) #\] #\}) (if (= depth 1) (+ i 1) (loop (+ i 1) (- depth 1)))]
         [(#\")
          (define-values (end _txt) (read-racket-string text i))
          (loop end depth)]
         [(#\;)
          ;; Line comment to EOL.
          (let comment-loop ([k i])
            (cond
              [(>= k n) (loop k depth)]
              [(char=? (string-ref text k) #\newline) (loop (+ k 1) depth)]
              [else (comment-loop (+ k 1))]))]
         [else (loop (+ i 1) depth)])])))

;; Compute column number (0-indexed) of text[idx] by walking back to the
;; start of its line.
(define (column-of text idx)
  (let loop ([j idx])
    (cond
      [(<= j 0) j]
      [(char=? (string-ref text (- j 1)) #\newline) (- idx j)]
      [else (loop (- j 1))])))

;; --- body rendering --------------------------------------------------------

(define (escape-tilde-body s)
  (define s1 (regexp-replace* #rx"''" s "'''"))
  (regexp-replace* #rx"\\$\\{" s1 "''${"))

;; render-tilde-multi (below) handles both single- and multi-operand
;; (ms …) — the single-operand path uses a one-element operand list.

;; --- find + rewrite (ms "…") sites -----------------------------------------

;; Read one operand of an (ms …) form starting at text[i]. Returns
;; (values kind text-substring unescaped-or-#f next-index)
;;   kind: 'string | 'expr
;; Strings come back with their Racket-string-literal escapes resolved
;; in `unescaped`. Non-string operands return the verbatim source text;
;; `unescaped` is #f.
(define (read-ms-operand text i)
  (define n (string-length text))
  (cond
    [(>= i n) (error 'read-ms-operand "EOF mid-operand")]
    [(char=? (string-ref text i) #\")
     (define-values (end raw) (read-racket-string text i))
     (values 'string (substring text i end) raw end)]
    [(memv (string-ref text i) '(#\( #\[ #\{))
     (define end (scan-balanced text (+ i 1)))
     (values 'expr (substring text i end) #f end)]
    [else
     ;; Atomic operand: symbol/number/keyword/etc. Read until ws, close,
     ;; or quote.
     (let loop ([j i])
       (cond
         [(>= j n) (values 'expr (substring text i j) #f j)]
         [(memv (string-ref text j)
                '(#\space #\tab #\newline #\return
                  #\) #\] #\} #\" #\( #\[ #\{ #\;))
          (values 'expr (substring text i j) #f j)]
         [else (loop (+ j 1))]))]))

;; Try to interpret text[i..] as an (ms …) form (single- or multi-
;; operand). Returns:
;;   (values 'rewrite end-idx replacement-text)
;;     — any string operand contains a literal \n, so it's the legacy
;;       cursed form from the old importer; rewrite it.
;;   (values 'skip end-idx #f)
;;     — (ms …) but every string operand is single-line (canonical
;;       multi-operand form) — leave alone.
;;   (values #f #f #f)
;;     — not an (ms …) form at all.
(define (try-rewrite-ms text i)
  (define n (string-length text))
  (cond
    [(not (and (<= (+ i 4) n)
               (string=? (substring text i (+ i 3)) "(ms")
               (memv (string-ref text (+ i 3))
                     '(#\space #\tab #\newline #\return))))
     (values #f #f #f)]
    [else
     (define open-col (column-of text i))
     ;; Collect operands until we hit ')'.
     (let collect ([k (skip-ws text (+ i 3))]
                   [operands '()])
       (cond
         [(>= k n) (values #f #f #f)]
         [(char=? (string-ref text k) #\))
          (define operands* (reverse operands))
          (define any-multi-line?
            (for/or ([op (in-list operands*)])
              (and (eq? (car op) 'string)
                   (regexp-match? #rx"\n" (caddr op)))))
          (cond
            [any-multi-line?
             ;; CURSED legacy form (post-importer). Rewrite.
             (values 'rewrite (+ k 1)
                     (render-tilde-multi operands* open-col))]
            [else
             ;; Canonical multi-operand or single-line single-string.
             (values 'skip (+ k 1) #f)])]
         [else
          (define-values (kind src raw end) (read-ms-operand text k))
          (collect (skip-ws text end)
                   (cons (list kind src raw) operands))]))]))

;; Render a list of (kind src raw) operands as a ~''…'' block. String
;; operands contribute their raw content; expression operands contribute
;; ${SRC} markers. Same indentation rules as render-tilde.
(define (render-tilde-multi operands col-open)
  (define body
    (apply string-append
      (for/list ([op (in-list operands)])
        (define kind (car op))
        (define src (cadr op))
        (define raw (caddr op))
        (cond
          [(eq? kind 'string) (escape-tilde-body raw)]
          [else
           ;; Embed the expression source verbatim. The bnix reader
           ;; expects single-line ${…}; if the expression source spans
           ;; lines, collapse to one (typical operands are short
           ;; identifiers or qualified-name calls).
           (format "${~a}"
                   (regexp-replace* #rx"\n[ \t]*" src " "))]))))
  ;; The body already has '' → ''' and ${ → ''${ applied via
  ;; escape-tilde-body for string operands. Expression operands emit
  ;; their own ${…} markers which must NOT be escaped — they're real
  ;; interps.
  (define body-ind (make-string (+ col-open 2) #\space))
  (define close-ind (make-string col-open #\space))
  (define lines (string-split body "\n" #:trim? #f))
  (define indented-lines
    (for/list ([l (in-list lines)])
      (cond
        [(regexp-match? #rx"^[ \t]*$" l) ""]
        [else (string-append body-ind l)])))
  (string-append "~''\n"
                 (string-join indented-lines "\n")
                 "\n" close-ind "''"))

(define (normalize-text text)
  (define n (string-length text))
  (define out (open-output-string))
  (define changes 0)
  (let loop ([i 0])
    (cond
      [(>= i n) (void)]
      [else
       (define c (string-ref text i))
       (case c
         [(#\")
          (define-values (end _raw) (read-racket-string text i))
          (display (substring text i end) out)
          (loop end)]
         [(#\;)
          (let comment-loop ([k i])
            (cond
              [(>= k n) (display (substring text i k) out) (loop k)]
              [(char=? (string-ref text k) #\newline)
               (display (substring text i (+ k 1)) out)
               (loop (+ k 1))]
              [else (comment-loop (+ k 1))]))]
         [(#\()
          (define-values (kind end replacement) (try-rewrite-ms text i))
          (cond
            [(eq? kind 'rewrite)
             (display replacement out)
             (set! changes (+ changes 1))
             (loop end)]
            [else
             (display c out)
             (loop (+ i 1))])]
         [else
          (display c out)
          (loop (+ i 1))])]))
  (values (get-output-string out) changes))

;; --- CLI -------------------------------------------------------------------

(define (process-file path)
  (define text (file->string path))
  (define-values (new changes) (normalize-text text))
  (cond
    [(zero? changes)
     (when (VERBOSE?) (printf "~a: unchanged~n" path))
     #f]
    [(DRY-RUN?)
     (printf "~a: would normalise ~a (ms …) form(s)~n" path changes)
     (when (VERBOSE?) (printf "----- new contents -----~n~a~n" new))
     #t]
    [else
     (with-output-to-file path #:exists 'replace
       (lambda () (display new)))
     (printf "~a: normalised ~a form(s)~n" path changes)
     #t]))

(define (collect-bnix-files paths)
  (apply append
    (for/list ([p (in-list paths)])
      (define pp (string->path p))
      (cond
        [(directory-exists? pp)
         (for/list ([f (in-directory pp)]
                    #:when (equal? (path-get-extension f) #".bnix"))
           f)]
        [(file-exists? pp) (list pp)]
        [else (error 'beagle-normalize-ms "no such path: ~a" p)]))))

(define (main)
  (define targets
    (command-line
     #:program "beagle-normalize-ms"
     #:once-each
     [("--dry-run") "Print summary and don't write" (DRY-RUN? #t)]
     [("--check") "Exit non-zero if any file would change" (CHECK? #t) (DRY-RUN? #t)]
     [("-v" "--verbose") "Verbose output" (VERBOSE? #t)]
     #:args paths
     (when (null? paths)
       (eprintf "usage: beagle-normalize-ms [--dry-run] [--check] <file|dir>...~n")
       (exit 2))
     paths))
  (define files (collect-bnix-files targets))
  (define any-changed? #f)
  (for ([f (in-list files)])
    (when (process-file f) (set! any-changed? #t)))
  (when (and (CHECK?) any-changed?)
    (exit 1)))

(main)
