Directory structure:
└── tree-sitter-tree-sitter/
    ├── README.md
    ├── build.zig
    ├── build.zig.zon
    ├── Cargo.toml
    ├── CONTRIBUTING.md
    ├── Dockerfile
    ├── FUNDING.json
    ├── LICENSE
    ├── Makefile
    ├── Package.swift
    ├── .dockerignore
    ├── .editorconfig
    ├── crates/
    │   ├── cli/
    │   │   ├── README.md
    │   │   ├── build.rs
    │   │   ├── Cargo.toml
    │   │   ├── benches/
    │   │   │   └── benchmark.rs
    │   │   ├── eslint/
    │   │   │   ├── index.js
    │   │   │   └── package.json
    │   │   ├── npm/
    │   │   │   ├── cli.js
    │   │   │   ├── dsl.d.ts
    │   │   │   ├── install.js
    │   │   │   └── package.json
    │   │   └── src/
    │   │       ├── fuzz.rs
    │   │       ├── highlight.rs
    │   │       ├── init.rs
    │   │       ├── input.rs
    │   │       ├── logger.rs
    │   │       ├── parse.rs
    │   │       ├── playground.html
    │   │       ├── playground.rs
    │   │       ├── query.rs
    │   │       ├── query_testing.rs
    │   │       ├── tags.rs
    │   │       ├── test_highlight.rs
    │   │       ├── test_tags.rs
    │   │       ├── tests.rs
    │   │       ├── tree_sitter_cli.rs
    │   │       ├── util.rs
    │   │       ├── version.rs
    │   │       ├── wasm.rs
    │   │       ├── fuzz/
    │   │       │   ├── allocations.rs
    │   │       │   ├── corpus_test.rs
    │   │       │   ├── edits.rs
    │   │       │   ├── random.rs
    │   │       │   └── scope_sequence.rs
    │   │       ├── templates/
    │   │       │   ├── __init__.py
    │   │       │   ├── __init__.pyi
    │   │       │   ├── _cargo.toml
    │   │       │   ├── binding.go
    │   │       │   ├── binding.gyp
    │   │       │   ├── binding_test.go
    │   │       │   ├── binding_test.js
    │   │       │   ├── build.rs
    │   │       │   ├── build.zig
    │   │       │   ├── build.zig.zon
    │   │       │   ├── cmakelists.cmake
    │   │       │   ├── gitattributes
    │   │       │   ├── gitignore
    │   │       │   ├── go.mod
    │   │       │   ├── grammar.js
    │   │       │   ├── index.d.ts
    │   │       │   ├── index.js
    │   │       │   ├── js-binding.cc
    │   │       │   ├── lib.rs
    │   │       │   ├── makefile
    │   │       │   ├── package.json
    │   │       │   ├── package.swift
    │   │       │   ├── PARSER_NAME.h
    │   │       │   ├── PARSER_NAME.pc.in
    │   │       │   ├── py-binding.c
    │   │       │   ├── pyproject.toml
    │   │       │   ├── root.zig
    │   │       │   ├── setup.py
    │   │       │   ├── test_binding.py
    │   │       │   ├── tests.swift
    │   │       │   └── .editorconfig
    │   │       └── tests/
    │   │           ├── async_context_test.rs
    │   │           ├── corpus_test.rs
    │   │           ├── detect_language.rs
    │   │           ├── helpers.rs
    │   │           ├── highlight_test.rs
    │   │           ├── language_test.rs
    │   │           ├── node_test.rs
    │   │           ├── parser_hang_test.rs
    │   │           ├── pathological_test.rs
    │   │           ├── tags_test.rs
    │   │           ├── test_highlight_test.rs
    │   │           ├── test_tags_test.rs
    │   │           ├── text_provider_test.rs
    │   │           ├── tree_test.rs
    │   │           ├── wasm_language_test.rs
    │   │           ├── helpers/
    │   │           │   ├── allocations.rs
    │   │           │   ├── dirs.rs
    │   │           │   ├── edits.rs
    │   │           │   ├── fixtures.rs
    │   │           │   └── query_helpers.rs
    │   │           └── proc_macro/
    │   │               ├── Cargo.toml
    │   │               └── src/
    │   │                   └── lib.rs
    │   ├── config/
    │   │   ├── README.md
    │   │   ├── Cargo.toml
    │   │   └── src/
    │   │       └── tree_sitter_config.rs
    │   ├── generate/
    │   │   ├── README.md
    │   │   ├── Cargo.toml
    │   │   └── src/
    │   │       ├── build_tables.rs
    │   │       ├── dedup.rs
    │   │       ├── dsl.js
    │   │       ├── generate.rs
    │   │       ├── grammars.rs
    │   │       ├── nfa.rs
    │   │       ├── parse_grammar.rs
    │   │       ├── prepare_grammar.rs
    │   │       ├── rules.rs
    │   │       ├── tables.rs
    │   │       ├── build_tables/
    │   │       │   ├── build_lex_table.rs
    │   │       │   ├── build_parse_table.rs
    │   │       │   ├── coincident_tokens.rs
    │   │       │   ├── item.rs
    │   │       │   ├── item_set_builder.rs
    │   │       │   ├── minimize_parse_table.rs
    │   │       │   └── token_conflicts.rs
    │   │       ├── prepare_grammar/
    │   │       │   ├── expand_repeats.rs
    │   │       │   ├── expand_tokens.rs
    │   │       │   ├── extract_default_aliases.rs
    │   │       │   ├── extract_tokens.rs
    │   │       │   ├── flatten_grammar.rs
    │   │       │   ├── intern_symbols.rs
    │   │       │   └── process_inlines.rs
    │   │       └── templates/
    │   │           ├── alloc.h
    │   │           └── array.h
    │   ├── highlight/
    │   │   ├── README.md
    │   │   ├── Cargo.toml
    │   │   ├── include/
    │   │   │   └── tree_sitter/
    │   │   │       └── highlight.h
    │   │   └── src/
    │   │       ├── c_lib.rs
    │   │       └── highlight.rs
    │   ├── language/
    │   │   ├── README.md
    │   │   ├── Cargo.toml
    │   │   └── src/
    │   │       └── language.rs
    │   ├── loader/
    │   │   ├── README.md
    │   │   ├── build.rs
    │   │   ├── Cargo.toml
    │   │   └── emscripten-version
    │   ├── tags/
    │   │   ├── README.md
    │   │   ├── Cargo.toml
    │   │   ├── include/
    │   │   │   └── tree_sitter/
    │   │   │       └── tags.h
    │   │   └── src/
    │   │       ├── c_lib.rs
    │   │       └── tags.rs
    │   └── xtask/
    │       ├── Cargo.toml
    │       └── src/
    │           ├── benchmark.rs
    │           ├── build_wasm.rs
    │           ├── bump.rs
    │           ├── check_wasm_exports.rs
    │           ├── clippy.rs
    │           ├── fetch.rs
    │           ├── generate.rs
    │           ├── main.rs
    │           ├── test.rs
    │           ├── upgrade_emscripten.rs
    │           └── upgrade_wasmtime.rs
    ├── docs/
    │   ├── book.toml
    │   └── src/
    │       ├── 3-syntax-highlighting.md
    │       ├── 4-code-navigation.md
    │       ├── 5-implementation.md
    │       ├── 6-contributing.md
    │       ├── 7-playground.md
    │       ├── index.md
    │       ├── SUMMARY.md
    │       ├── assets/
    │       │   ├── css/
    │       │   │   ├── mdbook-admonish.css
    │       │   │   └── playground.css
    │       │   ├── js/
    │       │   │   └── playground.js
    │       │   └── schemas/
    │       │       ├── config.schema.json
    │       │       └── grammar.schema.json
    │       ├── cli/
    │       │   ├── build.md
    │       │   ├── complete.md
    │       │   ├── dump-languages.md
    │       │   ├── fuzz.md
    │       │   ├── generate.md
    │       │   ├── highlight.md
    │       │   ├── index.md
    │       │   ├── init-config.md
    │       │   ├── init.md
    │       │   ├── parse.md
    │       │   ├── playground.md
    │       │   ├── query.md
    │       │   ├── tags.md
    │       │   ├── test.md
    │       │   └── version.md
    │       ├── creating-parsers/
    │       │   ├── 1-getting-started.md
    │       │   ├── 2-the-grammar-dsl.md
    │       │   ├── 3-writing-the-grammar.md
    │       │   ├── 4-external-scanners.md
    │       │   ├── 5-writing-tests.md
    │       │   ├── 6-publishing.md
    │       │   └── index.md
    │       └── using-parsers/
    │           ├── 1-getting-started.md
    │           ├── 2-basic-parsing.md
    │           ├── 3-advanced-parsing.md
    │           ├── 4-walking-trees.md
    │           ├── 6-static-node-types.md
    │           ├── index.md
    │           └── queries/
    │               ├── 1-syntax.md
    │               ├── 2-operators.md
    │               ├── 3-predicates-and-directives.md
    │               ├── 4-api.md
    │               └── index.md
    ├── lib/
    │   ├── README.md
    │   ├── Cargo.toml
    │   ├── CMakeLists.txt
    │   ├── tree-sitter.pc.in
    │   ├── .ccls
    │   ├── binding_rust/
    │   │   ├── README.md
    │   │   ├── build.rs
    │   │   ├── ffi.rs
    │   │   ├── util.rs
    │   │   └── wasm_language.rs
    │   ├── binding_web/
    │   │   ├── README.md
    │   │   ├── CONTRIBUTING.md
    │   │   ├── eslint.config.mjs
    │   │   ├── package.json
    │   │   ├── tsconfig.json
    │   │   ├── vitest.config.ts
    │   │   ├── web-tree-sitter.d.cts
    │   │   ├── web-tree-sitter.d.ts
    │   │   ├── lib/
    │   │   │   ├── exports.txt
    │   │   │   ├── imports.js
    │   │   │   ├── prefix.js
    │   │   │   ├── tree-sitter.c
    │   │   │   └── web-tree-sitter.d.ts
    │   │   ├── script/
    │   │   │   ├── build.js
    │   │   │   ├── check-artifacts-fresh.ts
    │   │   │   └── generate-dts.js
    │   │   ├── src/
    │   │   │   ├── bindings.ts
    │   │   │   ├── constants.ts
    │   │   │   ├── index.ts
    │   │   │   ├── language.ts
    │   │   │   ├── lookahead_iterator.ts
    │   │   │   ├── marshal.ts
    │   │   │   ├── node.ts
    │   │   │   ├── parser.ts
    │   │   │   ├── query.ts
    │   │   │   ├── tree.ts
    │   │   │   └── tree_cursor.ts
    │   │   └── test/
    │   │       ├── helper.ts
    │   │       ├── language.test.ts
    │   │       ├── node.test.ts
    │   │       ├── parser.test.ts
    │   │       ├── query.test.ts
    │   │       └── tree.test.ts
    │   ├── include/
    │   │   └── tree_sitter/
    │   │       └── api.h
    │   └── src/
    │       ├── alloc.c
    │       ├── alloc.h
    │       ├── array.h
    │       ├── atomic.h
    │       ├── clock.h
    │       ├── error_costs.h
    │       ├── get_changed_ranges.c
    │       ├── get_changed_ranges.h
    │       ├── host.h
    │       ├── language.c
    │       ├── language.h
    │       ├── length.h
    │       ├── lexer.c
    │       ├── lexer.h
    │       ├── lib.c
    │       ├── node.c
    │       ├── parser.h
    │       ├── point.h
    │       ├── reduce_action.h
    │       ├── reusable_node.h
    │       ├── stack.c
    │       ├── stack.h
    │       ├── subtree.c
    │       ├── subtree.h
    │       ├── tree.c
    │       ├── tree.h
    │       ├── tree_cursor.c
    │       ├── tree_cursor.h
    │       ├── ts_assert.h
    │       ├── unicode.h
    │       ├── wasm_store.h
    │       ├── portable/
    │       │   └── endian.h
    │       ├── unicode/
    │       │   ├── README.md
    │       │   ├── ICU_SHA
    │       │   ├── LICENSE
    │       │   ├── ptypes.h
    │       │   ├── umachine.h
    │       │   ├── urename.h
    │       │   ├── utf.h
    │       │   ├── utf16.h
    │       │   └── utf8.h
    │       └── wasm/
    │           ├── stdlib-symbols.txt
    │           └── stdlib.c
    ├── test/
    │   └── fixtures/
    │       ├── error_corpus/
    │       │   ├── readme.md
    │       │   ├── c_errors.txt
    │       │   ├── javascript_errors.txt
    │       │   ├── json_errors.txt
    │       │   ├── python_errors.txt
    │       │   └── ruby_errors.txt
    │       ├── grammars/
    │       │   └── .gitkeep
    │       ├── template_corpus/
    │       │   ├── readme.md
    │       │   └── ruby_templates.txt
    │       └── test_grammars/
    │           ├── readme.md
    │           ├── aliased_inlined_rules/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── aliased_rules/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── aliased_token_rules/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── aliased_unit_reductions/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── aliases_in_root/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── anonymous_tokens_with_escaped_chars/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── associativity_left/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── associativity_missing/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── associativity_right/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── conflict_in_repeat_rule/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── conflict_in_repeat_rule_after_external_token/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── conflicting_precedence/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── depends_on_column/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── dynamic_precedence/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── epsilon_external_extra_tokens/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── epsilon_external_tokens/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── epsilon_rules/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── external_and_internal_anonymous_tokens/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── external_and_internal_tokens/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── external_extra_tokens/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── external_tokens/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── external_unicode_column_alignment/
    │           │   ├── README.md
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── extra_non_terminals/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── extra_non_terminals_with_shared_rules/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── get_col_eof/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── get_col_should_hang_not_crash/
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── immediate_tokens/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── inline_rules/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── inlined_aliased_rules/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── inverted_external_token/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   ├── grammar.js
    │           │   └── scanner.c
    │           ├── invisible_start_rule/
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── lexical_conflicts_due_to_state_merging/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── named_precedences/
    │           │   ├── readme.txt
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── named_rule_aliased_as_anonymous/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── nested_inlined_rules/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── next_sibling_from_zwt/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── partially_resolved_conflict/
    │           │   ├── readme.txt
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── precedence_on_single_child_missing/
    │           │   ├── readme.md
    │           │   ├── expected_error.txt
    │           │   └── grammar.js
    │           ├── precedence_on_single_child_negative/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── precedence_on_single_child_positive/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── precedence_on_subsequence/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── precedence_on_token/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── readme_grammar/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── reserved_words/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── start_rule_is_blank/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── start_rule_is_token/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── unicode_classes/
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           ├── unused_rules/
    │           │   ├── readme.md
    │           │   ├── corpus.txt
    │           │   └── grammar.js
    │           └── uses_current_column/
    │               ├── corpus.txt
    │               ├── grammar.js
    │               └── scanner.c
    ├── .cargo/
    │   └── config.toml
    ├── .github/
    │   ├── cliff.toml
    │   ├── dependabot.yml
    │   ├── FUNDING.yml
    │   ├── actions/
    │   │   └── cache/
    │   │       └── action.yml
    │   ├── ISSUE_TEMPLATE/
    │   │   ├── bug_report.yml
    │   │   ├── config.yml
    │   │   └── feature_request.yml
    │   ├── scripts/
    │   │   ├── close_unresponsive.js
    │   │   ├── cross.sh
    │   │   ├── make.sh
    │   │   ├── remove_response_label.js
    │   │   ├── reviewers_remove.js
    │   │   └── tree-sitter.sh
    │   └── workflows/
    │       ├── backport.yml
    │       ├── bindgen.yml
    │       ├── build.yml
    │       ├── ci.yml
    │       ├── docs.yml
    │       ├── emscripten.yml
    │       ├── nvim_ts.yml
    │       ├── release.yml
    │       ├── response.yml
    │       ├── reviewers_remove.yml
    │       ├── sanitize.yml
    │       └── wasm_exports.yml
    └── .zed/
        └── settings.json


Files Content:

================================================
FILE: README.md
================================================
# tree-sitter

[![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618)
[![discord][discord]](https://discord.gg/w7nTvsVJhm)
[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org)

Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:

- **General** enough to parse any programming language
- **Fast** enough to parse on every keystroke in a text editor
- **Robust** enough to provide useful results even in the presence of syntax errors
- **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application

## Links
- [Documentation](https://tree-sitter.github.io)
- [Rust binding](lib/binding_rust/README.md)
- [WASM binding](lib/binding_web/README.md)
- [Command-line interface](crates/cli/README.md)

[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord
[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix



================================================
FILE: build.zig
================================================
const std = @import("std");

/// Configures the build graph for the tree-sitter C library.
///
/// User-facing options (each defaults to `false` when not given):
///   -Denable-wasm   define `TREE_SITTER_FEATURE_WASM` and link against wasmtime
///   -Dbuild-shared  produce a dynamic library (built as PIC) instead of a static one
///   -Damalgamated   compile only `lib/src/lib.c` instead of the individual
///                   sources returned by `findSourceFiles`
pub fn build(b: *std.Build) !void {
  const target = b.standardTargetOptions(.{});
  const optimize = b.standardOptimizeOption(.{});

  const enable_wasm = b.option(bool, "enable-wasm", "Enable Wasm support") orelse false;
  const build_shared = b.option(bool, "build-shared", "Build a shared library") orelse false;
  const use_amalgamation = b.option(bool, "amalgamated", "Build using an amalgamated source") orelse false;

  // Both source-collection strategies compile with the same C flags.
  const c_flags: []const []const u8 = &.{"-std=c11"};

  const root_module = b.createModule(.{
    .target = target,
    .optimize = optimize,
    .link_libc = true,
    // PIC is only forced for shared builds; `null` keeps the default otherwise.
    .pic = if (build_shared) true else null,
  });

  const lib: *std.Build.Step.Compile = b.addLibrary(.{
    .name = "tree-sitter",
    .linkage = if (build_shared) .dynamic else .static,
    .root_module = root_module,
  });

  if (!use_amalgamation) {
    lib.addCSourceFiles(.{
      .root = b.path("lib/src"),
      .files = try findSourceFiles(b),
      .flags = c_flags,
    });
  } else {
    lib.addCSourceFile(.{
      .file = b.path("lib/src/lib.c"),
      .flags = c_flags,
    });
  }

  for ([_][]const u8{ "lib/include", "lib/src", "lib/src/wasm" }) |include_dir| {
    lib.addIncludePath(b.path(include_dir));
  }

  lib.root_module.addCMacro("_POSIX_C_SOURCE", "200112L");
  lib.root_module.addCMacro("_DEFAULT_SOURCE", "");

  if (enable_wasm) {
    // Resolved only when Wasm is requested: `wasmtimeDep` panics on targets
    // it has no prebuilt package for.
    const dep_name = wasmtimeDep(target.result);
    if (b.lazyDependency(dep_name, .{})) |wasmtime| {
      lib.root_module.addCMacro("TREE_SITTER_FEATURE_WASM", "");
      lib.addSystemIncludePath(wasmtime.path("include"));
      lib.addLibraryPath(wasmtime.path("lib"));
      lib.linkSystemLibrary("wasmtime");
    }
  }

  lib.installHeadersDirectory(b.path("lib/include"), ".", .{});

  b.installArtifact(lib);
}

/// Maps a resolved target to the name of the matching prebuilt wasmtime C API
/// dependency.
///
/// The lookup is keyed on OS first, then CPU architecture, and (for Linux and
/// Windows x86_64/aarch64) on the ABI. Panics with the offending
/// arch-os-abi triple when no package is listed for the target.
fn wasmtimeDep(target: std.Target) []const u8 {
  const cpu_arch = target.cpu.arch;
  const os_tag = target.os.tag;
  const target_abi = target.abi;

  const dep_name: ?[]const u8 = switch (os_tag) {
    .linux => switch (cpu_arch) {
      .x86_64 => switch (target_abi) {
        .gnu => "wasmtime_c_api_x86_64_linux",
        .musl => "wasmtime_c_api_x86_64_musl",
        .android => "wasmtime_c_api_x86_64_android",
        else => null,
      },
      .aarch64 => switch (target_abi) {
        .gnu => "wasmtime_c_api_aarch64_linux",
        .android => "wasmtime_c_api_aarch64_android",
        else => null,
      },
      // These two architectures are matched regardless of ABI.
      .s390x => "wasmtime_c_api_s390x_linux",
      .riscv64 => "wasmtime_c_api_riscv64gc_linux",
      else => null,
    },
    .windows => switch (cpu_arch) {
      .x86_64 => switch (target_abi) {
        .gnu => "wasmtime_c_api_x86_64_mingw",
        .msvc => "wasmtime_c_api_x86_64_windows",
        else => null,
      },
      else => null,
    },
    .macos => switch (cpu_arch) {
      .x86_64 => "wasmtime_c_api_x86_64_macos",
      .aarch64 => "wasmtime_c_api_aarch64_macos",
      else => null,
    },
    else => null,
  };

  if (dep_name) |name| return name;

  std.debug.panic(
    "Unsupported target for wasmtime: {s}-{s}-{s}",
    .{ @tagName(cpu_arch), @tagName(os_tag), @tagName(target_abi) },
  );
}

/// Lists the C sources to compile for a non-amalgamated build.
///
/// Scans `lib/src` (relative to the build root) without descending into
/// subdirectories and returns the name of every regular `.c` file except
/// `lib.c`. Each name is copied with `b.dupe`, and the list itself is backed
/// by `b.allocator`.
fn findSourceFiles(b: *std.Build) ![]const []const u8 {
  var src_dir = try b.build_root.handle.openDir("lib/src", .{ .iterate = true });
  defer src_dir.close();

  var c_files = std.ArrayList([]const u8).init(b.allocator);

  var entries = src_dir.iterate();
  while (try entries.next()) |entry| {
    if (entry.kind != .file) continue;

    const name = entry.name;
    if (!std.mem.eql(u8, std.fs.path.extension(name), ".c")) continue;
    // lib.c is skipped here; `build` compiles it only in amalgamated mode.
    if (std.mem.eql(u8, name, "lib.c")) continue;

    try c_files.append(b.dupe(name));
  }

  return c_files.items;
}



================================================
FILE: build.zig.zon
================================================
// Zig package manifest for the tree-sitter C library.
.{
  .name = .tree_sitter,
  .fingerprint = 0x841224b447ac0d4f,
  // The same version string appears in the workspace Cargo.toml and the Makefile.
  .version = "0.26.0",
  // Files and directories that make up the package contents.
  .paths = .{
    "build.zig",
    "build.zig.zon",
    "lib/src",
    "lib/include",
    "README.md",
    "LICENSE",
  },
  // Prebuilt wasmtime v29.0.1 C API archives, one entry per target.
  // Each entry name is one of the strings returned by `wasmtimeDep` in
  // build.zig, which passes it to `b.lazyDependency`; every entry is marked
  // lazy for that reason.
  .dependencies = .{
    .wasmtime_c_api_aarch64_android = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-aarch64-android-c-api.tar.xz",
      .hash = "N-V-__8AAC3KCQZMd5ea2CkcbjldaVqCT7BT_9_rLMId6V__",
      .lazy = true,
    },
    .wasmtime_c_api_aarch64_linux = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-aarch64-linux-c-api.tar.xz",
      .hash = "N-V-__8AAGUY3gU6jj2CNJAYb7HiMNVPV1FIcTCI6RSSYwXu",
      .lazy = true,
    },
    .wasmtime_c_api_aarch64_macos = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-aarch64-macos-c-api.tar.xz",
      .hash = "N-V-__8AAM1GMARD6LGQebhVsSZ0uePUoo3Fw5nEO2L764vf",
      .lazy = true,
    },
    .wasmtime_c_api_riscv64gc_linux = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-riscv64gc-linux-c-api.tar.xz",
      .hash = "N-V-__8AAN2cuQadBwMc8zJxv0sMY99Ae1Nc1dZcZAK9b4DZ",
      .lazy = true,
    },
    .wasmtime_c_api_s390x_linux = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-s390x-linux-c-api.tar.xz",
      .hash = "N-V-__8AAPevngYz99mwT0KQY9my2ax1p6APzgLEJeV4II9U",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_android = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-android-c-api.tar.xz",
      .hash = "N-V-__8AABHIEgaTyzPfjgnnCy0dwJiXoDiJFblCkYOJsQvy",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_linux = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-linux-c-api.tar.xz",
      .hash = "N-V-__8AALUN5AWSEDRulL9u-OJJ-l0_GoT5UFDtGWZayEIq",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_macos = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-macos-c-api.tar.xz",
      .hash = "N-V-__8AANUeXwSPh13TqJCSSFdi87GEcHs8zK6FqE4v_TjB",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_mingw = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-mingw-c-api.zip",
      .hash = "N-V-__8AALundgW-p1ffOnd7bsYyL8SY5OziDUZu7cXio2EL",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_musl = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-musl-c-api.tar.xz",
      .hash = "N-V-__8AALMZ5wXJWW5qY-3MMjTAYR0MusckvzCsmg-69ALH",
      .lazy = true,
    },
    .wasmtime_c_api_x86_64_windows = .{
      .url = "https://github.com/bytecodealliance/wasmtime/releases/download/v29.0.1/wasmtime-v29.0.1-x86_64-windows-c-api.zip",
      .hash = "N-V-__8AAG-uVQVEDMsB1ymJzxpHcoiXo1_I3TFnPM5Zjy1i",
      .lazy = true,
    },
  }
}



================================================
FILE: Cargo.toml
================================================
[workspace]
default-members = ["crates/cli"]
members = [
  "crates/cli",
  "crates/config",
  "crates/generate",
  "crates/highlight",
  "crates/loader",
  "crates/tags",
  "crates/xtask",
  "crates/language",
  "lib",
]
resolver = "2"

[workspace.package]
version = "0.26.0"
authors = [
  "Max Brunsfeld <maxbrunsfeld@gmail.com>",
  "Amaan Qureshi <amaanq12@gmail.com>",
]
edition = "2021"
rust-version = "1.82"
homepage = "https://tree-sitter.github.io/tree-sitter"
repository = "https://github.com/tree-sitter/tree-sitter"
license = "MIT"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]

[workspace.lints.clippy]
dbg_macro = "deny"
todo = "deny"
pedantic = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }

# The lints below are a specific subset of the pedantic, nursery, and cargo lints
# that we explicitly allow in the tree-sitter codebase because they either:
#
# 1. Contain false positives,
# 2. Are unnecessary, or
# 3. Worsen the code

branches_sharing_code = "allow"
cast_lossless = "allow"
cast_possible_truncation = "allow"
cast_possible_wrap = "allow"
cast_precision_loss = "allow"
cast_sign_loss = "allow"
checked_conversions = "allow"
cognitive_complexity = "allow"
collection_is_never_read = "allow"
fallible_impl_from = "allow"
fn_params_excessive_bools = "allow"
inline_always = "allow"
if_not_else = "allow"
items_after_statements = "allow"
match_wildcard_for_single_variants = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "allow"
module_name_repetitions = "allow"
multiple_crate_versions = "allow"
needless_for_each = "allow"
obfuscated_if_else = "allow"
option_if_let_else = "allow"
or_fun_call = "allow"
range_plus_one = "allow"
redundant_clone = "allow"
redundant_closure_for_method_calls = "allow"
ref_option = "allow"
similar_names = "allow"
string_lit_as_bytes = "allow"
struct_excessive_bools = "allow"
struct_field_names = "allow"
transmute_undefined_repr = "allow"
too_many_lines = "allow"
unnecessary_wraps = "allow"
unused_self = "allow"
used_underscore_items = "allow"

[profile.optimize]
inherits = "release"
strip = true         # Automatically strip symbols from the binary.
lto = true           # Link-time optimization.
opt-level = 3        # Optimization level 3.
codegen-units = 1    # Single codegen unit: slower builds, better-optimized code.

[profile.size]
inherits = "optimize"
opt-level = "s"       # Optimize for size.

[profile.release-dev]
inherits = "release"
lto = false
debug = true
debug-assertions = true
overflow-checks = true
incremental = true
codegen-units = 256

[workspace.dependencies]
ansi_colours = "1.2.3"
anstyle = "1.0.11"
anyhow = "1.0.98"
bstr = "1.12.0"
cc = "1.2.30"
clap = { version = "4.5.41", features = [
  "cargo",
  "derive",
  "env",
  "help",
  "string",
  "unstable-styles",
] }
clap_complete = "4.5.55"
clap_complete_nushell = "4.5.8"
ctor = "0.2.9"
ctrlc = { version = "3.4.7", features = ["termination"] }
dialoguer = { version = "0.11.0", features = ["fuzzy-select"] }
etcetera = "0.10.0"
filetime = "0.2.25"
flate2 = "1.1.2"
fs4 = "0.12.0"
git2 = "0.20.2"
glob = "0.3.2"
heck = "0.5.0"
html-escape = "0.2.13"
indexmap = "2.10.0"
indoc = "2.0.6"
libloading = "0.8.8"
log = { version = "0.4.27", features = ["std"] }
memchr = "2.7.5"
once_cell = "1.21.3"
path-slash = "0.2.1"
pretty_assertions = "1.4.1"
rand = "0.8.5"
regex = "1.11.1"
regex-syntax = "0.8.5"
rustc-hash = "2.1.1"
semver = { version = "1.0.26", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_derive = "1.0.217"
serde_json = { version = "1.0.141", features = ["preserve_order"] }
similar = "2.7.0"
smallbitvec = "2.6.0"
streaming-iterator = "0.1.9"
tar = "0.4.40"
tempfile = "3.20.0"
thiserror = "2.0.12"
tiny_http = "0.12.0"
toml = "0.8.23"
topological-sort = "0.2.2"
unindent = "0.2.4"
url = { version = "2.5.4", features = ["serde"] }
walkdir = "2.5.0"
wasmparser = "0.224.1"
webbrowser = "1.0.5"

tree-sitter = { version = "0.26.0", path = "./lib" }
tree-sitter-generate = { version = "0.26.0", path = "./crates/generate" }
tree-sitter-language = { path = "./crates/language" }
tree-sitter-loader = { version = "0.26.0", path = "./crates/loader" }
tree-sitter-config = { version = "0.26.0", path = "./crates/config" }
tree-sitter-highlight = { version = "0.26.0", path = "./crates/highlight" }
tree-sitter-tags = { version = "0.26.0", path = "./crates/tags" }



================================================
FILE: CONTRIBUTING.md
================================================
See [docs/src/6-contributing.md](./docs/src/6-contributing.md)



================================================
FILE: Dockerfile
================================================
# Test image for the tree-sitter workspace: copies the repository into a Rust
# toolchain container and runs the full test suite by default.
#
# The toolchain must be at least the workspace's declared `rust-version`
# (1.82 in Cargo.toml); an older compiler refuses to build the crates.
FROM rust:1.82-bookworm

WORKDIR /app

# Refresh the package index and install in a single layer so a cached, stale
# index is never reused, then drop the index to keep the layer small.
RUN apt-get update \
 && apt-get install -y --no-install-recommends nodejs \
 && rm -rf /var/lib/apt/lists/*

COPY . .

# Exec form: cargo is started directly rather than through `/bin/sh -c`,
# so it receives container signals itself.
CMD ["cargo", "test", "--all-features"]



================================================
FILE: FUNDING.json
================================================
{
  "drips": {
    "ethereum": {
      "ownedBy": "0xc01246694085eF6914C527EBdFb4d8C77dfeaf8e"
    }
  }
}



================================================
FILE: LICENSE
================================================
The MIT License (MIT)

Copyright (c) 2018-2025 Max Brunsfeld

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.



================================================
FILE: Makefile
================================================
# Package metadata. These values are substituted into tree-sitter.pc by the
# tree-sitter.pc rule, and VERSION also provides the numbers used for the
# versioned shared-library name.
VERSION := 0.26.0
DESCRIPTION := An incremental parsing system for programming tools
HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/

# install directory layout
# Every directory uses `?=`, so it can be overridden from the environment or
# the make command line.
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
BINDIR ?= $(PREFIX)/bin
PCLIBDIR ?= $(LIBDIR)/pkgconfig

# collect sources
# AMALGAMATED=1 compiles only lib/src/lib.c; any other value compiles every
# other .c file under lib/src instead.
ifneq ($(AMALGAMATED),1)
	SRC := $(wildcard lib/src/*.c)
	# do not double-include amalgamation
	SRC := $(filter-out lib/src/lib.c,$(SRC))
else
	# use amalgamated build
	SRC := lib/src/lib.c
endif
OBJ := $(SRC:.c=.o)

# define default flags, and override to append mandatory flags
# `CFLAGS ?=` only supplies defaults; the `override ... +=` lines are appended
# even when CFLAGS is given on the command line.
ARFLAGS := rcs
CFLAGS ?= -O3 -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
override CFLAGS += -std=c11 -fPIC -fvisibility=hidden
override CFLAGS += -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include

# ABI versioning
# First and second dot-separated fields of VERSION.
SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))

# OS-specific bits
# Target description printed by the compiler; used below to pick the
# shared-library extension and linker flags.
MACHINE := $(shell $(CC) -dumpmachine)

# Darwin: the version numbers go before the .dylib extension, and the install
# name is the library's final path inside LIBDIR.
ifneq ($(findstring darwin,$(MACHINE)),)
	SOEXT = dylib
	SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
	LINKSHARED += -dynamiclib -Wl,-install_name,$(LIBDIR)/libtree-sitter.$(SOEXTVER)
# MinGW: an unversioned .dll; the linker also writes the import library
# libtree-sitter.dll.a. SOEXTVER and SOEXTVER_MAJOR stay undefined here.
else ifneq ($(findstring mingw32,$(MACHINE)),)
	SOEXT = dll
	LINKSHARED += -s -shared -Wl,--out-implib,libtree-sitter.dll.a
# Everything else: a .so with the version numbers appended after the extension.
else
	SOEXT = so
	SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
	SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
	LINKSHARED += -shared -Wl,-soname,libtree-sitter.$(SOEXTVER)
# On FreeBSD, NetBSD and DragonFly the pkg-config file goes under
# $(PREFIX)/libdata/pkgconfig instead of $(LIBDIR)/pkgconfig.
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
	PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
endif

# Default goal: static library, shared library and pkg-config file.
all: libtree-sitter.a libtree-sitter.$(SOEXT) tree-sitter.pc

# Static archive of all object files.
libtree-sitter.a: $(OBJ)
	$(AR) $(ARFLAGS) $@ $^

# Shared library; stripped afterwards only when STRIP is set to a non-empty value.
libtree-sitter.$(SOEXT): $(OBJ)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
	$(STRIP) $@
endif

# On MinGW the import library has no recipe of its own: it is written by the
# DLL link step (see --out-implib in LINKSHARED), so it only depends on the DLL.
ifneq ($(findstring mingw32,$(MACHINE)),)
libtree-sitter.dll.a: libtree-sitter.$(SOEXT)
endif

# Fill in the pkg-config template. The $(VAR:$(PREFIX)/%=%) substitutions strip
# the leading "$(PREFIX)/" so libdir/includedir are written relative to the prefix.
tree-sitter.pc: lib/tree-sitter.pc.in
	sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
		-e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \
		-e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \
		-e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \
		-e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
		-e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@

# Remove every build product, including the import library that the MinGW link
# step writes next to the DLL.
clean:
	$(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.dll.a

# Install the public header, pkg-config file and both libraries under DESTDIR.
# MinGW: the DLL goes to BINDIR and its import library to LIBDIR.
# Elsewhere: the shared library is installed under its fully versioned name,
# with two symlinks (major-versioned name -> full name, bare name -> major name).
install: all
	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
	install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h
	install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
	install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a
ifneq ($(findstring mingw32,$(MACHINE)),)
	install -d '$(DESTDIR)$(BINDIR)'
	install -m755 libtree-sitter.dll '$(DESTDIR)$(BINDIR)'/libtree-sitter.dll
	install -m755 libtree-sitter.dll.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.dll.a
else
	install -m755 libtree-sitter.$(SOEXT) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER)
	cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER) libtree-sitter.$(SOEXTVER_MAJOR)
	cd '$(DESTDIR)$(LIBDIR)' && ln -sf libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXT)
endif

# Remove exactly the files the install target creates. The platform split
# mirrors install: on MinGW the DLL lives in BINDIR and its import library in
# LIBDIR; elsewhere the versioned shared library and its two symlinks live in
# LIBDIR.
uninstall:
	$(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a \
		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h \
		'$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc
ifneq ($(findstring mingw32,$(MACHINE)),)
	$(RM) '$(DESTDIR)$(BINDIR)'/libtree-sitter.dll \
		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.dll.a
else
	$(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) \
		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) \
		'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT)
endif
	rmdir '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter

.PHONY: all install uninstall clean


##### Dev targets #####

# Fetch and generate the grammar fixtures, then run the test suite.
test:
	cargo xtask fetch-fixtures
	cargo xtask generate-fixtures
	cargo xtask test

# Generate the fixtures with --wasm, then run the wasm test suite.
test-wasm:
	cargo xtask generate-fixtures --wasm
	cargo xtask test-wasm

# Rust checks: lockfile consistency, type check, formatting and clippy with
# warnings treated as errors.
lint:
	cargo update --workspace --locked --quiet
	cargo check --workspace --all-targets
	cargo fmt --all --check
	cargo clippy --workspace --all-targets -- -D warnings

# Install the web binding's npm dependencies and run its lint script.
lint-web:
	npm --prefix lib/binding_web ci
	npm --prefix lib/binding_web run lint

format:
	cargo fmt --all

# Prepend the latest release notes to CHANGELOG.md; needs git-cliff and an
# authenticated `gh` CLI for the GitHub token.
changelog:
	@git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)

# None of these targets produce a file of the same name, so all of them
# (including lint-web) must be declared phony.
.PHONY: test test-wasm lint lint-web format changelog



================================================
FILE: Package.swift
================================================
// swift-tools-version: 5.8
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

// Swift Package Manager manifest exposing the C library under `lib/` as a
// single library product named "TreeSitter".
let package = Package(
    name: "TreeSitter",
    products: [
        // Products define the executables and libraries a package produces, and make them visible to other packages.
        .library(
            name: "TreeSitter",
            targets: ["TreeSitter"]),
    ],
    targets: [
        .target(name: "TreeSitter",
                path: "lib",
                // Paths below are relative to `lib/`. Besides non-source files, this
                // excludes `src/lib.c` while the rest of `src` is compiled.
                exclude: [
                        "src/unicode/ICU_SHA",
                        "src/unicode/README.md",
                        "src/unicode/LICENSE",
                        "src/wasm/stdlib-symbols.txt",
                        "src/lib.c",
                ],
                sources: ["src"],
                publicHeadersPath: "include",
                // Same feature-test macros that the Makefile appends to CFLAGS.
                cSettings: [
                        .headerSearchPath("src"),
                        .define("_POSIX_C_SOURCE", to: "200112L"),
                        .define("_DEFAULT_SOURCE"),
                ]),
    ],
    cLanguageStandard: .c11
)



================================================
FILE: .dockerignore
================================================
target
.git



================================================
FILE: .editorconfig
================================================
root = true

[*]
indent_style = space
indent_size = 2
tab_width = 8
end_of_line = lf
insert_final_newline = true

[*.rs]
indent_size = 4

[Makefile]
indent_style = tab
indent_size = 8



================================================
FILE: crates/cli/README.md
================================================
# Tree-sitter CLI

[![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com]

[crates.io]: https://crates.io/crates/tree-sitter-cli
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723
[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A

The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `macOS`, `Linux`, and `Windows`.

### Installation

You can install the `tree-sitter-cli` with `cargo`:

```sh
cargo install --locked tree-sitter-cli
```

or with `npm`:

```sh
npm install tree-sitter-cli
```

You can also download a pre-built binary for your platform from [the releases page].

### Dependencies

The `tree-sitter` binary itself has no dependencies, but specific commands have dependencies that must be present at runtime:

* To generate a parser from a grammar, you must have [`node`](https://nodejs.org) on your PATH.
* To run and test parsers, you must have a C and C++ compiler on your system.

### Commands

* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.

* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.

* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.

[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers
[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest



================================================
FILE: crates/cli/build.rs
================================================
use std::{
    env,
    path::{Path, PathBuf},
    process::Command,
    time::SystemTime,
};

/// Build script entry point.
///
/// Prints the cargo directives for this crate: the optional `BUILD_SHA` and
/// the `BUILD_TIME` environment variables, the custom cfg names, the
/// `TREE_SITTER_EMBED_WASM_BINDING` cfg when the playground assets exist, and
/// (on the listed Unix-like systems) a linker dynamic list for the
/// `ts_current_*` allocator symbols.
fn main() {
    // Only emitted when a commit hash could be determined.
    if let Some(sha) = read_git_sha() {
        println!("cargo:rustc-env=BUILD_SHA={sha}");
    }

    // Register the custom cfg names so rustc's check-cfg accepts them.
    for custom_cfg in ["sanitizing", "TREE_SITTER_EMBED_WASM_BINDING"] {
        println!("cargo:rustc-check-cfg=cfg({custom_cfg})");
    }

    if web_playground_files_present() {
        println!("cargo:rustc-cfg=TREE_SITTER_EMBED_WASM_BINDING");
    }

    // Build timestamp as fractional seconds since the Unix epoch.
    let since_epoch = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap();
    println!("cargo:rustc-env=BUILD_TIME={}", since_epoch.as_secs_f64());

    // This `cfg` is evaluated for the platform the build script itself is
    // compiled for.
    #[cfg(any(
        target_os = "linux",
        target_os = "android",
        target_os = "freebsd",
        target_os = "openbsd",
        target_os = "netbsd",
        target_os = "dragonfly",
    ))]
    {
        // Write the symbol list into OUT_DIR and hand it to the linker.
        let dynamic_list_path =
            PathBuf::from(env::var("OUT_DIR").unwrap()).join("dynamic-symbols.txt");
        let dynamic_list = "{
                ts_current_malloc;
                ts_current_calloc;
                ts_current_realloc;
                ts_current_free;
            };";
        std::fs::write(&dynamic_list_path, dynamic_list).unwrap();
        println!(
            "cargo:rustc-link-arg=-Wl,--dynamic-list={}",
            dynamic_list_path.display()
        );
    }
}

/// Returns `true` only when every web playground asset exists on disk.
///
/// `main` uses the result to decide whether to enable the
/// `TREE_SITTER_EMBED_WASM_BINDING` cfg.
fn web_playground_files_present() -> bool {
    // Cargo runs build scripts with the package root (`crates/cli`) as the
    // working directory, so the repository-level `docs/` and `lib/`
    // directories are two levels up, not one.
    const REQUIRED_ASSETS: [&str; 3] = [
        "../../docs/src/assets/js/playground.js",
        "../../lib/binding_web/web-tree-sitter.js",
        "../../lib/binding_web/web-tree-sitter.wasm",
    ];

    REQUIRED_ASSETS
        .iter()
        .all(|asset| Path::new(asset).exists())
}

/// Returns the commit hash of `HEAD` when the crate is built from a git
/// checkout, or `None` otherwise.
///
/// `None` is returned when no `.git` entry is found, when `git` cannot be
/// run, when `git rev-parse HEAD` exits unsuccessfully, or when it prints
/// nothing.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set.
fn read_git_sha() -> Option<String> {
    let crate_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());

    // The crate lives at `<repo>/crates/cli`, so `.git` is in the grandparent
    // directory; the direct parent is still checked for a flat layout. The
    // search deliberately stops there so that an unrelated repository further
    // up the tree is never reported.
    let in_git_checkout = crate_path
        .ancestors()
        .skip(1)
        .take(2)
        .any(|dir| dir.join(".git").exists());
    if !in_git_checkout {
        return None;
    }

    let output = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(&crate_path)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    // `git rev-parse` terminates its output with a newline; strip it so the
    // value is exactly the hash.
    let sha = String::from_utf8_lossy(&output.stdout).trim().to_string();
    (!sha.is_empty()).then_some(sha)
}



================================================
FILE: crates/cli/Cargo.toml
================================================
[package]
name = "tree-sitter-cli"
version.workspace = true
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-cli"
license.workspace = true
keywords.workspace = true
categories.workspace = true
include = ["build.rs", "README.md", "benches/*", "src/**"]

[lints]
workspace = true

[lib]
path = "src/tree_sitter_cli.rs"

[[bin]]
name = "tree-sitter"
path = "src/main.rs"
doc = false

[[bench]]
name = "benchmark"
harness = false

[features]
wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]

[dependencies]
ansi_colours.workspace = true
anstyle.workspace = true
anyhow.workspace = true
bstr.workspace = true
clap.workspace = true
clap_complete.workspace = true
clap_complete_nushell.workspace = true
ctor.workspace = true
ctrlc.workspace = true
dialoguer.workspace = true
filetime.workspace = true
glob.workspace = true
heck.workspace = true
html-escape.workspace = true
indexmap.workspace = true
indoc.workspace = true
log.workspace = true
memchr.workspace = true
rand.workspace = true
regex.workspace = true
regex-syntax.workspace = true
rustc-hash.workspace = true
semver.workspace = true
serde.workspace = true
serde_derive.workspace = true
serde_json.workspace = true
similar.workspace = true
smallbitvec.workspace = true
streaming-iterator.workspace = true
tiny_http.workspace = true
topological-sort.workspace = true
url.workspace = true
walkdir.workspace = true
wasmparser.workspace = true
webbrowser.workspace = true

tree-sitter.workspace = true
tree-sitter-generate.workspace = true
tree-sitter-config.workspace = true
tree-sitter-highlight.workspace = true
tree-sitter-loader.workspace = true
tree-sitter-tags.workspace = true

[dev-dependencies]
encoding_rs = "0.8.35"
widestring = "1.2.0"
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }

tempfile.workspace = true
pretty_assertions.workspace = true
unindent.workspace = true



================================================
FILE: crates/cli/benches/benchmark.rs
================================================
use std::{
    collections::BTreeMap,
    env, fs,
    path::{Path, PathBuf},
    str,
    sync::LazyLock,
    time::Instant,
};

use anyhow::Context;
use tree_sitter::{Language, Parser, Query};
use tree_sitter_loader::{CompileConfig, Loader};

include!("../src/tests/helpers/dirs.rs");

/// Optional language name from `TREE_SITTER_BENCHMARK_LANGUAGE_FILTER`; when
/// set, only the language directory with exactly this name is benchmarked.
static LANGUAGE_FILTER: LazyLock<Option<String>> = LazyLock::new(|| {
    let filter = env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER");
    filter.ok()
});

/// Optional text from `TREE_SITTER_BENCHMARK_EXAMPLE_FILTER`; when set, only
/// files whose path contains it are benchmarked.
static EXAMPLE_FILTER: LazyLock<Option<String>> = LazyLock::new(|| {
    let filter = env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER");
    filter.ok()
});

/// Number of times each file is processed, read from
/// `TREE_SITTER_BENCHMARK_REPETITION_COUNT` (default 5). Panics on first use
/// if the variable is set to something that is not a `usize`.
static REPETITION_COUNT: LazyLock<usize> = LazyLock::new(|| {
    env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT")
        .map_or(5, |raw| raw.parse::<usize>().unwrap())
});
/// Loader shared by the whole benchmark run, created on first use with a clone
/// of `SCRATCH_DIR` as its parser library path.
// NOTE(review): `SCRATCH_DIR` presumably comes from the included
// `helpers/dirs.rs` — confirm.
static TEST_LOADER: LazyLock<Loader> =
    LazyLock::new(|| Loader::with_parser_lib_path(SCRATCH_DIR.clone()));

/// Maps each grammar directory (relative to `GRAMMARS_DIR`) to the regular
/// files directly inside its `examples` and `queries` subdirectories, in that
/// order. A directory counts as a grammar when it contains `grammar.js`;
/// other directories are searched recursively.
#[allow(clippy::type_complexity)]
static EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: LazyLock<
    BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>,
> = LazyLock::new(|| {
    /// Appends every regular file directly inside `dir` to `files`. A
    /// directory that cannot be read contributes nothing.
    fn collect_files(files: &mut Vec<PathBuf>, dir: PathBuf) {
        let Ok(entries) = fs::read_dir(dir) else {
            return;
        };
        for entry in entries {
            let path = entry.unwrap().path();
            if path.is_file() {
                files.push(path);
            }
        }
    }

    /// Records `dir` if it is a grammar directory, otherwise descends into
    /// each of its subdirectories.
    fn visit(found: &mut BTreeMap<PathBuf, (Vec<PathBuf>, Vec<PathBuf>)>, dir: &Path) {
        if !dir.join("grammar.js").exists() {
            for child in fs::read_dir(dir).unwrap() {
                let child = child.unwrap().path();
                if child.is_dir() {
                    visit(found, &child);
                }
            }
            return;
        }

        let language_dir = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap();
        let (examples, queries) = found.entry(language_dir.to_owned()).or_default();
        collect_files(examples, dir.join("examples"));
        collect_files(queries, dir.join("queries"));
    }

    let mut found = BTreeMap::new();
    visit(&mut found, &GRAMMARS_DIR);
    found
});

/// Runs the benchmark: for every selected language it constructs the
/// language's queries, parses the language's own examples, then parses the
/// examples of every other language (which exercises error recovery), and
/// prints per-language and overall average/worst speeds to stderr.
fn main() {
    /// True when no example filter is set or `path` contains the filter text.
    fn passes_example_filter(path: &Path) -> bool {
        match EXAMPLE_FILTER.as_ref() {
            Some(filter) => path.to_str().unwrap().contains(filter.as_str()),
            None => true,
        }
    }

    /// Prints the average and worst speed of each non-empty speed list.
    fn report(normal: &[usize], errors: &[usize]) {
        if let Some((average, worst)) = aggregate(normal) {
            eprintln!("  Average Speed (normal): {average} bytes/ms");
            eprintln!("  Worst Speed (normal):   {worst} bytes/ms");
        }
        if let Some((average, worst)) = aggregate(errors) {
            eprintln!("  Average Speed (errors): {average} bytes/ms");
            eprintln!("  Worst Speed (errors):   {worst} bytes/ms");
        }
    }

    // Longest file name over all examples and queries; used to align output.
    let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR
        .values()
        .flat_map(|(examples, queries)| examples.iter().chain(queries.iter()))
        .map(|file| file.file_name().unwrap().to_str().unwrap().len())
        .max()
        .unwrap_or(0);

    eprintln!("Benchmarking with {} repetitions", *REPETITION_COUNT);

    let mut parser = Parser::new();
    let mut all_normal_speeds = Vec::new();
    let mut all_error_speeds = Vec::new();

    for (language_path, (example_paths, query_paths)) in
        EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
    {
        let language_name = language_path.file_name().unwrap().to_str().unwrap();

        let excluded = LANGUAGE_FILTER
            .as_ref()
            .is_some_and(|filter| language_name != filter.as_str());
        if excluded {
            continue;
        }

        eprintln!("\nLanguage: {language_name}");
        let language = get_language(language_path);
        parser.set_language(&language).unwrap();

        eprintln!("  Constructing Queries");
        for query_path in query_paths {
            if !passes_example_filter(query_path) {
                continue;
            }

            parse(query_path, max_path_length, |source| {
                Query::new(&language, str::from_utf8(source).unwrap())
                    .with_context(|| format!("Query file path: {}", query_path.display()))
                    .expect("Failed to parse query");
            });
        }

        eprintln!("  Parsing Valid Code:");
        let mut normal_speeds = Vec::new();
        for example_path in example_paths {
            if !passes_example_filter(example_path) {
                continue;
            }

            let speed = parse(example_path, max_path_length, |code| {
                parser.parse(code, None).expect("Failed to parse");
            });
            normal_speeds.push(speed);
        }

        eprintln!("  Parsing Invalid Code (mismatched languages):");
        let mut error_speeds = Vec::new();
        for (other_language_path, (other_example_paths, _)) in
            EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter()
        {
            // Only examples written for a different language are "invalid".
            if other_language_path == language_path {
                continue;
            }

            for example_path in other_example_paths {
                if !passes_example_filter(example_path) {
                    continue;
                }

                let speed = parse(example_path, max_path_length, |code| {
                    parser.parse(code, None).expect("Failed to parse");
                });
                error_speeds.push(speed);
            }
        }

        report(&normal_speeds, &error_speeds);

        all_normal_speeds.extend(normal_speeds);
        all_error_speeds.extend(error_speeds);
    }

    eprintln!("\n  Overall");
    report(&all_normal_speeds, &all_error_speeds);
    eprintln!();
}

/// Summarizes a list of speeds as `(average, worst)`, where the average uses
/// integer division and the worst speed is the smallest value.
///
/// Returns `None` for an empty slice.
fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> {
    // `min()` is `None` exactly when the slice is empty.
    let slowest = speeds.iter().copied().min()?;
    let sum: usize = speeds.iter().sum();
    Some((sum / speeds.len(), slowest))
}

/// Times `action` on the contents of the file at `path` and returns the
/// measured speed in bytes per millisecond.
///
/// The file is read once, `action` is run `REPETITION_COUNT` times (at least
/// once), and the elapsed time is averaged over the runs. The file name
/// (padded to `max_path_length`), the average time and the speed are printed
/// to stderr.
///
/// # Panics
///
/// Panics if the file cannot be read or its name is not valid UTF-8.
fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize {
    eprint!(
        "    {:width$}\t",
        path.file_name().unwrap().to_str().unwrap(),
        width = max_path_length
    );

    let source_code = fs::read(path)
        .with_context(|| format!("Failed to read {}", path.display()))
        .unwrap();

    // A repetition count of 0 would make the averaging below divide by zero,
    // so the action always runs at least once.
    let repetitions = (*REPETITION_COUNT).max(1);
    let time = Instant::now();
    for _ in 0..repetitions {
        action(&source_code);
    }
    let duration = time.elapsed() / (repetitions as u32);

    // Very fast runs (or a coarse clock) can average out to 0 ns; clamp to
    // 1 ns so the speed computation cannot divide by zero.
    let duration_ns = duration.as_nanos().max(1);
    let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns;
    eprintln!(
        "time {:>7.2} ms\t\tspeed {speed:>6} bytes/ms",
        (duration_ns as f64) / 1e6,
    );
    speed as usize
}

/// Loads the language whose grammar directory is `path` (relative to
/// `GRAMMARS_DIR`) from that directory's `src` subdirectory, using the shared
/// `TEST_LOADER`.
///
/// # Panics
///
/// Panics if the loader returns an error.
fn get_language(path: &Path) -> Language {
    let src_path = GRAMMARS_DIR.join(path).join("src");
    let config = CompileConfig::new(&src_path, None, None);
    let loaded = TEST_LOADER
        .load_language_at_path(config)
        .with_context(|| format!("Failed to load language at path {}", src_path.display()));
    loaded.unwrap()
}



================================================
FILE: crates/cli/eslint/index.js
================================================
import globals from 'globals';
import jsdoc from 'eslint-plugin-jsdoc';

/**
 * Shared ESLint flat configuration: the recommended `eslint-plugin-jsdoc`
 * preset followed by one config object carrying the language options and
 * the rule set.
 */
export default [
  jsdoc.configs['flat/recommended'],
  {
    languageOptions: {
      ecmaVersion: 'latest',
      sourceType: 'module',
      globals: {
        ...globals.commonjs,
        ...globals.es2021,
      },
    },
    plugins: {
      jsdoc,
    },
    rules: {
      'no-cond-assign': 'off',
      'no-irregular-whitespace': 'error',
      'no-unexpected-multiline': 'error',
      'curly': ['error', 'multi-line'],
      'guard-for-in': 'error',
      'no-caller': 'error',
      'no-extend-native': 'error',
      'no-extra-bind': 'error',
      'no-invalid-this': 'error',
      'no-multi-spaces': 'error',
      'no-multi-str': 'error',
      'no-new-wrappers': 'error',
      'no-throw-literal': 'error',
      'no-with': 'error',
      'prefer-promise-reject-errors': 'error',
      'no-unused-vars': ['error', { args: 'none' }],
      'array-bracket-newline': 'off',
      'array-bracket-spacing': ['error', 'never'],
      'array-element-newline': 'off',
      'block-spacing': ['error', 'never'],
      'brace-style': 'error',
      'comma-dangle': ['error', 'always-multiline'],
      'comma-spacing': 'error',
      'comma-style': 'error',
      'computed-property-spacing': 'error',
      'eol-last': 'error',
      'func-call-spacing': 'error',

      // `camelcase` used to be listed twice in this object. In an object
      // literal the last duplicate wins, so the rule was always 'off'; only
      // that effective entry is kept.
      'camelcase': 'off',
      'indent': [
        'error',
        2,
        {
          'SwitchCase': 1,
        },
      ],
      'key-spacing': 'error',
      'keyword-spacing': 'error',
      'linebreak-style': 'error',
      'max-len': [
        'error',
        {
          code: 160,
          ignoreComments: true,
          ignoreUrls: true,
          ignoreStrings: true,
        },
      ],
      'new-cap': 'error',
      'no-array-constructor': 'error',
      'no-mixed-spaces-and-tabs': 'error',
      'no-multiple-empty-lines': ['error', { max: 2 }],
      'no-new-object': 'error',
      'no-tabs': 'error',
      'no-trailing-spaces': 'error',
      'object-curly-spacing': 'error',
      'one-var': ['error', {
        var: 'never',
        let: 'never',
        const: 'never',
      }],
      'operator-linebreak': ['error', 'after'],
      'padded-blocks': ['error', 'never'],
      'quote-props': ['error', 'consistent'],
      'quotes': ['error', 'single', { allowTemplateLiterals: true }],
      'semi': 'error',
      'semi-spacing': 'error',
      'space-before-blocks': 'error',
      'space-before-function-paren': ['error', {
        asyncArrow: 'always',
        anonymous: 'never',
        named: 'never',
      }],
      'spaced-comment': [
        'error',
        'always',
        {
          line: {
            markers: ['/'],
          },
        },
      ],
      'switch-colon-spacing': 'error',
      'arrow-parens': 'off',
      'constructor-super': 'error',
      'generator-star-spacing': ['error', 'after'],
      'no-new-symbol': 'error',
      'no-this-before-super': 'error',
      'no-var': 'error',
      'prefer-const': ['error', { destructuring: 'all' }],
      'prefer-rest-params': 'error',
      'prefer-spread': 'error',
      'rest-spread-spacing': 'error',
      'yield-star-spacing': ['error', 'after'],
      'jsdoc/no-undefined-types': 'off',
      'jsdoc/require-param-description': 'off',
      'jsdoc/require-returns-description': 'off',
      'jsdoc/require-returns': 'off',
      'jsdoc/tag-lines': ['error', 'any', { startLines: 1 }],
    },
  },
];



================================================
FILE: crates/cli/eslint/package.json
================================================
{
  "name": "eslint-config-treesitter",
  "version": "1.0.2",
  "description": "Eslint configuration for Tree-sitter grammar files",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/tree-sitter/tree-sitter.git"
  },
  "license": "MIT",
  "author": "Amaan Qureshi <amaanq12@gmail.com>",
  "main": "index.js",
  "type": "module",
  "keywords": [
    "eslint",
    "eslintconfig",
    "tree-sitter"
  ],
  "dependencies": {
    "eslint-plugin-jsdoc": "^50.2.4"
  },
  "peerDependencies": {
    "eslint": ">= 9"
  }
}



================================================
FILE: crates/cli/npm/cli.js
================================================
#!/usr/bin/env node

// Launcher for the `tree-sitter` binary that sits next to this script: it
// forwards the command-line arguments and stdio, then mirrors how the binary
// terminated.
const path = require('path');
const {spawn} = require('child_process');

const executable = process.platform === 'win32'
  ? 'tree-sitter.exe'
  : 'tree-sitter';

const child = spawn(
  path.join(__dirname, executable),
  process.argv.slice(2),
  {stdio: 'inherit'}
);

// Emitted when the binary could not be started (for example, it is missing).
// Without a listener this would surface as an uncaught exception.
child.on('error', (error) => {
  console.error(`Failed to run ${executable}: ${error.message}`);
  process.exit(1);
});

child.on('close', (code, signal) => {
  if (signal) {
    // The child was killed by a signal, so `code` is null. Re-raise the same
    // signal on this process; the non-zero exit code is a fallback in case
    // the signal does not terminate it.
    process.exitCode = 1;
    process.kill(process.pid, signal);
    return;
  }
  process.exit(code === null ? 1 : code);
});



================================================
FILE: crates/cli/npm/dsl.d.ts
================================================
/*
 * Shapes of the individual rule nodes. Every shape is an object discriminated
 * by a string-literal `type` field. Shapes that wrap a single rule keep it in
 * `content`; `ChoiceRule` and `SeqRule` keep a list of rules in `members`.
 */
type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string };
type BlankRule = { type: 'BLANK' };
type ChoiceRule = { type: 'CHOICE'; members: Rule[] };
type FieldRule = { type: 'FIELD'; name: string; content: Rule };
type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule };
type PatternRule = { type: 'PATTERN'; value: string };
// The four precedence shapes all carry a numeric `value` next to the wrapped rule.
type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number };
type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number };
type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
type PrecRule = { type: 'PREC'; content: Rule; value: number };
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
type RepeatRule = { type: 'REPEAT'; content: Rule };
type ReservedRule = { type: 'RESERVED'; content: Rule; context_name: string };
type SeqRule = { type: 'SEQ'; members: Rule[] };
type StringRule = { type: 'STRING'; value: string };
// Generic over the referenced rule's name so that name can be tracked as a literal type.
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
type TokenRule = { type: 'TOKEN'; content: Rule };

/**
 * Union of every rule node shape declared in this file, including
 * `ReservedRule`, which was declared but previously missing from the union.
 */
type Rule =
  | AliasRule
  | BlankRule
  | ChoiceRule
  | FieldRule
  | ImmediateTokenRule
  | PatternRule
  | PrecDynamicRule
  | PrecLeftRule
  | PrecRightRule
  | PrecRule
  | Repeat1Rule
  | RepeatRule
  | ReservedRule
  | SeqRule
  | StringRule
  | SymbolRule<string>
  | TokenRule;

/**
 * A regular expression supplied as a string, as opposed to a JavaScript
 * `RegExp`.
 *
 * Declared as an ambient class: a declaration file may only describe the
 * class (so no constructor body), and top-level declarations in a `.d.ts`
 * must carry `declare`.
 */
declare class RustRegex {
  /** The pattern text passed to the constructor. */
  value: string;

  /**
   * @param pattern pattern text, stored in `value`
   */
  constructor(pattern: string);
}

/**
 * Anything accepted where a rule is expected: a rule object, a JavaScript
 * `RegExp`, a `RustRegex`, or a plain string.
 */
type RuleOrLiteral = Rule | RegExp | RustRegex | string;

/**
 * Type of the `$` argument: one precisely-typed `SymbolRule` per known rule
 * name, intersected with a record so that any other string key also yields a
 * `SymbolRule`.
 */
type GrammarSymbols<RuleName extends string> = {
  [name in RuleName]: SymbolRule<name>;
} &
  Record<string, SymbolRule<string>>;

/**
 * Function that defines a single rule. It receives `$` and, optionally, a
 * `previous` rule.
 */
type RuleBuilder<RuleName extends string> = (
  $: GrammarSymbols<RuleName>,
  previous?: Rule,
) => RuleOrLiteral;

/**
 * One `RuleBuilder` per rule name. Each builder's `$` exposes the grammar's
 * own rule names as well as the base grammar's.
 */
type RuleBuilders<
  RuleName extends string,
  BaseGrammarRuleName extends string
> = {
    [name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
  };

/**
 * Options object describing a grammar.
 *
 * `RuleName` is the union of rule names defined by this grammar,
 * `BaseGrammarRuleName` the union of rule names of a base grammar (`never`
 * by default), and `Rules` the type of the `rules` mapping.
 */
interface Grammar<
  RuleName extends string,
  BaseGrammarRuleName extends string = never,
  Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
    RuleName,
    BaseGrammarRuleName
  >
> {
  /**
   * Name of the grammar language.
   */
  name: string;

  /** Mapping of grammar rule names to rule builder functions. */
  rules: Rules;

  /**
   * An array of arrays of precedence names or rules. Each inner array represents
   * a *descending* ordering. Names/rules listed earlier in one of these arrays
   * have higher precedence than any names/rules listed later in the same array.
   *
   * Using rules is just a shorthand way for using a name then calling prec()
   * with that name. It is just a convenience.
   *
   * @param $ grammar rules
   */
  precedences?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * An array of arrays of rule names. Each inner array represents a set of
   * rules that's involved in an _LR(1) conflict_ that is _intended to exist_
   * in the grammar. When these conflicts occur at runtime, Tree-sitter will
   * use the GLR algorithm to explore all of the possible interpretations. If
   * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
   * whose corresponding rule has the highest total _dynamic precedence_.
   *
   * @param $ grammar rules
   */
  conflicts?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[][],
  ) => RuleOrLiteral[][];

  /**
   * An array of token names which can be returned by an _external scanner_.
   * External scanners allow you to write custom C code which runs during the
   * lexing process in order to handle lexical rules (e.g. Python's indentation
   * tokens) that cannot be described by regular expressions.
   *
   * @param $ grammar rules
   * @param previous array of externals from the base schema, if any
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
   */
  externals?: (
    $: Record<string, SymbolRule<string>>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * An array of tokens that may appear anywhere in the language. This
   * is often used for whitespace and comments. The default value of
   * extras is to accept whitespace. To control whitespace explicitly,
   * specify extras: `$ => []` in your grammar.
   *
   * @param $ grammar rules
   */
  extras?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
  ) => RuleOrLiteral[];

  /**
   * An array of rules that should be automatically removed from the
   * grammar by replacing all of their usages with a copy of their definition.
   * This is useful for rules that are used in multiple places but for which
   * you don't want to create syntax tree nodes at runtime.
   *
   * @param $ grammar rules
   */
  inline?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * A list of hidden rule names that should be considered supertypes in the
   * generated node types file.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
   */
  supertypes?: (
    $: GrammarSymbols<RuleName | BaseGrammarRuleName>,
    previous: Rule[],
  ) => RuleOrLiteral[];

  /**
   * The name of a token that will match keywords for the purpose of the
   * keyword extraction optimization.
   *
   * @param $ grammar rules
   *
   * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction
   */
  word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;

  /**
   * Mapping of names to reserved word sets. The first reserved word set is the
   * global word set, meaning it applies to every rule in every parse state.
   * The other word sets can be used with the `reserved` function.
   */
  reserved?: Record<
    string,
    ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral[]
  >;
}

/**
 * The value produced by `grammar()`. It has the same keys as `Grammar`, with
 * every option carried over unchanged except `rules`, which becomes a plain
 * record mapping each rule name to its `Rule`.
 */
type GrammarSchema<RuleName extends string> = {
  [K in keyof Grammar<RuleName>]: K extends 'rules'
  ? Record<RuleName, Rule>
  : Grammar<RuleName>[K];
};

/**
 * Causes the given rule to appear with an alternative name in the syntax tree.
 * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
 * anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;

/**
 * Causes the given rule to appear as an alternative named node, for instance
 * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
 * node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;

/**
 * Creates a blank rule, matching nothing.
 */
declare function blank(): BlankRule;

/**
 * Assigns a field name to the child node(s) matched by the given rule.
 * In the resulting syntax tree, you can then use that field name to
 * access specific children.
 *
 * @param name name of the field
 * @param rule rule the field should match
 */
declare function field(name: string, rule: RuleOrLiteral): FieldRule;

/**
 * Creates a rule that matches one of a set of possible rules. The order
 * of the arguments does not matter. This is analogous to the `|` (pipe)
 * operator in EBNF notation.
 *
 * @param options possible rule choices
 */
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;

/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 *
 * @param rule rule to be made optional
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;

/**
 * Marks the given rule with a precedence which will be used to resolve LR(1)
 * conflicts at parser-generation time. When two rules overlap in a way that
 * represents either a true ambiguity or a _local_ ambiguity given one token
 * of lookahead, Tree-sitter will try to resolve the conflict by matching the
 * rule with the higher precedence.
 *
 * Precedence values can either be strings or numbers. When comparing rules
 * with numerical precedence, higher numbers indicate higher precedences. To
 * compare rules with string precedence, Tree-sitter uses the grammar's `precedences`
 * field.
 *
 * The default precedence given to all rules is zero. This works similarly to
 * the precedence directives in Yacc grammars.
 *
 * @param value precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  // The primitive `string` type is used throughout: the boxed `String`
  // interface would also admit `new String(...)` wrapper objects, which are
  // not meaningful precedence names.
  (value: string | number, rule: RuleOrLiteral): PrecRule;

  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;

  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(value: string | number, rule: RuleOrLiteral): PrecRightRule;

  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * @param value precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
};

/**
 * Creates a rule that matches _zero-or-more_ occurrences of a given rule.
 * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
 * rule is implemented in terms of `repeat1` but is included because it
 * is very commonly used.
 *
 * @param rule rule to repeat, zero or more times
 */
declare function repeat(rule: RuleOrLiteral): RepeatRule;

/**
 * Creates a rule that matches one-or-more occurrences of a given rule.
 *
 * @param rule rule to repeat, one or more times
 */
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;

/**
 * Overrides the global reserved word set for a given rule. The word set name
 * should be defined in the `reserved` field in the grammar.
 *
 * @param wordset name of the reserved word set
 * @param rule rule that will use the reserved word set
 */
declare function reserved(wordset: string, rule: RuleOrLiteral): ReservedRule;

/**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 */
declare function seq(...rules: RuleOrLiteral[]): SeqRule;

/**
 * Creates a symbol rule, representing another rule in the grammar by name.
 *
 * @param name name of the target rule
 */
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;

/**
 * Marks the given rule as producing only a single token. Tree-sitter's
 * default is to treat each String or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
 * than as a single regular expression) but still have Tree-sitter treat
 * it as a single token.
 *
 * @param rule rule to represent as a single token
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;

  /**
   * Marks the given rule as producing an immediate token. This allows
   * the parser to produce a different token based on whether or not
   * there are `extras` preceding the token's main content. When there
   * are _no_ leading `extras`, an immediate token is preferred over a
   * normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};

/**
 * Creates a new language grammar with the provided schema.
 *
 * @param options grammar options
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;

/**
 * Extends an existing language grammar with the provided options,
 * creating a new language.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;



================================================
FILE: crates/cli/npm/install.js
================================================
#!/usr/bin/env node

const fs = require('fs');
const zlib = require('zlib');
const http = require('http');
const https = require('https');
const packageJSON = require('./package.json');

https.globalAgent.keepAlive = false;

// Maps Node's `process.platform` / `process.arch` values to the platform and
// architecture names used in the release asset file names.
//
// Node reports 32-bit x86 as `ia32` (never `x86`), so every platform that
// ships an x86 build needs an `ia32` key. The `x86` keys are kept so that
// existing lookups continue to resolve.
const matrix = {
  platform: {
    'darwin': {
      name: 'macos',
      arch: {
        'arm64': { name: 'arm64' },
        'x64': { name: 'x64' },
      }
    },
    'linux': {
      name: 'linux',
      arch: {
        'arm64': { name: 'arm64' },
        'arm': { name: 'arm' },
        'x64': { name: 'x64' },
        'x86': { name: 'x86' },
        'ia32': { name: 'x86' },
        'ppc64': { name: 'powerpc64' },
      }
    },
    'win32': {
      name: 'windows',
      arch: {
        'arm64': { name: 'arm64' },
        'x64': { name: 'x64' },
        'x86': { name: 'x86' },
        'ia32': { name: 'x86' },
      }
    },
  },
};

// Determine the URL of the file.
// A plain `&&` guard is used rather than optional chaining so the script
// still parses on older Node.js releases that lack `?.` support.
const platform = matrix.platform[process.platform];
const arch = platform && platform.arch[process.arch];

if (!platform || !platform.name || !arch || !arch.name) {
  console.error(
    `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}`
  );
  process.exit(1);
}

const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`;
const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`;
const assetURL = `${releaseURL}/${assetName}`;

// Remove previously-downloaded files.
const executableName = process.platform === 'win32' ? 'tree-sitter.exe' : 'tree-sitter';
if (fs.existsSync(executableName)) {
  fs.unlinkSync(executableName);
}

/**
 * Reports a fatal installation error, removes any partially-written
 * executable so that a truncated binary is never left behind, and exits with
 * a non-zero status.
 *
 * @param {string} message text printed to stderr before exiting
 */
function abortInstall(message) {
  console.error(message);
  try {
    fs.unlinkSync(executableName);
  } catch (_error) {
    // Best effort: the file may not exist yet or may not be removable.
  }
  process.exit(1);
}

// Download the compressed file.
console.log(`Downloading ${assetURL}`);
const file = fs.createWriteStream(executableName);

file.on('error', error => {
  abortInstall(`Failed to write ${executableName}: ${error.message}`);
});

// Make the binary executable once it has been written completely.
file.on('finish', () => {
  fs.chmodSync(executableName, '755');
});

get(assetURL, response => {
  if (response.statusCode > 299) {
    abortInstall([
      'Download failed',
      '',
      `url: ${assetURL}`,
      `status: ${response.statusCode}`,
      `headers: ${JSON.stringify(response.headers, null, 2)}`,
      '',
    ].join('\n'));
  }

  // `pipe` does not forward errors, so each stage needs its own handler;
  // otherwise a dropped connection or a corrupt archive crashes the process
  // with an unhandled 'error' event and leaves a broken file on disk.
  const gunzip = zlib.createGunzip();
  response.on('error', error => {
    abortInstall(`Download of ${assetURL} failed: ${error.message}`);
  });
  gunzip.on('error', error => {
    abortInstall(`Failed to decompress ${assetURL}: ${error.message}`);
  });
  response.pipe(gunzip).pipe(file);
});

/**
 * Issues an HTTPS GET request for `url`, following redirects, and passes the
 * final response to `callback`.
 *
 * When `HTTPS_PROXY` (or `https_proxy`) is set, the request is tunnelled
 * through that proxy with an HTTP `CONNECT`. If the proxy refuses the tunnel,
 * the proxy's response is handed to `callback` so the caller can report it.
 *
 * Network-level failures are reported on stderr and terminate the process
 * with exit code 1.
 *
 * @param {string} url absolute URL to fetch
 * @param {(response: import('http').IncomingMessage) => void} callback
 *   receives the first non-redirect response
 * @param {number} [redirectsLeft=10] how many more redirects may be followed
 */
function get(url, callback, redirectsLeft = 10) {
  const fail = (error) => {
    console.error(`Request for ${url} failed: ${error.message}`);
    process.exit(1);
  };

  const processResponse = (response) => {
    const isRedirect = [301, 302, 303, 307, 308].includes(response.statusCode);
    if (isRedirect && response.headers.location) {
      if (redirectsLeft <= 0) {
        fail(new Error('too many redirects'));
        return;
      }
      // Discard the redirect body so the underlying socket is released.
      response.resume();
      // `Location` may be relative; resolve it against the current URL.
      const nextUrl = new URL(response.headers.location, url).toString();
      get(nextUrl, callback, redirectsLeft - 1);
    } else {
      callback(response);
    }
  };

  const proxyEnv = process.env.HTTPS_PROXY || process.env.https_proxy;
  if (!proxyEnv) {
    https.get(url, processResponse).on('error', fail);
    return;
  }

  const requestUrl = new URL(url);
  const requestPort = requestUrl.port || (requestUrl.protocol === 'https:' ? 443 : 80);
  const proxyUrl = new URL(proxyEnv);
  const request = proxyUrl.protocol === 'https:' ? https : http;
  const requestOption = {
    host: proxyUrl.hostname,
    port: proxyUrl.port || (proxyUrl.protocol === 'https:' ? 443 : 80),
    method: 'CONNECT',
    path: `${requestUrl.hostname}:${requestPort}`,
  };
  if (proxyUrl.username || proxyUrl.password) {
    const auth = `${decodeURIComponent(
      proxyUrl.username
    )}:${decodeURIComponent(proxyUrl.password)}`;
    requestOption.headers = {
      'Proxy-Authorization': `Basic ${Buffer.from(
        auth
      ).toString('base64')}`,
    };
  }

  const connectRequest = request.request(requestOption);
  connectRequest.on('error', fail);
  connectRequest.on('connect', (response, socket, _head) => {
    if (response.statusCode !== 200) {
      // let caller handle error
      callback(response);
      return;
    }

    // Run the TLS session for the real request over the tunnel socket.
    const agent = new https.Agent({ socket });
    https.get({
      // `hostname` excludes the port, which is passed separately below.
      host: requestUrl.hostname,
      port: requestPort,
      path: `${requestUrl.pathname}${requestUrl.search}`,
      agent,
    }, processResponse).on('error', fail);
  });
  connectRequest.end();
}



================================================
FILE: crates/cli/npm/package.json
================================================
{
  "name": "tree-sitter-cli",
  "version": "0.26.0",
  "author": {
    "name": "Max Brunsfeld",
    "email": "maxbrunsfeld@gmail.com"
  },
  "maintainers": [
    {
      "name": "Amaan Qureshi",
      "email": "amaanq12@gmail.com"
    }
  ],
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/tree-sitter/tree-sitter.git"
  },
  "description": "CLI for generating fast incremental parsers",
  "keywords": [
    "parser",
    "lexer"
  ],
  "main": "lib/api/index.js",
  "engines": {
    "node": ">=12.0.0"
  },
  "scripts": {
    "install": "node install.js",
    "prepack": "cp ../../LICENSE ../README.md .",
    "postpack": "rm LICENSE README.md"
  },
  "bin": {
    "tree-sitter": "cli.js"
  }
}



================================================
FILE: crates/cli/src/fuzz.rs
================================================
use std::{
    collections::HashMap,
    env, fs,
    path::{Path, PathBuf},
    sync::LazyLock,
};

use rand::Rng;
use regex::Regex;
use tree_sitter::{Language, Parser};

pub mod allocations;
pub mod corpus_test;
pub mod edits;
pub mod random;
pub mod scope_sequence;

use crate::{
    fuzz::{
        corpus_test::{
            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
        },
        edits::{get_random_edit, invert_edit},
        random::Rand,
    },
    parse::perform_edit,
    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
};

/// `true` when the `TREE_SITTER_LOG` environment variable is set to any
/// valid Unicode value.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// `true` when the `TREE_SITTER_LOG_GRAPHS` environment variable is set to
/// any valid Unicode value.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Value of the `TREE_SITTER_LANGUAGE` environment variable, if set.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex compiled from `TREE_SITTER_EXAMPLE_INCLUDE`; `None` when the
/// variable is unset or does not compile.
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex compiled from `TREE_SITTER_EXAMPLE_EXCLUDE`; `None` when the
/// variable is unset or does not compile.
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Seed produced by `new_seed` the first time this static is accessed.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Value of `TREE_SITTER_EDITS`, defaulting to 3 when the variable is unset
/// or is not a valid `usize`.
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Value of `TREE_SITTER_ITERATIONS`, defaulting to 10 when the variable is
/// unset or is not a valid `usize`.
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));

/// Reads the environment variable `name` and parses it as a `usize`.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or does
/// not parse as a `usize`.
fn int_env_var(name: &'static str) -> Option<usize> {
    let raw = env::var(name).ok()?;
    raw.parse().ok()
}

/// Reads the environment variable `name` and compiles it as a regular
/// expression.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or is
/// not a valid pattern.
fn regex_env_var(name: &'static str) -> Option<Regex> {
    let pattern = env::var(name).ok()?;
    Regex::new(&pattern).ok()
}

/// Returns the fuzzing seed.
///
/// The value of `TREE_SITTER_SEED` is used when it is set to a valid
/// `usize`. Otherwise a random seed is generated and printed to stderr.
#[must_use]
pub fn new_seed() -> usize {
    if let Some(seed) = int_env_var("TREE_SITTER_SEED") {
        return seed;
    }

    let seed = rand::thread_rng().gen::<usize>();
    eprintln!("Seed: {seed}");
    seed
}

/// Settings consumed by `fuzz_language_corpus`.
pub struct FuzzOptions {
    /// Names of tests to skip, in the form `"{language_name} - {test name}"`.
    /// `fuzz_language_corpus` takes this value out of the struct, and panics
    /// at the end if any entry never matched a test.
    pub skipped: Option<Vec<String>>,
    /// Optional directory joined between the grammar directory and
    /// `test/corpus`. Taken out of the struct by `fuzz_language_corpus`.
    pub subdir: Option<PathBuf>,
    /// Edit-count setting for each fuzzing trial.
    pub edits: usize,
    /// Number of trials run for each corpus test.
    pub iterations: usize,
    /// Name filter forwarded to `flatten_tests` as its `include` pattern.
    pub include: Option<Regex>,
    /// Name filter forwarded to `flatten_tests` as its `exclude` pattern.
    pub exclude: Option<Regex>,
    /// When `true`, each trial prints the input text to stderr before the
    /// edits, after the edits, and after the edits are undone.
    pub log_graphs: bool,
    /// Logging flag. NOTE(review): not read by `fuzz_language_corpus`;
    /// presumably consumed by callers — confirm.
    pub log: bool,
}

pub fn fuzz_language_corpus(
    language: &Language,
    language_name: &str,
    start_seed: usize,
    grammar_dir: &Path,
    options: &mut FuzzOptions,
) {
    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
        match entry {
            TestEntry::Example { attributes, .. } => {
                attributes.languages[0].is_empty()
                    || attributes
                        .languages
                        .iter()
                        .any(|lang| lang.as_ref() == language_name)
            }
            TestEntry::Group {
                ref mut children, ..
            } => {
                children.retain_mut(|child| retain(child, language_name));
                !children.is_empty()
            }
        }
    }

    let subdir = options.subdir.take().unwrap_or_default();

    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");

    if !corpus_dir.exists() || !corpus_dir.is_dir() {
        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
        return;
    }

    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
        return;
    }

    let mut main_tests = parse_tests(&corpus_dir).unwrap();
    match main_tests {
        TestEntry::Group {
            ref mut children, ..
        } => {
            children.retain_mut(|child| retain(child, language_name));
        }
        TestEntry::Example { .. } => unreachable!(),
    }
    let tests = flatten_tests(
        main_tests,
        options.include.as_ref(),
        options.exclude.as_ref(),
    );

    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);

    let mut skipped = options
        .skipped
        .take()
        .unwrap_or_default()
        .into_iter()
        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
        .map(|x| (x, 0))
        .collect::<HashMap<String, usize>>();

    let mut failure_count = 0;

    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();

    if log_seed {
        println!("  start seed: {start_seed}");
    }

    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = get_test_name(test);
        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
            println!("  {test_index}. {test_name} - SKIPPED");
            *counter += 1;
            continue;
        }

        println!("  {test_index}. {test_name}");

        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

            let tree = parser.parse(&test.input, None).unwrap();

            if test.error {
                return true;
            }

            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }

            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }

            true
        })
        .unwrap_or_else(|e| {
            eprintln!("Error: {e}");
            false
        });

        if !passed {
            failure_count += 1;
            continue;
        }

        let mut parser = Parser::new();
        parser.set_language(language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        for trial in 0..options.iterations {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // Perform a random series of edits and reparse.
                let edit_count = rand.unsigned(*EDIT_COUNT);
                let mut undo_stack = Vec::with_capacity(edit_count);
                for _ in 0..=edit_count {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }

                if log_seed {
                    println!("   {test_index}.{trial:<2} seed: {seed}");
                }

                if dump_edits {
                    fs::create_dir_all("fuzz").unwrap();
                    fs::write(
                        Path::new("fuzz")
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }

                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }

                if actual_output != test.output && !test.error {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }

                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }

                true
            }).unwrap_or_else(|e| {
                eprintln!("Error: {e}");
                false
            });

            if !passed {
                failure_count += 1;
                break;
            }
        }
    }

    if failure_count != 0 {
        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
    }

    skipped.retain(|_, v| *v == 0);

    if !skipped.is_empty() {
        println!("Non matchable skip definitions:");
        for k in skipped.keys() {
            println!("  {k}");
        }
        panic!("Non matchable skip definitions needs to be removed");
    }
}

/// A single corpus example produced by `flatten_tests`.
pub struct FlattenedTest {
    /// Example name, prefixed with its non-root group names joined by
    /// `" - "`.
    pub name: String,
    /// Raw bytes of the example's input.
    pub input: Vec<u8>,
    /// Expected output that the parsed tree's s-expression is compared to.
    pub output: String,
    /// The `languages` attribute of the example.
    pub languages: Vec<Box<str>>,
    /// The `error` attribute; when `true`, `fuzz_language_corpus` does not
    /// compare the parse result against `output`.
    pub error: bool,
    /// The `skip` attribute; `fuzz_language_corpus` adds such tests to its
    /// skip set.
    pub skip: bool,
    /// When `false`, field names are stripped from the actual s-expression
    /// before it is compared with `output`.
    pub has_fields: bool,
    /// Delimiter pair forwarded to `set_included_ranges`. `flatten_tests`
    /// always sets this to `None`.
    pub template_delimiters: Option<(&'static str, &'static str)>,
}

/// Flattens a tree of corpus test entries into a list of examples.
///
/// Each example's name is prefixed with the names of its enclosing non-root
/// groups, joined by `" - "`. When `include` is given, only examples whose
/// full name matches it are kept and `exclude` is not consulted. Otherwise,
/// examples whose full name matches `exclude` are dropped.
#[must_use]
pub fn flatten_tests(
    test: TestEntry,
    include: Option<&Regex>,
    exclude: Option<&Regex>,
) -> Vec<FlattenedTest> {
    /// Recursively appends the examples below `entry` to `out`.
    fn collect(
        entry: TestEntry,
        include: Option<&Regex>,
        exclude: Option<&Regex>,
        is_root: bool,
        prefix: &str,
        out: &mut Vec<FlattenedTest>,
    ) {
        match entry {
            TestEntry::Group { name, children, .. } => {
                // The root group's own name is passed down without a prefix.
                let group_name = if is_root || prefix.is_empty() {
                    name
                } else {
                    format!("{prefix} - {name}")
                };
                for child in children {
                    collect(child, include, exclude, false, &group_name, out);
                }
            }
            TestEntry::Example {
                name,
                input,
                output,
                has_fields,
                attributes,
                ..
            } => {
                let full_name = if prefix.is_empty() {
                    name
                } else {
                    format!("{prefix} - {name}")
                };

                // `include` takes priority; `exclude` only applies without it.
                let filtered_out = match (include, exclude) {
                    (Some(pattern), _) => !pattern.is_match(&full_name),
                    (None, Some(pattern)) => pattern.is_match(&full_name),
                    (None, None) => false,
                };
                if filtered_out {
                    return;
                }

                out.push(FlattenedTest {
                    name: full_name,
                    input,
                    output,
                    languages: attributes.languages,
                    error: attributes.error,
                    skip: attributes.skip,
                    has_fields,
                    template_delimiters: None,
                });
            }
        }
    }

    let mut flattened = Vec::new();
    collect(test, include, exclude, true, "", &mut flattened);
    flattened
}



================================================
FILE: crates/cli/src/highlight.rs
================================================
use std::{
    collections::{BTreeMap, HashSet},
    fmt::Write,
    fs,
    io::{self, Write as _},
    path::{self, Path, PathBuf},
    str,
    sync::{atomic::AtomicUsize, Arc},
    time::Instant,
};

use ansi_colours::{ansi256_from_rgb, rgb_from_ansi256};
use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor};
use anyhow::Result;
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
use tree_sitter_loader::Loader;

/// Opening of the HTML document: the doctype, the start of `<head>`, the page
/// title, and the base stylesheet. `<head>` is left open.
pub const HTML_HEAD_HEADER: &str = "
<!doctype HTML>
<head>
  <title>Tree-sitter Highlighting</title>
  <style>
    body {
      font-family: monospace
    }
    .line-number {
      user-select: none;
      text-align: right;
      color: rgba(27,31,35,.3);
      padding: 0 10px;
    }
    .line {
      white-space: pre;
    }
  </style>";

/// Closes `<head>` and opens `<body>`.
pub const HTML_BODY_HEADER: &str = "
</head>
<body>
";

/// Closes `<body>`.
pub const HTML_FOOTER: &str = "
</body>
";

/// Styling for one highlight name, in both terminal and CSS form.
#[derive(Debug, Default)]
pub struct Style {
    /// Terminal (ANSI) style.
    pub ansi: anstyle::Style,
    /// CSS text derived from `ansi` by `style_to_css`, or `None` when the
    /// theme entry was neither an object nor a recognizable color.
    pub css: Option<String>,
}

/// A set of highlight names and their styles, stored as parallel vectors:
/// `styles[i]` is the style for `highlight_names[i]`.
#[derive(Debug)]
pub struct Theme {
    /// Style of each highlight, in the same order as `highlight_names`.
    pub styles: Vec<Style>,
    /// Highlight names, in the same order as `styles`.
    pub highlight_names: Vec<String>,
}

/// Serializable wrapper holding a `Theme` under the `theme` key.
#[derive(Default, Deserialize, Serialize)]
pub struct ThemeConfig {
    /// The theme; falls back to `Theme::default()` when the key is missing
    /// during deserialization.
    #[serde(default)]
    pub theme: Theme,
}

impl Theme {
    /// Loads a theme from the JSON file at `path`.
    ///
    /// Falls back to `Theme::default()` when the file contents cannot be
    /// deserialized.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be read.
    pub fn load(path: &path::Path) -> io::Result<Self> {
        fs::read_to_string(path)
            .map(|contents| serde_json::from_str(&contents).unwrap_or_default())
    }

    /// Returns the style used when no highlight applies, which is always
    /// `Style::default()`.
    #[must_use]
    pub fn default_style(&self) -> Style {
        Style::default()
    }
}

impl<'de> Deserialize<'de> for Theme {
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let mut styles = Vec::new();
        let mut highlight_names = Vec::new();
        if let Ok(colors) = BTreeMap::<String, Value>::deserialize(deserializer) {
            styles.reserve(colors.len());
            highlight_names.reserve(colors.len());
            for (name, style_value) in colors {
                let mut style = Style::default();
                parse_style(&mut style, style_value);
                highlight_names.push(name);
                styles.push(style);
            }
        }
        Ok(Self {
            styles,
            highlight_names,
        })
    }
}

impl Serialize for Theme {
    /// Serializes the theme as a map from highlight name to style value.
    ///
    /// A style with bold, italic, or underline set is written as an object
    /// holding those flags plus an optional `color`. A style with only a
    /// color is written as the bare color value, and a style with neither is
    /// written as `null`.
    ///
    /// Colors are written as a name for the basic ANSI colors, a number for
    /// 256-color indices, or a `#rrggbb` string for RGB colors.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(self.styles.len()))?;
        for (name, style) in self.highlight_names.iter().zip(&self.styles) {
            let style = &style.ansi;
            let color = style.get_fg_color().map(|color| match color {
                Color::Ansi(color) => match color {
                    AnsiColor::Black => json!("black"),
                    AnsiColor::Blue => json!("blue"),
                    AnsiColor::Cyan => json!("cyan"),
                    AnsiColor::Green => json!("green"),
                    AnsiColor::Magenta => json!("purple"),
                    AnsiColor::Red => json!("red"),
                    AnsiColor::White => json!("white"),
                    AnsiColor::Yellow => json!("yellow"),
                    _ => unreachable!(),
                },
                Color::Ansi256(Ansi256Color(n)) => json!(n),
                // Each channel is zero-padded to two hex digits. Without
                // padding, (5, 10, 15) would be written as "#5af" instead of
                // "#050a0f".
                Color::Rgb(RgbColor(r, g, b)) => json!(format!("#{r:02x}{g:02x}{b:02x}")),
            });
            let effects = style.get_effects();
            if effects.contains(Effects::BOLD)
                || effects.contains(Effects::ITALIC)
                || effects.contains(Effects::UNDERLINE)
            {
                let mut style_json = BTreeMap::new();
                if let Some(color) = color {
                    style_json.insert("color", color);
                }
                if effects.contains(Effects::BOLD) {
                    style_json.insert("bold", Value::Bool(true));
                }
                if effects.contains(Effects::ITALIC) {
                    style_json.insert("italic", Value::Bool(true));
                }
                if effects.contains(Effects::UNDERLINE) {
                    style_json.insert("underline", Value::Bool(true));
                }
                map.serialize_entry(&name, &style_json)?;
            } else if let Some(color) = color {
                map.serialize_entry(&name, &color)?;
            } else {
                map.serialize_entry(&name, &Value::Null)?;
            }
        }
        map.end()
    }
}

impl Default for Theme {
    /// Builds the built-in theme by deserializing the JSON literal below.
    ///
    /// A bare number is a 256-color palette index, an object combines a
    /// `color` with style flags, and `null` leaves the highlight unstyled.
    fn default() -> Self {
        serde_json::from_value(json!({
            "attribute": {"color": 124, "italic": true},
            "comment": {"color": 245, "italic": true},
            "constant": 94,
            "constant.builtin": {"color": 94, "bold": true},
            "constructor": 136,
            "embedded": null,
            "function": 26,
            "function.builtin": {"color": 26, "bold": true},
            "keyword": 56,
            "module": 136,
            "number": {"color": 94, "bold": true},
            "operator": {"color": 239, "bold": true},
            "property": 124,
            "property.builtin": {"color": 124, "bold": true},
            "punctuation": 239,
            "punctuation.bracket": 239,
            "punctuation.delimiter": 239,
            "punctuation.special": 239,
            "string": 28,
            "string.special": 30,
            "tag": 18,
            "type": 23,
            "type.builtin": {"color": 23, "bold": true},
            "variable": 252,
            "variable.builtin": {"color": 252, "bold": true},
            "variable.parameter": {"color": 252, "underline": true}
        }))
        .unwrap()
    }
}

/// Applies a theme entry (`json`) to `style` in place.
///
/// * An object may set `bold`, `italic` and `underline` (only when the value
///   is exactly `true`) and `color`; unknown keys are ignored. The CSS string
///   is then regenerated from the resulting ANSI style.
/// * Any other value is treated as a color. If it parses, the foreground is
///   updated and the CSS regenerated; otherwise the CSS is cleared.
///
/// The CSS is computed before the final step, which replaces an RGB
/// foreground with its ANSI 256 approximation when the terminal does not
/// advertise truecolor support. The CSS therefore keeps the exact RGB value.
fn parse_style(style: &mut Style, json: Value) {
    match json {
        Value::Object(entries) => {
            for (key, value) in entries {
                // Effects are only ever switched on, and only by a literal `true`.
                let enabled = value == Value::Bool(true);
                match key.as_str() {
                    "bold" if enabled => style.ansi = style.ansi.bold(),
                    "italic" if enabled => style.ansi = style.ansi.italic(),
                    "underline" if enabled => style.ansi = style.ansi.underline(),
                    "color" => {
                        if let Some(foreground) = parse_color(value) {
                            style.ansi = style.ansi.fg_color(Some(foreground));
                        }
                    }
                    _ => {}
                }
            }
            style.css = Some(style_to_css(style.ansi));
        }
        other => match parse_color(other) {
            Some(foreground) => {
                style.ansi = style.ansi.fg_color(Some(foreground));
                style.css = Some(style_to_css(style.ansi));
            }
            None => style.css = None,
        },
    }

    // Only RGB foregrounds may need to be downgraded for the terminal.
    let Some(Color::Rgb(RgbColor(red, green, blue))) = style.ansi.get_fg_color() else {
        return;
    };
    if !terminal_supports_truecolor() {
        let approximation = Ansi256Color(ansi256_from_rgb((red, green, blue)));
        style.ansi = style.ansi.fg_color(Some(Color::Ansi256(approximation)));
    }
}

/// Converts a JSON theme value into a terminal color.
///
/// * A number is accepted only if it is an unsigned integer in `0..=255`, and
///   becomes an ANSI 256 palette index. Larger values are rejected instead of
///   being truncated to their low byte.
/// * A string is matched case-insensitively against the eight basic color
///   names (`"purple"` maps to ANSI magenta). Failing that, it is parsed as a
///   `#rrggbb` hex color.
/// * Every other JSON value, and every unrecognized string, yields `None`.
fn parse_color(json: Value) -> Option<Color> {
    match json {
        Value::Number(n) => n
            .as_u64()
            // A plain `as u8` cast would wrap (256 -> 0) and silently pick
            // an unrelated palette entry.
            .filter(|&index| index <= u64::from(u8::MAX))
            .map(|index| Color::Ansi256(Ansi256Color(index as u8))),
        Value::String(s) => match s.to_lowercase().as_str() {
            "black" => Some(Color::Ansi(AnsiColor::Black)),
            "blue" => Some(Color::Ansi(AnsiColor::Blue)),
            "cyan" => Some(Color::Ansi(AnsiColor::Cyan)),
            "green" => Some(Color::Ansi(AnsiColor::Green)),
            "purple" => Some(Color::Ansi(AnsiColor::Magenta)),
            "red" => Some(Color::Ansi(AnsiColor::Red)),
            "white" => Some(Color::Ansi(AnsiColor::White)),
            "yellow" => Some(Color::Ansi(AnsiColor::Yellow)),
            hex => hex_string_to_rgb(hex)
                .map(|(red, green, blue)| Color::Rgb(RgbColor(red, green, blue))),
        },
        _ => None,
    }
}

/// Parses a `#rrggbb` color string into its `(red, green, blue)` channels.
///
/// The string must start with `#` followed by at least six hexadecimal
/// digits (either case). Anything after the sixth digit is ignored, as
/// before. Returns `None` for every other input.
///
/// Unlike plain byte-range slicing, this never panics on non-ASCII input,
/// and it rejects sign characters that `u8::from_str_radix` would otherwise
/// accept (e.g. `"#+f+f+f"`).
fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> {
    // `str::get` returns `None` instead of panicking when the range is out of
    // bounds or does not fall on a character boundary.
    let hex = s.strip_prefix('#')?.get(..6)?;
    if !hex.bytes().all(|byte| byte.is_ascii_hexdigit()) {
        return None;
    }

    // All six bytes are ASCII at this point, so two-byte slices are safe.
    let channel = |start: usize| u8::from_str_radix(&hex[start..start + 2], 16).ok();
    Some((channel(0)?, channel(2)?, channel(4)?))
}

/// Renders an ANSI style as a run of CSS declarations.
///
/// Effect declarations come first, in the fixed order underline, bold,
/// italic, each terminated by `;`. The foreground color, if any, is appended
/// last via `write_color`.
fn style_to_css(style: anstyle::Style) -> String {
    let active = style.get_effects();
    let declarations = [
        (Effects::UNDERLINE, "text-decoration: underline;"),
        (Effects::BOLD, "font-weight: bold;"),
        (Effects::ITALIC, "font-style: italic;"),
    ];

    let mut css: String = declarations
        .iter()
        .filter(|(effect, _)| active.contains(*effect))
        .map(|(_, declaration)| *declaration)
        .collect();

    if let Some(foreground) = style.get_fg_color() {
        write_color(&mut css, foreground);
    }
    css
}

/// Appends a CSS `color: …` declaration for `color` to `buffer`.
///
/// The eight basic ANSI colors are written as CSS color names (magenta is
/// written as `purple`). ANSI 256 indices are converted to RGB with
/// `rgb_from_ansi256`, and both they and RGB colors are written as lowercase
/// `#rrggbb`. No trailing `;` is written.
///
/// Any other basic ANSI variant hits `unreachable!()`.
fn write_color(buffer: &mut String, color: Color) {
    let (r, g, b) = match color {
        Color::Ansi(named) => {
            let css_name = match named {
                AnsiColor::Black => "black",
                AnsiColor::Red => "red",
                AnsiColor::Green => "green",
                AnsiColor::Yellow => "yellow",
                AnsiColor::Blue => "blue",
                AnsiColor::Magenta => "purple",
                AnsiColor::Cyan => "cyan",
                AnsiColor::White => "white",
                _ => unreachable!(),
            };
            buffer.push_str("color: ");
            buffer.push_str(css_name);
            return;
        }
        Color::Ansi256(Ansi256Color(index)) => rgb_from_ansi256(index),
        Color::Rgb(RgbColor(r, g, b)) => (r, g, b),
    };
    write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap();
}

/// Reports whether the `COLORTERM` environment variable is set to exactly
/// `truecolor` or `24bit`. An unset or unreadable variable counts as `false`.
fn terminal_supports_truecolor() -> bool {
    match std::env::var("COLORTERM") {
        Ok(value) => matches!(value.as_str(), "truecolor" | "24bit"),
        Err(_) => false,
    }
}

/// Settings that control how [`highlight`] processes and prints a file.
pub struct HighlightOptions {
    /// Styles used for the output: ANSI styles for terminal output, CSS for HTML.
    pub theme: Theme,
    /// When set, capture names that the highlight configuration reports as
    /// nonconformant are listed on stderr before highlighting.
    pub check: bool,
    /// Optional file of capture names, one per line, passed to the
    /// nonconformance check. Only read when `check` is set.
    pub captures_path: Option<PathBuf>,
    /// In HTML mode, emit `style='…'` attributes instead of `class='…'`.
    pub inline_styles: bool,
    /// Emit HTML instead of ANSI-styled text.
    pub html: bool,
    /// Suppresses the file name line and all HTML output.
    pub quiet: bool,
    /// Print the elapsed time in milliseconds to stderr when done.
    pub print_time: bool,
    /// Flag handed to the highlighter as its cancellation flag.
    pub cancellation_flag: Arc<AtomicUsize>,
}

/// Highlights the file at `path` with `config` and writes the result to stdout.
///
/// Steps, in order:
///
/// 1. If `opts.check` is set, the capture names that
///    `config.nonconformant_capture_names` returns are reported on stderr.
///    When `opts.captures_path` is given, that file supplies the capture-name
///    set passed to the check: one entry per line, blank lines and lines
///    starting with `;` skipped, anything after a `;` dropped, and
///    surrounding whitespace and double quotes trimmed.
/// 2. The file is read and highlighted; injected languages are resolved
///    through `loader`.
/// 3. `name` is printed first when `print_name` is set and `opts.quiet` is not.
/// 4. With `opts.html`, an HTML document is written (skipped entirely when
///    `opts.quiet` is set): a `<style>` block with one rule per themed
///    highlight name, then a table with one row per rendered line. Spans carry
///    either inline styles or class names, depending on `opts.inline_styles`.
///    Otherwise the source is written with ANSI styling.
/// 5. If `opts.print_time` is set, the elapsed time is printed to stderr.
///
/// # Errors
///
/// Returns an error if the captures file or the source file cannot be read,
/// if highlighting fails, or if writing to stdout fails. Write failures in
/// the ANSI branch (e.g. a closed pipe) are returned instead of panicking.
pub fn highlight(
    loader: &Loader,
    path: &Path,
    name: &str,
    config: &HighlightConfiguration,
    print_name: bool,
    opts: &HighlightOptions,
) -> Result<()> {
    if opts.check {
        let names = if let Some(path) = opts.captures_path.as_deref() {
            let file = fs::read_to_string(path)?;
            let capture_names = file
                .lines()
                .filter_map(|line| {
                    // Skip blank lines and whole-line `;` comments.
                    if line.trim().is_empty() || line.trim().starts_with(';') {
                        return None;
                    }
                    // Keep only the text before a trailing `;` comment.
                    line.split(';').next().map(|s| s.trim().trim_matches('"'))
                })
                .collect::<HashSet<_>>();
            config.nonconformant_capture_names(&capture_names)
        } else {
            config.nonconformant_capture_names(&HashSet::new())
        };
        if names.is_empty() {
            eprintln!("All highlight captures conform to standards.");
        } else {
            eprintln!(
                "Non-standard highlight {} detected:",
                if names.len() > 1 {
                    "captures"
                } else {
                    "capture"
                }
            );
            for name in names {
                eprintln!("* {name}");
            }
        }
    }

    let source = fs::read(path)?;
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    let time = Instant::now();
    let mut highlighter = Highlighter::new();
    let events =
        highlighter.highlight(config, &source, Some(&opts.cancellation_flag), |string| {
            loader.highlight_config_for_injection_string(string)
        })?;
    let theme = &opts.theme;

    if !opts.quiet && print_name {
        writeln!(&mut stdout, "{name}")?;
    }

    if opts.html {
        if !opts.quiet {
            writeln!(&mut stdout, "{HTML_HEAD_HEADER}")?;
            writeln!(&mut stdout, "  <style>")?;
            let names = theme.highlight_names.iter();
            let styles = theme.styles.iter();
            // Highlight names without a CSS style get no rule.
            for (name, style) in names.zip(styles) {
                if let Some(css) = &style.css {
                    writeln!(&mut stdout, "    .{name} {{ {css}; }}")?;
                }
            }
            writeln!(&mut stdout, "  </style>")?;
            writeln!(&mut stdout, "{HTML_BODY_HEADER}")?;
        }

        let mut renderer = HtmlRenderer::new();
        renderer.render(events, &source, &move |highlight, output| {
            if opts.inline_styles {
                output.extend(b"style='");
                output.extend(
                    theme.styles[highlight.0]
                        .css
                        .as_ref()
                        .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()),
                );
                output.extend(b"'");
            } else {
                // A dotted highlight name becomes a space-separated class list.
                output.extend(b"class='");
                let mut parts = theme.highlight_names[highlight.0].split('.').peekable();
                while let Some(part) = parts.next() {
                    output.extend(part.as_bytes());
                    if parts.peek().is_some() {
                        output.extend(b" ");
                    }
                }
                output.extend(b"'");
            }
        })?;

        if !opts.quiet {
            writeln!(&mut stdout, "<table>")?;
            for (i, line) in renderer.lines().enumerate() {
                writeln!(
                    &mut stdout,
                    "<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
                    i + 1,
                )?;
            }
            writeln!(&mut stdout, "</table>")?;
            writeln!(&mut stdout, "{HTML_FOOTER}")?;
        }
    } else {
        // The bottom of the stack is the theme's default style; each
        // HighlightStart pushes a style and each HighlightEnd pops one.
        let mut style_stack = vec![theme.default_style().ansi];
        for event in events {
            match event? {
                HighlightEvent::HighlightStart(highlight) => {
                    style_stack.push(theme.styles[highlight.0].ansi);
                }
                HighlightEvent::HighlightEnd => {
                    style_stack.pop();
                }
                HighlightEvent::Source { start, end } => {
                    let style = style_stack.last().unwrap();
                    // Propagate write failures like every other write here;
                    // unwrapping would panic when stdout is closed early.
                    write!(&mut stdout, "{style}")?;
                    stdout.write_all(&source[start..end])?;
                    write!(&mut stdout, "{style:#}")?;
                }
            }
        }
    }

    if opts.print_time {
        eprintln!("Time: {}ms", time.elapsed().as_millis());
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use std::env;

    use super::*;

    // Hex colors used as theme values in the test below.
    const JUNGLE_GREEN: &str = "#26A69A";
    const DARK_CYAN: &str = "#00AF87";

    // Checks how `parse_style` resolves hex colors depending on `COLORTERM`.
    //
    // NOTE: the test mutates the process-wide `COLORTERM` variable and reuses
    // one `style` across all three calls, so the statement order matters. The
    // original value is restored at the end, but only if no assertion fails.
    #[test]
    fn test_parse_style() {
        let original_environment_variable = env::var("COLORTERM");

        // A default style has neither a foreground color nor CSS.
        let mut style = Style::default();
        assert_eq!(style.ansi.get_fg_color(), None);
        assert_eq!(style.css, None);

        // Without truecolor support, darkcyan resolves to ANSI 256 color 36,
        // while the CSS keeps the hex value (lowercased).
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(36)))
        );
        assert_eq!(style.css, Some("color: #00af87".to_string()));

        // junglegreen is not an ANSI color and is preserved when the terminal supports it
        env::set_var("COLORTERM", "truecolor");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Rgb(RgbColor(38, 166, 154)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // junglegreen gets approximated as cadetblue (ANSI 256 color 72) when
        // the terminal does not support it; the CSS still carries the exact hex value
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(72)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // Restore the environment to what it was before the test ran.
        if let Ok(environment_variable) = original_environment_variable {
            env::set_var("COLORTERM", environment_variable);
        } else {
            env::remove_var("COLORTERM");
        }
    }
}



================================================
FILE: crates/cli/src/init.rs
================================================
use std::{
    fs,
    path::{Path, PathBuf},
    str::{self, FromStr},
};

use anyhow::{anyhow, Context, Result};
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::{formatdoc, indoc};
use semver::Version;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tree_sitter_generate::write_file;
use tree_sitter_loader::{Author, Bindings, Grammar, Links, Metadata, PathsJSON, TreeSitterJSON};
use url::Url;

// Naming convention in this section: each `*_PLACEHOLDER` constant holds a
// literal token. Presumably these tokens appear in the template files below
// and are substituted when a file is generated; the substitution code is not
// part of this section, so confirm there.

// Version of this crate as reported by Cargo at compile time, and its token.
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";

// Value of `tree_sitter::LANGUAGE_VERSION`, and its token.
const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
const ABI_VERSION_MAX_PLACEHOLDER: &str = "ABI_VERSION_MAX";

// Tokens for the parser name in its various casings.
const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME";
const CAMEL_PARSER_NAME_PLACEHOLDER: &str = "CAMEL_PARSER_NAME";
const TITLE_PARSER_NAME_PLACEHOLDER: &str = "TITLE_PARSER_NAME";
const UPPER_PARSER_NAME_PLACEHOLDER: &str = "UPPER_PARSER_NAME";
const LOWER_PARSER_NAME_PLACEHOLDER: &str = "LOWER_PARSER_NAME";
const KEBAB_PARSER_NAME_PLACEHOLDER: &str = "KEBAB_PARSER_NAME";
const PARSER_CLASS_NAME_PLACEHOLDER: &str = "PARSER_CLASS_NAME";

// Tokens for parser metadata.
const PARSER_DESCRIPTION_PLACEHOLDER: &str = "PARSER_DESCRIPTION";
const PARSER_LICENSE_PLACEHOLDER: &str = "PARSER_LICENSE";
const PARSER_URL_PLACEHOLDER: &str = "PARSER_URL";
const PARSER_URL_STRIPPED_PLACEHOLDER: &str = "PARSER_URL_STRIPPED";
const PARSER_VERSION_PLACEHOLDER: &str = "PARSER_VERSION";

// Tokens for author information.
const AUTHOR_NAME_PLACEHOLDER: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER: &str = "PARSER_AUTHOR_EMAIL";
const AUTHOR_URL_PLACEHOLDER: &str = "PARSER_AUTHOR_URL";

// Author fragments in JSON syntax (note the leading newlines and commas).
const AUTHOR_BLOCK_JS: &str = "\n  \"author\": {";
const AUTHOR_NAME_PLACEHOLDER_JS: &str = "\n    \"name\": \"PARSER_AUTHOR_NAME\",";
const AUTHOR_EMAIL_PLACEHOLDER_JS: &str = ",\n    \"email\": \"PARSER_AUTHOR_EMAIL\"";
const AUTHOR_URL_PLACEHOLDER_JS: &str = ",\n    \"url\": \"PARSER_AUTHOR_URL\"";

// Author fragments for the `_PY` variant (inline-table syntax).
const AUTHOR_BLOCK_PY: &str = "\nauthors = [{";
const AUTHOR_NAME_PLACEHOLDER_PY: &str = "name = \"PARSER_AUTHOR_NAME\"";
const AUTHOR_EMAIL_PLACEHOLDER_PY: &str = ", email = \"PARSER_AUTHOR_EMAIL\"";

// Author fragments for the `_RS` variant (array syntax).
const AUTHOR_BLOCK_RS: &str = "\nauthors = [";
const AUTHOR_NAME_PLACEHOLDER_RS: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER_RS: &str = " PARSER_AUTHOR_EMAIL";

// Author fragments for the `_GRAMMAR` variant (an `@author` comment line).
const AUTHOR_BLOCK_GRAMMAR: &str = "\n * @author ";
const AUTHOR_NAME_PLACEHOLDER_GRAMMAR: &str = "PARSER_AUTHOR_NAME";
const AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR: &str = " PARSER_AUTHOR_EMAIL";

const FUNDING_URL_PLACEHOLDER: &str = "FUNDING_URL";

// Template files, embedded into the binary at compile time via `include_str!`.

// Repository-level files.
const GRAMMAR_JS_TEMPLATE: &str = include_str!("./templates/grammar.js");
const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json");
const GITIGNORE_TEMPLATE: &str = include_str!("./templates/gitignore");
const GITATTRIBUTES_TEMPLATE: &str = include_str!("./templates/gitattributes");
const EDITORCONFIG_TEMPLATE: &str = include_str!("./templates/.editorconfig");

// Same compile-time source as `CLI_VERSION`.
const RUST_BINDING_VERSION: &str = env!("CARGO_PKG_VERSION");
const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION";

// Rust binding templates.
const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs");
const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs");
const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/_cargo.toml");

// Node binding templates.
const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js");
const INDEX_D_TS_TEMPLATE: &str = include_str!("./templates/index.d.ts");
const JS_BINDING_CC_TEMPLATE: &str = include_str!("./templates/js-binding.cc");
const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp");
const BINDING_TEST_JS_TEMPLATE: &str = include_str!("./templates/binding_test.js");

// C binding and build-system templates.
const MAKEFILE_TEMPLATE: &str = include_str!("./templates/makefile");
const CMAKELISTS_TXT_TEMPLATE: &str = include_str!("./templates/cmakelists.cmake");
const PARSER_NAME_H_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.h");
const PARSER_NAME_PC_IN_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.pc.in");

// Go binding templates.
const GO_MOD_TEMPLATE: &str = include_str!("./templates/go.mod");
const BINDING_GO_TEMPLATE: &str = include_str!("./templates/binding.go");
const BINDING_TEST_GO_TEMPLATE: &str = include_str!("./templates/binding_test.go");

// Python binding templates.
const SETUP_PY_TEMPLATE: &str = include_str!("./templates/setup.py");
const INIT_PY_TEMPLATE: &str = include_str!("./templates/__init__.py");
const INIT_PYI_TEMPLATE: &str = include_str!("./templates/__init__.pyi");
const PYPROJECT_TOML_TEMPLATE: &str = include_str!("./templates/pyproject.toml");
const PY_BINDING_C_TEMPLATE: &str = include_str!("./templates/py-binding.c");
const TEST_BINDING_PY_TEMPLATE: &str = include_str!("./templates/test_binding.py");

// Swift binding templates.
const PACKAGE_SWIFT_TEMPLATE: &str = include_str!("./templates/package.swift");
const TESTS_SWIFT_TEMPLATE: &str = include_str!("./templates/tests.swift");

// Zig binding templates.
const BUILD_ZIG_TEMPLATE: &str = include_str!("./templates/build.zig");
const BUILD_ZIG_ZON_TEMPLATE: &str = include_str!("./templates/build.zig.zon");
const ROOT_ZIG_TEMPLATE: &str = include_str!("./templates/root.zig");

// Schema URL written into the `schema` field of a generated `TreeSitterJSON`.
const TREE_SITTER_JSON_SCHEMA: &str =
    "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json";

/// Returns `true` when the final component of `repo_path` is one of a fixed
/// set of directory names (`bindings`, `build`, `examples`, `node_modules`,
/// `queries`, `script`, `src`, `target`, `test`, `types`).
///
/// The comparison is component-wise (`Path::ends_with`), so `foo/src`
/// matches while `foo/sources` does not.
#[must_use]
pub fn path_in_ignore(repo_path: &Path) -> bool {
    const IGNORED_DIRS: &[&str] = &[
        "bindings",
        "build",
        "examples",
        "node_modules",
        "queries",
        "script",
        "src",
        "target",
        "test",
        "types",
    ];

    for dir in IGNORED_DIRS {
        if repo_path.ends_with(dir) {
            return true;
        }
    }
    false
}

/// Flat set of options from which a `TreeSitterJSON` configuration is built
/// (see `to_tree_sitter_json`).
#[derive(Serialize, Deserialize, Clone)]
pub struct JsonConfigOpts {
    /// Grammar name. Also used to derive the injection regex, the class name
    /// and the default repository URL.
    pub name: String,
    /// Stored as the grammar's `camelcase` value.
    pub camelcase: String,
    /// Stored as the grammar's `title` value.
    pub title: String,
    /// Stored as the metadata description.
    pub description: String,
    /// Repository link. When absent, a `github.com/tree-sitter/tree-sitter-<name>`
    /// URL is used instead.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository: Option<Url>,
    /// Optional funding link.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<Url>,
    /// Stored as the grammar's `scope` value.
    pub scope: String,
    /// Stored as the grammar's `file_types` value.
    pub file_types: Vec<String>,
    /// Stored as the metadata version.
    pub version: Version,
    /// Stored as the metadata license.
    pub license: String,
    /// Name of the single author entry.
    pub author: String,
    /// Optional email of the author entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    /// Optional URL of the author entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<Url>,
    /// Passed through unchanged as the configuration's `bindings`.
    pub bindings: Bindings,
}

impl JsonConfigOpts {
    /// Converts these options into a `TreeSitterJSON` with exactly one
    /// grammar and one author.
    ///
    /// Values derived from `name`:
    /// * the injection regex `^<name>$`,
    /// * the class name `TreeSitter<UpperCamelCaseName>`,
    /// * the repository URL `https://github.com/tree-sitter/tree-sitter-<name>`,
    ///   used only when `repository` is `None`.
    ///
    /// # Panics
    ///
    /// Panics if the derived default repository URL fails to parse.
    #[must_use]
    pub fn to_tree_sitter_json(self) -> TreeSitterJSON {
        let Self {
            name,
            camelcase,
            title,
            description,
            repository,
            funding,
            scope,
            file_types,
            version,
            license,
            author,
            email,
            url,
            bindings,
        } = self;

        // Everything that borrows `name` is computed before `name` is moved
        // into the grammar entry.
        let injection_regex = format!("^{}$", name);
        let class_name = format!("TreeSitter{}", name.to_upper_camel_case());
        let repository = match repository {
            Some(explicit) => explicit,
            None => Url::parse(&format!(
                "https://github.com/tree-sitter/tree-sitter-{}",
                name
            ))
            .expect("Failed to parse default repository URL"),
        };

        let grammar = Grammar {
            name,
            camelcase: Some(camelcase),
            title: Some(title),
            scope,
            path: None,
            external_files: PathsJSON::Empty,
            file_types: Some(file_types),
            highlights: PathsJSON::Empty,
            injections: PathsJSON::Empty,
            locals: PathsJSON::Empty,
            tags: PathsJSON::Empty,
            injection_regex: Some(injection_regex),
            first_line_regex: None,
            content_regex: None,
            class_name: Some(class_name),
        };

        let sole_author = Author {
            name: author,
            email,
            url: url.map(|author_url| author_url.to_string()),
        };

        let links = Links {
            repository,
            funding,
            homepage: None,
        };

        let metadata = Metadata {
            version,
            license: Some(license),
            description: Some(description),
            authors: Some(vec![sole_author]),
            links: Some(links),
            namespace: None,
        };

        TreeSitterJSON {
            schema: Some(TREE_SITTER_JSON_SCHEMA.to_string()),
            grammars: vec![grammar],
            metadata,
            bindings,
        }
    }
}

impl Default for JsonConfigOpts {
    /// Returns options with every string and list empty, every optional field
    /// unset, default bindings, and version `0.1.0`.
    fn default() -> Self {
        // A fixed, well-formed literal: parsing it is not expected to fail.
        let initial_version = Version::from_str("0.1.0").unwrap();

        Self {
            name: String::default(),
            camelcase: String::default(),
            title: String::default(),
            description: String::default(),
            repository: Option::default(),
            funding: Option::default(),
            scope: String::default(),
            file_types: Vec::new(),
            version: initial_version,
            license: String::default(),
            author: String::default(),
            email: Option::default(),
            url: Option::default(),
            bindings: Bindings::default(),
        }
    }
}

/// Borrowed values handed to `generate_file` for each generated file.
///
/// `generate_grammar_files` fills this in from the parsed `tree-sitter.json`.
struct GenerateOpts<'a> {
    /// Name of the first listed author, if any.
    author_name: Option<&'a str>,
    /// Email of the first listed author, if any.
    author_email: Option<&'a str>,
    /// URL of the first listed author, if any.
    author_url: Option<&'a str>,
    /// License from the metadata, if any.
    license: Option<&'a str>,
    /// Description from the metadata, if any.
    description: Option<&'a str>,
    /// Repository link from the metadata links, if links are present.
    repository: Option<&'a str>,
    /// Funding link from the metadata links, if any.
    funding: Option<&'a str>,
    /// Version from the metadata.
    version: &'a Version,
    /// The first grammar's `camelcase` value, or the upper-camel-cased
    /// language name when that is absent.
    camel_parser_name: &'a str,
    /// The first grammar's `title` value, or the upper-camel-cased language
    /// name when that is absent.
    title_parser_name: &'a str,
    /// The first grammar's `class_name` value, or `TreeSitter` followed by
    /// the upper-camel-cased language name when that is absent.
    class_name: &'a str,
}

pub fn generate_grammar_files(
    repo_path: &Path,
    language_name: &str,
    allow_update: bool,
    opts: Option<&JsonConfigOpts>,
) -> Result<()> {
    let dashed_language_name = language_name.to_kebab_case();

    let tree_sitter_config = missing_path_else(
        repo_path.join("tree-sitter.json"),
        true,
        |path| {
            // invariant: opts is always Some when `tree-sitter.json` doesn't exist
            let Some(opts) = opts else { unreachable!() };

            let tree_sitter_json = opts.clone().to_tree_sitter_json();
            write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?;
            Ok(())
        },
        |path| {
            // updating the config, if needed
            if let Some(opts) = opts {
                let tree_sitter_json = opts.clone().to_tree_sitter_json();
                write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?;
            }
            Ok(())
        },
    )?;

    let tree_sitter_config = serde_json::from_str::<TreeSitterJSON>(
        &fs::read_to_string(tree_sitter_config.as_path())
            .with_context(|| "Failed to read tree-sitter.json")?,
    )?;

    let authors = tree_sitter_config.metadata.authors.as_ref();
    let camel_name = tree_sitter_config.grammars[0]
        .camelcase
        .clone()
        .unwrap_or_else(|| language_name.to_upper_camel_case());
    let title_name = tree_sitter_config.grammars[0]
        .title
        .clone()
        .unwrap_or_else(|| language_name.to_upper_camel_case());
    let class_name = tree_sitter_config.grammars[0]
        .class_name
        .clone()
        .unwrap_or_else(|| format!("TreeSitter{}", language_name.to_upper_camel_case()));

    let generate_opts = GenerateOpts {
        author_name: authors
            .map(|a| a.first().map(|a| a.name.as_str()))
            .unwrap_or_default(),
        author_email: authors
            .map(|a| a.first().and_then(|a| a.email.as_deref()))
            .unwrap_or_default(),
        author_url: authors
            .map(|a| a.first().and_then(|a| a.url.as_deref()))
            .unwrap_or_default(),
        license: tree_sitter_config.metadata.license.as_deref(),
        description: tree_sitter_config.metadata.description.as_deref(),
        repository: tree_sitter_config
            .metadata
            .links
            .as_ref()
            .map(|l| l.repository.as_str()),
        funding: tree_sitter_config
            .metadata
            .links
            .as_ref()
            .and_then(|l| l.funding.as_ref().map(|f| f.as_str())),
        version: &tree_sitter_config.metadata.version,
        camel_parser_name: &camel_name,
        title_parser_name: &title_name,
        class_name: &class_name,
    };

    // Create package.json
    missing_path(repo_path.join("package.json"), |path| {
        generate_file(
            path,
            PACKAGE_JSON_TEMPLATE,
            dashed_language_name.as_str(),
            &generate_opts,
        )
    })?;

    // Do not create a grammar.js file in a repo with multiple language configs
    if !tree_sitter_config.has_multiple_language_configs() {
        missing_path(repo_path.join("grammar.js"), |path| {
            generate_file(path, GRAMMAR_JS_TEMPLATE, language_name, &generate_opts)
        })?;
    }

    // Write .gitignore file
    missing_path_else(
        repo_path.join(".gitignore"),
        allow_update,
        |path| generate_file(path, GITIGNORE_TEMPLATE, language_name, &generate_opts),
        |path| {
            let contents = fs::read_to_string(path)?;
            if !contents.contains("Zig artifacts") {
                eprintln!("Replacing .gitignore");
                generate_file(path, GITIGNORE_TEMPLATE, language_name, &generate_opts)?;
            }
            Ok(())
        },
    )?;

    // Write .gitattributes file
    missing_path_else(
        repo_path.join(".gitattributes"),
        allow_update,
        |path| generate_file(path, GITATTRIBUTES_TEMPLATE, language_name, &generate_opts),
        |path| {
            let mut contents = fs::read_to_string(path)?;
            contents = contents.replace("bindings/c/* ", "bindings/c/** ");
            if !contents.contains("Zig bindings") {
                contents.push('\n');
                contents.push_str(indoc! {"
                # Zig bindings
                build.zig linguist-generated
                build.zig.zon linguist-generated
                "});
            }
            write_file(path, contents)?;
            Ok(())
        },
    )?;

    // Write .editorconfig file
    missing_path(repo_path.join(".editorconfig"), |path| {
        generate_file(path, EDITORCONFIG_TEMPLATE, language_name, &generate_opts)
    })?;

    let bindings_dir = repo_path.join("bindings");

    // Generate Rust bindings
    if tree_sitter_config.bindings.rust {
        missing_path(bindings_dir.join("rust"), create_dir)?.apply(|path| {
            missing_path(path.join("lib.rs"), |path| {
                generate_file(path, LIB_RS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("build.rs"), |path| {
                generate_file(path, BUILD_RS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path_else(
                repo_path.join("Cargo.toml"),
                allow_update,
                |path| {
                    generate_file(
                        path,
                        CARGO_TOML_TEMPLATE,
                        dashed_language_name.as_str(),
                        &generate_opts,
                    )
                },
                |path| {
                    let contents = fs::read_to_string(path)?;
                    if contents.contains("\"LICENSE\"") {
                        write_file(path, contents.replace("\"LICENSE\"", "\"/LICENSE\""))?;
                    }
                    Ok(())
                },
            )?;

            Ok(())
        })?;
    }

    // Generate Node bindings
    if tree_sitter_config.bindings.node {
        missing_path(bindings_dir.join("node"), create_dir)?.apply(|path| {
            missing_path_else(
                path.join("index.js"),
                allow_update,
                |path| generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let contents = fs::read_to_string(path)?;
                    if !contents.contains("bun") {
                        generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts)?;
                    }
                    Ok(())
                },
            )?;

            missing_path(path.join("index.d.ts"), |path| {
                generate_file(path, INDEX_D_TS_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("binding_test.js"), |path| {
                generate_file(
                    path,
                    BINDING_TEST_JS_TEMPLATE,
                    language_name,
                    &generate_opts,
                )
            })?;

            missing_path(path.join("binding.cc"), |path| {
                generate_file(path, JS_BINDING_CC_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path_else(
                repo_path.join("binding.gyp"),
                allow_update,
                |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let contents = fs::read_to_string(path)?;
                    if contents.contains("fs.exists(") {
                        write_file(path, contents.replace("fs.exists(", "fs.existsSync("))?;
                    }
                    Ok(())
                },
            )?;

            Ok(())
        })?;
    }

    // Generate C bindings
    if tree_sitter_config.bindings.c {
        missing_path(bindings_dir.join("c"), create_dir)?.apply(|path| {
            let old_file = &path.join(format!("tree-sitter-{}.h", language_name.to_kebab_case()));
            if allow_update && fs::exists(old_file).unwrap_or(false) {
                fs::remove_file(old_file)?;
            }
            missing_path(path.join("tree_sitter"), create_dir)?.apply(|include_path| {
                missing_path(
                    include_path.join(format!("tree-sitter-{}.h", language_name.to_kebab_case())),
                    |path| {
                        generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts)
                    },
                )?;
                Ok(())
            })?;

            missing_path(
                path.join(format!("tree-sitter-{}.pc.in", language_name.to_kebab_case())),
                |path| {
                    generate_file(
                        path,
                        PARSER_NAME_PC_IN_TEMPLATE,
                        language_name,
                        &generate_opts,
                    )
                },
            )?;

            missing_path_else(
                repo_path.join("Makefile"),
                allow_update,
                |path| {
                    generate_file(path, MAKEFILE_TEMPLATE, language_name, &generate_opts)
                },
                |path| {
                    let contents = fs::read_to_string(path)?;
                    if !contents.contains("cd '$(DESTDIR)$(LIBDIR)' && ln -sf") {
                        eprintln!("Replacing Makefile");
                        generate_file(path, MAKEFILE_TEMPLATE, language_name, &generate_opts)?;
                    } else {
                        let contents = contents
                            .replace(
                                indoc! {r"
                                $(PARSER): $(SRC_DIR)/grammar.json
                                	$(TS) generate $^
                                "},
                                indoc! {r"
                                $(SRC_DIR)/grammar.json: grammar.js
                                	$(TS) generate --stage=json $^

                                $(PARSER): $(SRC_DIR)/grammar.json
                                	$(TS) generate --stage=parser $^
                                "}
                            );
                        write_file(path, contents)?;
                    }
                    Ok(())
                },
            )?;

            missing_path_else(
                repo_path.join("CMakeLists.txt"),
                allow_update,
                |path| generate_file(path, CMAKELISTS_TXT_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let mut contents = fs::read_to_string(path)?;
                    contents = contents
                        .replace("add_custom_target(test", "add_custom_target(ts-test")
                        .replace(
                            &formatdoc! {r#"
                            install(FILES bindings/c/tree-sitter-{language_name}.h
                                    DESTINATION "${{CMAKE_INSTALL_INCLUDEDIR}}/tree_sitter")
                            "#},
                            indoc! {r#"
                            install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter"
                                    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
                                    FILES_MATCHING PATTERN "*.h")
                            "#}
                        ).replace(
                            &format!("target_include_directories(tree-sitter-{language_name} PRIVATE src)"),
                            &formatdoc! {"
                            target_include_directories(tree-sitter-{language_name}
                                                       PRIVATE src
                                                       INTERFACE $<BUILD_INTERFACE:${{CMAKE_CURRENT_SOURCE_DIR}}/bindings/c>
                                                                 $<INSTALL_INTERFACE:${{CMAKE_INSTALL_INCLUDEDIR}}>)
                            "}
                        ).replace(
                            indoc! {r#"
                            add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c"
                                               DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
                                               COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json
                                                        --abi=${TREE_SITTER_ABI_VERSION}
                                               WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                                               COMMENT "Generating parser.c")
                            "#},
                            indoc! {r#"
                            add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
                                               DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/grammar.js"
                                               COMMAND "${TREE_SITTER_CLI}" generate grammar.js
                                                        --stage=json
                                               WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                                               COMMENT "Generating grammar.json")

                            add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c"
                                               DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
                                               COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json
                                                        --stage=parser --abi=${TREE_SITTER_ABI_VERSION}
                                               WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                                               COMMENT "Generating parser.c")
                            "#}
                        );
                    write_file(path, contents)?;
                    Ok(())
                },
            )?;

            Ok(())
        })?;
    }

    // Generate Go bindings
    if tree_sitter_config.bindings.go {
        missing_path(bindings_dir.join("go"), create_dir)?.apply(|path| {
            missing_path(path.join("binding.go"), |path| {
                generate_file(path, BINDING_GO_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(path.join("binding_test.go"), |path| {
                generate_file(
                    path,
                    BINDING_TEST_GO_TEMPLATE,
                    language_name,
                    &generate_opts,
                )
            })?;

            missing_path(repo_path.join("go.mod"), |path| {
                generate_file(path, GO_MOD_TEMPLATE, language_name, &generate_opts)
            })?;

            Ok(())
        })?;
    }

    // Generate Python bindings
    if tree_sitter_config.bindings.python {
        missing_path(bindings_dir.join("python"), create_dir)?.apply(|path| {
            let lang_path = path.join(format!("tree_sitter_{}", language_name.to_snake_case()));
            missing_path(&lang_path, create_dir)?;

            missing_path_else(
                lang_path.join("binding.c"),
                allow_update,
                |path| generate_file(path, PY_BINDING_C_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let mut contents = fs::read_to_string(path)?;
                    if !contents.contains("PyModuleDef_Init") {
                        contents = contents
                            .replace("PyModule_Create", "PyModuleDef_Init")
                            .replace(
                                "static PyMethodDef methods[] = {\n",
                                indoc! {"
                                static struct PyModuleDef_Slot slots[] = {
                                #ifdef Py_GIL_DISABLED
                                    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
                                #endif
                                    {0, NULL}
                                };

                                static PyMethodDef methods[] = {
                                "},
                            )
                            .replace(
                                indoc! {"
                                .m_size = -1,
                                    .m_methods = methods
                                "},
                                indoc! {"
                                .m_size = 0,
                                    .m_methods = methods,
                                    .m_slots = slots,
                                "},
                            );
                        write_file(path, contents)?;
                    }
                    Ok(())
                },
            )?;

            missing_path(lang_path.join("__init__.py"), |path| {
                generate_file(path, INIT_PY_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(lang_path.join("__init__.pyi"), |path| {
                generate_file(path, INIT_PYI_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(lang_path.join("py.typed"), |path| {
                generate_file(path, "", language_name, &generate_opts) // py.typed is empty
            })?;

            missing_path(path.join("tests"), create_dir)?.apply(|path| {
                missing_path_else(
                    path.join("test_binding.py"),
                    allow_update,
                    |path| {
                        generate_file(
                            path,
                            TEST_BINDING_PY_TEMPLATE,
                            language_name,
                            &generate_opts,
                        )
                    },
                    |path| {
                        let mut contents = fs::read_to_string(path)?;
                        if !contents.contains("Parser(Language(") {
                            contents = contents
                                .replace("tree_sitter.Language(", "Parser(Language(")
                                .replace(".language())\n", ".language()))\n")
                                .replace(
                                    "import tree_sitter\n",
                                    "from tree_sitter import Language, Parser\n",
                                );
                            write_file(path, contents)?;
                        }
                        Ok(())
                    },
                )?;
                Ok(())
            })?;

            missing_path_else(
                repo_path.join("setup.py"),
                allow_update,
                |path| generate_file(path, SETUP_PY_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let mut contents = fs::read_to_string(path)?;
                    if !contents.contains("egg_info") || !contents.contains("Py_GIL_DISABLED") {
                        eprintln!("Replacing setup.py");
                        generate_file(path, SETUP_PY_TEMPLATE, language_name, &generate_opts)?;
                    } else {
                        contents = contents
                            .replace("path\nfrom platform import system", "name as os_name, path")
                            .replace("system() != \"Windows\"", "os_name != \"nt\"");
                        write_file(path, contents)?;
                    }
                    Ok(())
                },
            )?;

            missing_path_else(
                repo_path.join("pyproject.toml"),
                allow_update,
                |path| {
                    generate_file(
                        path,
                        PYPROJECT_TOML_TEMPLATE,
                        dashed_language_name.as_str(),
                        &generate_opts,
                    )
                },
                |path| {
                    let mut contents = fs::read_to_string(path)?;
                    if !contents.contains("cp310-*") {
                        contents = contents
                            .replace(r#"build = "cp39-*""#, r#"build = "cp310-*""#)
                            .replace(r#"python = ">=3.9""#, r#"python = ">=3.10""#)
                            .replace("tree-sitter~=0.22", "tree-sitter~=0.24");
                        write_file(path, contents)?;
                    }
                    Ok(())
                },
            )?;

            Ok(())
        })?;
    }

    // Generate Swift bindings
    if tree_sitter_config.bindings.swift {
        missing_path(bindings_dir.join("swift"), create_dir)?.apply(|path| {
            let lang_path = path.join(format!("TreeSitter{camel_name}"));
            missing_path(&lang_path, create_dir)?;

            missing_path(lang_path.join(format!("{language_name}.h")), |path| {
                generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts)
            })?;

            missing_path(
                path.join(format!("TreeSitter{camel_name}Tests")),
                create_dir,
            )?
            .apply(|path| {
                missing_path(
                    path.join(format!("TreeSitter{camel_name}Tests.swift")),
                    |path| generate_file(path, TESTS_SWIFT_TEMPLATE, language_name, &generate_opts),
                )?;

                Ok(())
            })?;

            missing_path_else(
                repo_path.join("Package.swift"),
                allow_update,
                |path| generate_file(path, PACKAGE_SWIFT_TEMPLATE, language_name, &generate_opts),
                |path| {
                    let mut contents = fs::read_to_string(path)?;
                    contents = contents
                        .replace(
                            "https://github.com/ChimeHQ/SwiftTreeSitter",
                            "https://github.com/tree-sitter/swift-tree-sitter",
                        )
                        .replace("version: \"0.8.0\")", "version: \"0.9.0\")")
                        .replace("(url:", "(name: \"SwiftTreeSitter\", url:");
                    write_file(path, contents)?;
                    Ok(())
                },
            )?;

            Ok(())
        })?;
    }

    // Generate Zig bindings
    if tree_sitter_config.bindings.zig {
        missing_path(repo_path.join("build.zig"), |path| {
            generate_file(path, BUILD_ZIG_TEMPLATE, language_name, &generate_opts)
        })?;

        missing_path(repo_path.join("build.zig.zon"), |path| {
            generate_file(path, BUILD_ZIG_ZON_TEMPLATE, language_name, &generate_opts)
        })?;

        missing_path(bindings_dir.join("zig"), create_dir)?.apply(|path| {
            missing_path(path.join("root.zig"), |path| {
                generate_file(path, ROOT_ZIG_TEMPLATE, language_name, &generate_opts)
            })?;

            Ok(())
        })?;
    }

    Ok(())
}

/// Walks upward from `path` looking for the closest ancestor directory that
/// holds a usable configuration file with the same file name as `path`.
///
/// For `package.json` the file only counts when its top-level object has a
/// `"tree-sitter"` key; any other file name counts as soon as it deserializes
/// as a `TreeSitterJSON`. The directory containing the first such file is
/// returned.
///
/// # Errors
///
/// Fails if an existing candidate file cannot be read or parsed, or if the
/// top of the path is reached without finding a matching file.
///
/// # Panics
///
/// Panics if `path` has no final component or that component is not valid
/// UTF-8.
pub fn get_root_path(path: &Path) -> Result<PathBuf> {
    let filename = path.file_name().unwrap().to_str().unwrap();
    let is_package_json = filename == "package.json";
    let mut candidate = path.to_owned();

    loop {
        if candidate.exists() {
            let contents = fs::read_to_string(&candidate)
                .with_context(|| format!("Failed to read {filename}"))?;

            let is_root = if is_package_json {
                serde_json::from_str::<Map<String, Value>>(&contents)
                    .context(format!("Failed to parse {filename}"))?
                    .contains_key("tree-sitter")
            } else {
                // Only the success of the parse matters here; the value is discarded.
                serde_json::from_str::<TreeSitterJSON>(&contents)
                    .context(format!("Failed to parse {filename}"))?;
                true
            };

            if is_root {
                return Ok(candidate.parent().unwrap().to_path_buf());
            }
        }

        // Drop the file name, then step up one directory. The second `pop`
        // returns `false` once there is no parent left to move to.
        candidate.pop();
        let moved_up = candidate.pop();
        if !moved_up {
            return Err(anyhow!(format!(
                concat!(
                    "Failed to locate a {} file,",
                    " please ensure you have one, and if you don't then consult the docs",
                ),
                filename
            )));
        }
        candidate.push(filename);
    }
}

/// Renders `template` by substituting its placeholder constants and writes the
/// result to `path` via `write_file`.
///
/// * `path` - destination file; its file name also selects the file-specific
///   handling below (`package.json`, `pyproject.toml`, `grammar.js`,
///   `Cargo.toml`).
/// * `template` - template text containing the placeholder constants.
/// * `language_name` - name substituted for the parser-name placeholders, both
///   verbatim and in shouty-snake, snake and kebab case.
/// * `generate_opts` - remaining values (author, license, description,
///   repository, funding, version, class name, ...).
///
/// # Errors
///
/// Returns whatever error `write_file` produces.
///
/// # Panics
///
/// Panics if `path` has no final component or that component is not valid
/// UTF-8.
fn generate_file(
    path: &Path,
    template: &str,
    language_name: &str,
    generate_opts: &GenerateOpts,
) -> Result<()> {
    let filename = path.file_name().unwrap().to_str().unwrap();

    // NOTE(review): the placeholder values are defined elsewhere; if some are
    // substrings of others the order of these replacements matters — confirm
    // before reordering.
    let mut replacement = template
        .replace(
            CAMEL_PARSER_NAME_PLACEHOLDER,
            generate_opts.camel_parser_name,
        )
        .replace(
            TITLE_PARSER_NAME_PLACEHOLDER,
            generate_opts.title_parser_name,
        )
        .replace(
            UPPER_PARSER_NAME_PLACEHOLDER,
            &language_name.to_shouty_snake_case(),
        )
        .replace(
            LOWER_PARSER_NAME_PLACEHOLDER,
            &language_name.to_snake_case(),
        )
        .replace(
            KEBAB_PARSER_NAME_PLACEHOLDER,
            &language_name.to_kebab_case(),
        )
        .replace(PARSER_NAME_PLACEHOLDER, language_name)
        .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION)
        .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION)
        .replace(ABI_VERSION_MAX_PLACEHOLDER, &ABI_VERSION_MAX.to_string())
        .replace(
            PARSER_VERSION_PLACEHOLDER,
            &generate_opts.version.to_string(),
        )
        .replace(PARSER_CLASS_NAME_PLACEHOLDER, generate_opts.class_name);

    // Author name: substitute it when given; otherwise blank out the
    // file-specific name placeholder. Files other than the four listed keep
    // whatever placeholder text they contain.
    if let Some(name) = generate_opts.author_name {
        replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER, name);
    } else {
        match filename {
            "package.json" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_JS, "");
            }
            "pyproject.toml" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_PY, "");
            }
            "grammar.js" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_GRAMMAR, "");
            }
            "Cargo.toml" => {
                replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_RS, "");
            }
            _ => {}
        }
    }

    // Author email: same scheme as the name. `Cargo.toml` and `grammar.js`
    // get the address wrapped in angle brackets; every other file gets it
    // verbatim.
    if let Some(email) = generate_opts.author_email {
        replacement = match filename {
            "Cargo.toml" | "grammar.js" => {
                replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, &format!("<{email}>"))
            }
            _ => replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, email),
        }
    } else {
        match filename {
            "package.json" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_JS, "");
            }
            "pyproject.toml" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_PY, "");
            }
            "grammar.js" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR, "");
            }
            "Cargo.toml" => {
                replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_RS, "");
            }
            _ => {}
        }
    }

    // The author URL is only handled for `package.json`.
    if filename == "package.json" {
        if let Some(url) = generate_opts.author_url {
            replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER, url);
        } else {
            replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER_JS, "");
        }
    }

    // With no author information at all, cut the whole author block out of the
    // rendered text: everything from the block marker through the first
    // file-specific terminator that follows it. `package.json` additionally
    // requires the URL to be absent; the other files only look at name and
    // email.
    if generate_opts.author_name.is_none()
        && generate_opts.author_email.is_none()
        && generate_opts.author_url.is_none()
        && filename == "package.json"
    {
        if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_JS) {
            if let Some(end_idx) = replacement[start_idx..]
                .find("},")
                .map(|i| i + start_idx + 2)
            {
                replacement.replace_range(start_idx..end_idx, "");
            }
        }
    } else if generate_opts.author_name.is_none() && generate_opts.author_email.is_none() {
        match filename {
            "pyproject.toml" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_PY) {
                    if let Some(end_idx) = replacement[start_idx..]
                        .find("}]")
                        .map(|i| i + start_idx + 2)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    }
                }
            }
            "grammar.js" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_GRAMMAR) {
                    // The terminator is a space followed by a newline; only the
                    // space is removed (`+ 1`), the newline is kept.
                    if let Some(end_idx) = replacement[start_idx..]
                        .find(" \n")
                        .map(|i| i + start_idx + 1)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    }
                }
            }
            "Cargo.toml" => {
                if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_RS) {
                    if let Some(end_idx) = replacement[start_idx..]
                        .find("\"]")
                        .map(|i| i + start_idx + 2)
                    {
                        replacement.replace_range(start_idx..end_idx, "");
                    }
                }
            }
            _ => {}
        }
    }

    // License falls back to "MIT" when none is configured.
    match generate_opts.license {
        Some(license) => replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, license),
        _ => replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, "MIT"),
    }

    // Description falls back to "<camel parser name> grammar for tree-sitter".
    match generate_opts.description {
        Some(description) => {
            replacement = replacement.replace(PARSER_DESCRIPTION_PLACEHOLDER, description);
        }
        _ => {
            replacement = replacement.replace(
                PARSER_DESCRIPTION_PLACEHOLDER,
                &format!(
                    "{} grammar for tree-sitter",
                    generate_opts.camel_parser_name,
                ),
            );
        }
    }

    // Repository URLs are lower-cased; the "stripped" variant also drops the
    // `https://` scheme. Without a configured repository, a URL under
    // `github.com/tree-sitter/` is derived from the language name.
    match generate_opts.repository {
        Some(repository) => {
            replacement = replacement
                .replace(
                    PARSER_URL_STRIPPED_PLACEHOLDER,
                    &repository.replace("https://", "").to_lowercase(),
                )
                .replace(PARSER_URL_PLACEHOLDER, &repository.to_lowercase());
        }
        _ => {
            replacement = replacement
                .replace(
                    PARSER_URL_STRIPPED_PLACEHOLDER,
                    &format!(
                        "github.com/tree-sitter/tree-sitter-{}",
                        language_name.to_lowercase()
                    ),
                )
                .replace(
                    PARSER_URL_PLACEHOLDER,
                    &format!(
                        "https://github.com/tree-sitter/tree-sitter-{}",
                        language_name.to_lowercase()
                    ),
                );
        }
    }

    // Funding is only handled for `pyproject.toml` and `package.json`: the URL
    // is substituted when present, otherwise the whole funding line is removed.
    if let Some(funding_url) = generate_opts.funding {
        match filename {
            "pyproject.toml" | "package.json" => {
                replacement = replacement.replace(FUNDING_URL_PLACEHOLDER, funding_url);
            }
            _ => {}
        }
    } else {
        match filename {
            "package.json" => {
                replacement = replacement.replace("  \"funding\": \"FUNDING_URL\",\n", "");
            }
            "pyproject.toml" => {
                replacement = replacement.replace("Funding = \"FUNDING_URL\"\n", "");
            }
            _ => {}
        }
    }

    write_file(path, replacement)?;
    Ok(())
}

fn create_dir(path: &Path) -> Result<()> {
    fs::create_dir_all(path)
        .with_context(|| format!("Failed to create {:?}", path.to_string_lossy()))
}

/// Records whether a path was present on disk at the moment it was checked by
/// `missing_path` / `missing_path_else`, while keeping hold of the path itself.
#[derive(PartialEq, Eq, Debug)]
enum PathState<P>
where
    P: AsRef<Path>,
{
    /// The path already existed when it was checked.
    Exists(P),
    /// The path did not exist when it was checked.
    Missing(P),
}

#[allow(dead_code)]
impl<P> PathState<P>
where
    P: AsRef<Path>,
{
    /// Invokes `action` with the path only for the `Exists` variant; returns
    /// `self` so calls can be chained.
    fn exists(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        match self {
            Self::Exists(path) => action(path.as_ref()).map(|()| self),
            Self::Missing(_) => Ok(self),
        }
    }

    /// Invokes `action` with the path only for the `Missing` variant; returns
    /// `self` so calls can be chained.
    fn missing(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        match self {
            Self::Missing(path) => action(path.as_ref()).map(|()| self),
            Self::Exists(_) => Ok(self),
        }
    }

    /// Invokes `action` with the path regardless of the variant.
    fn apply(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
        action(self.as_path()).map(|()| self)
    }

    /// Invokes `action` with the state itself, letting the callback inspect
    /// which variant it received.
    fn apply_state(&self, mut action: impl FnMut(&Self) -> Result<()>) -> Result<&Self> {
        action(self).map(|()| self)
    }

    /// Borrows the wrapped path, whichever variant holds it.
    fn as_path(&self) -> &Path {
        let (Self::Exists(inner) | Self::Missing(inner)) = self;
        inner.as_ref()
    }
}

/// Runs `action` on `path` only when the path does not exist yet.
///
/// Returns `PathState::Missing` when the action was run and
/// `PathState::Exists` when the path was already present. An error from
/// `action` is propagated.
fn missing_path<P, F>(path: P, mut action: F) -> Result<PathState<P>>
where
    P: AsRef<Path>,
    F: FnMut(&Path) -> Result<()>,
{
    if path.as_ref().exists() {
        return Ok(PathState::Exists(path));
    }

    action(path.as_ref())?;
    Ok(PathState::Missing(path))
}

/// Like `missing_path`, with an extra callback for paths that already exist.
///
/// * When `path` is absent, `action` is run and `PathState::Missing` is
///   returned.
/// * When `path` is present, `else_action` is run only if `allow_update` is
///   `true`, and `PathState::Exists` is returned either way.
///
/// Errors from either callback are propagated.
fn missing_path_else<P, T, F>(
    path: P,
    allow_update: bool,
    mut action: T,
    mut else_action: F,
) -> Result<PathState<P>>
where
    P: AsRef<Path>,
    T: FnMut(&Path) -> Result<()>,
    F: FnMut(&Path) -> Result<()>,
{
    let state = if path.as_ref().exists() {
        if allow_update {
            else_action(path.as_ref())?;
        }
        PathState::Exists(path)
    } else {
        action(path.as_ref())?;
        PathState::Missing(path)
    };

    Ok(state)
}



================================================
FILE: crates/cli/src/input.rs
================================================
use std::{
    fs,
    io::{Read, Write},
    path::{Path, PathBuf},
    sync::{
        atomic::{AtomicUsize, Ordering},
        mpsc, Arc,
    },
};

use anyhow::{anyhow, bail, Context, Result};
use glob::glob;

use crate::test::{parse_tests, TestEntry};

/// The source of input selected by `get_input`.
pub enum CliInput {
    /// A list of file paths, taken either from a paths file or from the
    /// path/glob arguments.
    Paths(Vec<PathBuf>),
    /// A single example pulled out of the test corpus by its number.
    Test {
        /// Name of the corpus test.
        name: String,
        /// Input bytes of the corpus test.
        contents: Vec<u8>,
        /// Languages listed in the test's attributes.
        languages: Vec<Box<str>>,
    },
    /// Raw bytes read from standard input.
    Stdin(Vec<u8>),
}

pub fn get_input(
    paths_file: Option<&Path>,
    paths: Option<Vec<PathBuf>>,
    test_number: Option<u32>,
    cancellation_flag: &Arc<AtomicUsize>,
) -> Result<CliInput> {
    if let Some(paths_file) = paths_file {
        return Ok(CliInput::Paths(
            fs::read_to_string(paths_file)
                .with_context(|| format!("Failed to read paths file {}", paths_file.display()))?
                .trim()
                .lines()
                .map(PathBuf::from)
                .collect::<Vec<_>>(),
        ));
    }

    if let Some(test_number) = test_number {
        let current_dir = std::env::current_dir().unwrap();
        let test_dir = current_dir.join("test").join("corpus");

        if !test_dir.exists() {
            return Err(anyhow!(
                "Test corpus directory not found in current directory, see https://tree-sitter.github.io/tree-sitter/creating-parsers/5-writing-tests"
            ));
        }

        let test_entry = parse_tests(&test_dir)?;
        let mut test_num = 0;
        let Some((name, contents, languages)) =
            get_test_info(&test_entry, test_number.max(1) - 1, &mut test_num)
        else {
            return Err(anyhow!("Failed to fetch contents of test #{test_number}"));
        };

        return Ok(CliInput::Test {
            name,
            contents,
            languages,
        });
    }

    if let Some(paths) = paths {
        let mut result = Vec::new();

        let mut incorporate_path = |path: PathBuf, positive| {
            if positive {
                result.push(path);
            } else if let Some(index) = result.iter().position(|p| *p == path) {
                result.remove(index);
            }
        };

        for mut path in paths {
            let mut positive = true;
            if path.starts_with("!") {
                positive = false;
                path = path.strip_prefix("!").unwrap().to_path_buf();
            }

            if path.exists() {
                incorporate_path(path, positive);
            } else {
                let Some(path_str) = path.to_str() else {
                    bail!("Invalid path: {}", path.display());
                };
                let paths = glob(path_str)
                    .with_context(|| format!("Invalid glob pattern {}", path.display()))?;
                for path in paths {
                    incorporate_path(path?, positive);
                }
            }
        }

        if result.is_empty() {
            return Err(anyhow!(
                "No files were found at or matched by the provided pathname/glob"
            ));
        }

        return Ok(CliInput::Paths(result));
    }

    let reader_flag = cancellation_flag.clone();
    let (tx, rx) = mpsc::channel();

    // Spawn a thread to read from stdin, until ctrl-c or EOF is received
    std::thread::spawn(move || {
        let mut input = Vec::new();
        let stdin = std::io::stdin();
        let mut handle = stdin.lock();

        // Read in chunks, so we can check the ctrl-c flag
        loop {
            if reader_flag.load(Ordering::Relaxed) == 1 {
                break;
            }
            let mut buffer = [0; 1024];
            match handle.read(&mut buffer) {
                Ok(0) | Err(_) => break,
                Ok(n) => input.extend_from_slice(&buffer[..n]),
            }
        }

        // Signal to the main thread that we're done
        tx.send(input).ok();
    });

    loop {
        // If we've received a ctrl-c signal, exit
        if cancellation_flag.load(Ordering::Relaxed) == 1 {
            bail!("\n");
        }

        // If we're done receiving input from stdin, return it
        if let Ok(input) = rx.try_recv() {
            return Ok(CliInput::Stdin(input));
        }

        std::thread::sleep(std::time::Duration::from_millis(50));
    }
}

/// Finds the `target_test`-th example (0-based, depth-first) beneath
/// `test_entry`.
///
/// `test_num` is the running count of examples visited so far and is advanced
/// for every example that is passed over, which lets the search continue
/// across sibling groups. Returns the matching example's name, input bytes and
/// languages, or `None` if this subtree does not contain it.
#[allow(clippy::type_complexity)]
pub fn get_test_info(
    test_entry: &TestEntry,
    target_test: u32,
    test_num: &mut u32,
) -> Option<(String, Vec<u8>, Vec<Box<str>>)> {
    match test_entry {
        TestEntry::Example {
            name,
            input,
            attributes,
            ..
        } => {
            if *test_num != target_test {
                // Not the one we want: count it and keep looking.
                *test_num += 1;
                return None;
            }
            Some((name.clone(), input.clone(), attributes.languages.clone()))
        }
        TestEntry::Group { children, .. } => children
            .iter()
            .find_map(|child| get_test_info(child, target_test, &mut *test_num)),
    }
}

/// Stores `contents` in a file named `.tree-sitter-temp` inside the system
/// temporary directory and hands back that file's path.
///
/// The file name is fixed, so each call replaces whatever an earlier call
/// wrote there.
pub fn get_tmp_source_file(contents: &[u8]) -> Result<PathBuf> {
    let tmp_path = std::env::temp_dir().join(".tree-sitter-temp");
    std::fs::File::create(&tmp_path)?.write_all(contents)?;
    Ok(tmp_path)
}



================================================
FILE: crates/cli/src/logger.rs
================================================
use log::{LevelFilter, Log, Metadata, Record};

/// Logger that prints every record to standard error.
#[allow(dead_code)]
struct Logger {
    /// Not read by the `Log` implementation in this file (hence the
    /// `dead_code` allowance).
    pub filter: Option<String>,
}

impl Log for Logger {
    /// Every record is considered enabled.
    fn enabled(&self, _metadata: &Metadata) -> bool {
        true
    }

    /// Prints the record to stderr as `[module] message`, with a leading
    /// `rust_tree_sitter_cli::` removed from the module path. A record without
    /// a module path gets an empty tag.
    fn log(&self, record: &Record) {
        let module = record
            .module_path()
            .unwrap_or_default()
            .trim_start_matches("rust_tree_sitter_cli::");
        let message = record.args();
        eprintln!("[{}] {}", module, message);
    }

    /// Nothing is buffered, so there is nothing to flush.
    fn flush(&self) {}
}

/// Installs `Logger` as the global logger and caps the log level at `Info`.
///
/// # Panics
///
/// Panics if `log::set_boxed_logger` reports an error.
pub fn init() {
    let logger = Box::new(Logger { filter: None });
    log::set_boxed_logger(logger).unwrap();
    log::set_max_level(LevelFilter::Info);
}



================================================
FILE: crates/cli/src/parse.rs
================================================
use std::{
    fmt, fs,
    io::{self, StdoutLock, Write},
    path::{Path, PathBuf},
    sync::atomic::{AtomicUsize, Ordering},
    time::{Duration, Instant},
};

use anstyle::{AnsiColor, Color, RgbColor};
use anyhow::{anyhow, Context, Result};
use clap::ValueEnum;
use serde::{Deserialize, Serialize};
use tree_sitter::{
    ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree,
    TreeCursor,
};

use super::util;
use crate::{fuzz::edits::Edit, test::paint};

/// Aggregate counters for a batch of parses. The `Display` implementation
/// renders them as a one-line summary.
#[derive(Debug, Default, Serialize)]
pub struct Stats {
    /// Number of parses counted as successful.
    pub successful_parses: usize,
    /// Total number of parses attempted.
    pub total_parses: usize,
    /// Total number of input bytes; the numerator of the reported speed.
    pub total_bytes: usize,
    /// Accumulated duration; the denominator of the reported speed.
    pub total_duration: Duration,
}

impl fmt::Display for Stats {
    /// Writes a single summary line (terminated by a newline) with the parse
    /// counts, the success percentage and the average speed in bytes per
    /// millisecond. Both derived figures read `N/A` when nothing was parsed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let elapsed_us = self.total_duration.as_micros();
        let nothing_parsed = self.total_parses == 0;

        let success_rate = if nothing_parsed {
            "N/A".to_string()
        } else {
            let ratio = (self.successful_parses as f64) / (self.total_parses as f64);
            format!("{:.2}%", ratio * 100.0)
        };

        let duration_str = if nothing_parsed {
            "N/A".to_string()
        } else if elapsed_us == 0 {
            // Avoid dividing by a zero duration.
            "0 bytes/ms".to_string()
        } else {
            // bytes / (microseconds / 1000) == bytes * 1000 / microseconds
            let bytes_per_ms = ((self.total_bytes as u128) * 1_000) / elapsed_us;
            format!("{} bytes/ms", bytes_per_ms)
        };

        let failed_parses = self.total_parses - self.successful_parses;
        writeln!(
            f,
            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}",
            self.total_parses,
            self.successful_parses,
            failed_parses,
        )
    }
}

/// Sets the colors used in the output of `tree-sitter parse --cst`.
///
/// Every entry is optional; `ParseTheme::empty()` sets them all to `None` and
/// `ParseTheme::default()` supplies the built-in palette.
// NOTE(review): presumably a `None` entry leaves that element uncolored —
// confirm against the code that renders the CST.
#[derive(Debug, Copy, Clone)]
pub struct ParseTheme {
    /// The color of node kinds
    pub node_kind: Option<Color>,
    /// The color of text associated with a node
    pub node_text: Option<Color>,
    /// The color of node fields
    pub field: Option<Color>,
    /// The color of the range information for unnamed nodes
    pub row_color: Option<Color>,
    /// The color of the range information for named nodes
    pub row_color_named: Option<Color>,
    /// The color of extra nodes
    pub extra: Option<Color>,
    /// The color of ERROR nodes
    pub error: Option<Color>,
    /// The color of MISSING nodes and their associated text
    pub missing: Option<Color>,
    /// The color of newline characters
    pub line_feed: Option<Color>,
    /// The color of backticks
    pub backtick: Option<Color>,
    /// The color of literals
    pub literal: Option<Color>,
}

impl ParseTheme {
    // RGB colors used by the default theme.
    const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118));
    const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181));
    const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51));
    const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173));
    const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67));

    /// Returns a theme in which every color is `None`.
    #[must_use]
    pub const fn empty() -> Self {
        Self {
            node_kind: None,
            node_text: None,
            field: None,
            row_color: None,
            row_color_named: None,
            extra: None,
            error: None,
            missing: None,
            line_feed: None,
            backtick: None,
            literal: None,
        }
    }
}

impl Default for ParseTheme {
    fn default() -> Self {
        Self {
            node_kind: Some(AnsiColor::BrightCyan.into()),
            node_text: Some(Self::GRAY),
            field: Some(AnsiColor::Blue.into()),
            row_color: Some(AnsiColor::White.into()),
            row_color_named: Some(AnsiColor::BrightCyan.into()),
            extra: Some(AnsiColor::BrightMagenta.into()),
            error: Some(AnsiColor::Red.into()),
            missing: Some(Self::ORANGE),
            line_feed: Some(Self::LIGHT_GRAY),
            backtick: Some(Self::GREEN),
            literal: Some(Self::YELLOW),
        }
    }
}

/// A (de)serializable color made of three `u8` components.
///
/// The components are handed to `RgbColor` positionally (first, second,
/// third) when the value is converted.
#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
pub struct Rgb(pub u8, pub u8, pub u8);

impl From<Rgb> for RgbColor {
    fn from(val: Rgb) -> Self {
        Self(val.0, val.1, val.2)
    }
}

/// Serializable configuration for the parse command.
///
/// Field names are (de)serialized in kebab-case, so `parse_theme` appears as
/// `parse-theme`.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct Config {
    /// Optional color overrides; see [`ParseThemeRaw`].
    pub parse_theme: Option<ParseThemeRaw>,
}

/// The serializable counterpart of [`ParseTheme`], with one optional [`Rgb`]
/// value per theme entry.
///
/// Field names are (de)serialized in kebab-case (e.g. `node-kind`,
/// `row-color-named`). When converted into a [`ParseTheme`], every entry left
/// as `None` takes the value from `ParseTheme::default()`.
#[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct ParseThemeRaw {
    /// Override for [`ParseTheme::node_kind`]
    pub node_kind: Option<Rgb>,
    /// Override for [`ParseTheme::node_text`]
    pub node_text: Option<Rgb>,
    /// Override for [`ParseTheme::field`]
    pub field: Option<Rgb>,
    /// Override for [`ParseTheme::row_color`]
    pub row_color: Option<Rgb>,
    /// Override for [`ParseTheme::row_color_named`]
    pub row_color_named: Option<Rgb>,
    /// Override for [`ParseTheme::extra`]
    pub extra: Option<Rgb>,
    /// Override for [`ParseTheme::error`]
    pub error: Option<Rgb>,
    /// Override for [`ParseTheme::missing`]
    pub missing: Option<Rgb>,
    /// Override for [`ParseTheme::line_feed`]
    pub line_feed: Option<Rgb>,
    /// Override for [`ParseTheme::backtick`]
    pub backtick: Option<Rgb>,
    /// Override for [`ParseTheme::literal`]
    pub literal: Option<Rgb>,
}

impl From<ParseThemeRaw> for ParseTheme {
    fn from(value: ParseThemeRaw) -> Self {
        let val_or_default = |val: Option<Rgb>, default: Option<Color>| -> Option<Color> {
            val.map_or(default, |v| Some(Color::Rgb(v.into())))
        };
        let default = Self::default();

        Self {
            node_kind: val_or_default(value.node_kind, default.node_kind),
            node_text: val_or_default(value.node_text, default.node_text),
            field: val_or_default(value.field, default.field),
            row_color: val_or_default(value.row_color, default.row_color),
            row_color_named: val_or_default(value.row_color_named, default.row_color_named),
            extra: val_or_default(value.extra, default.extra),
            error: val_or_default(value.error, default.error),
            missing: val_or_default(value.missing, default.missing),
            line_feed: val_or_default(value.line_feed, default.line_feed),
            backtick: val_or_default(value.backtick, default.backtick),
            literal: val_or_default(value.literal, default.literal),
        }
    }
}

/// Selects which representation of the syntax tree `parse_file_at_path`
/// prints to stdout.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Indented, parenthesized listing of the named nodes (with ranges unless
    /// `no_ranges` is set).
    Normal,
    /// None of the tree printers run.
    Quiet,
    /// XML document with one element per named node.
    Xml,
    /// Concrete syntax tree rendering, colored according to the `ParseTheme`.
    Cst,
    /// Tree graph produced through `util::print_tree_graph`.
    Dot,
}

/// A position in a multi-line text document, in terms of rows and columns.
///
/// Rows and columns are zero-based.
///
/// This serves as a serializable wrapper for `Point`
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct ParsePoint {
    /// Zero-based row, copied from `Point::row`.
    pub row: usize,
    /// Zero-based column, copied from `Point::column`.
    pub column: usize,
}

impl From<Point> for ParsePoint {
    fn from(value: Point) -> Self {
        Self {
            row: value.row,
            column: value.column,
        }
    }
}

/// The serializable result of parsing one file, as recorded by
/// `parse_file_at_path`.
#[derive(Serialize, Default, Debug, Clone)]
pub struct ParseSummary {
    /// Path of the file that was parsed.
    pub file: PathBuf,
    /// `true` when a tree was produced and no ERROR or MISSING node was found
    /// in it.
    pub successful: bool,
    /// Start position of the root node; `None` when no tree was produced.
    pub start: Option<ParsePoint>,
    /// End position of the root node; `None` when no tree was produced.
    pub end: Option<ParsePoint>,
    /// Time taken by the initial parse; `None` when no tree was produced.
    pub duration: Option<Duration>,
    /// Length of the source in bytes.
    pub bytes: Option<usize>,
}

impl ParseSummary {
    /// Creates a summary for `path` that is marked unsuccessful and has no
    /// position, duration or size information yet.
    #[must_use]
    pub fn new(path: &Path) -> Self {
        Self {
            file: path.to_path_buf(),
            successful: false,
            start: None,
            end: None,
            duration: None,
            bytes: None,
        }
    }
}

/// Serializable collection of per-file results plus aggregate statistics.
#[derive(Serialize, Debug, Default)]
pub struct ParseStats {
    /// One entry per file; `parse_file_at_path` pushes a summary on every
    /// call that reaches the end of parsing.
    pub parse_summaries: Vec<ParseSummary>,
    /// Aggregate counters; see `Stats`.
    pub cumulative_stats: Stats,
}

// Selects how parser log messages are reported on stderr by
// `parse_file_at_path` (only consulted when `debug_graph` is off).
#[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)]
pub enum ParseDebugType {
    // No logger is installed.
    #[default]
    Quiet,
    // Each message is written as-is; lexer messages are indented by two spaces.
    Normal,
    // Like `Normal`, but messages are colored according to the stack version
    // being processed (unless `NO_COLOR=1`).
    Pretty,
}

/// Options controlling a single call to `parse_file_at_path`.
pub struct ParseFileOptions<'a> {
    /// Edit strings (see `parse_edit_flag`) applied in order after the
    /// initial parse; the tree is re-parsed after each one.
    pub edits: &'a [&'a str],
    /// Which representation of the tree to print.
    pub output: ParseOutput,
    /// Receives a `ParseSummary` for the parsed file.
    pub stats: &'a mut ParseStats,
    /// Print the timing line even when the tree contains no error.
    pub print_time: bool,
    /// Parse timeout in microseconds; `0` disables the timeout.
    pub timeout: u64,
    /// How parser log messages are reported on stderr. Ignored when
    /// `debug_graph` is set.
    pub debug: ParseDebugType,
    /// Record parser graphs to `log.html` via `util::log_graphs`.
    pub debug_graph: bool,
    /// When present, the parse is cancelled as soon as this flag is non-zero.
    pub cancellation_flag: Option<&'a AtomicUsize>,
    /// Explicit input encoding. When `None`, a UTF-16 LE/BE byte-order mark
    /// at the start of the file selects the matching UTF-16 encoding.
    pub encoding: Option<u32>,
    /// Forwarded to `util::log_graphs` / `util::print_tree_graph`.
    /// NOTE(review): presumably opens the generated log when set — confirm
    /// against `util`.
    pub open_log: bool,
    /// Omit node ranges from the `Normal` and `Cst` outputs.
    pub no_ranges: bool,
    /// Colors used by the `Cst` output.
    pub parse_theme: &'a ParseTheme,
}

/// Compact outcome of one parse.
///
/// NOTE(review): this type is not constructed in the part of the file shown
/// here; the field notes below are inferred from names and from the matching
/// `ParseSummary` fields — confirm against the callers.
#[derive(Copy, Clone)]
pub struct ParseResult {
    /// Presumably whether the parse finished without errors.
    pub successful: bool,
    /// Presumably the size of the parsed source in bytes.
    pub bytes: usize,
    /// Presumably the parse duration, when one was measured.
    pub duration: Option<Duration>,
}

pub fn parse_file_at_path(
    parser: &mut Parser,
    language: &Language,
    path: &Path,
    name: &str,
    max_path_length: usize,
    opts: &mut ParseFileOptions,
) -> Result<()> {
    let mut _log_session = None;
    parser.set_language(language)?;
    let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;

    // Render an HTML graph if `--debug-graph` was passed
    if opts.debug_graph {
        _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?);
    }
    // Log to stderr if `--debug` was passed
    else if opts.debug != ParseDebugType::Quiet {
        let mut curr_version: usize = 0;
        let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1");
        let debug = opts.debug;
        parser.set_logger(Some(Box::new(move |log_type, message| {
            if debug == ParseDebugType::Normal {
                if log_type == LogType::Lex {
                    write!(&mut io::stderr(), "  ").unwrap();
                }
                writeln!(&mut io::stderr(), "{message}").unwrap();
            } else {
                let colors = &[
                    AnsiColor::White,
                    AnsiColor::Red,
                    AnsiColor::Blue,
                    AnsiColor::Green,
                    AnsiColor::Cyan,
                    AnsiColor::Yellow,
                ];
                if message.starts_with("process version:") {
                    let comma_idx = message.find(',').unwrap();
                    curr_version = message["process version:".len()..comma_idx]
                        .parse()
                        .unwrap();
                }
                let color = if use_color {
                    Some(colors[curr_version])
                } else {
                    None
                };
                let mut out = if log_type == LogType::Lex {
                    "  ".to_string()
                } else {
                    String::new()
                };
                out += &paint(color, message);
                writeln!(&mut io::stderr(), "{out}").unwrap();
            }
        })));
    }

    let parse_time = Instant::now();

    #[inline(always)]
    fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool {
        bom_bytes == [0xFF, 0xFE]
    }

    #[inline(always)]
    fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool {
        bom_bytes == [0xFE, 0xFF]
    }

    let encoding = match opts.encoding {
        None if source_code.len() >= 2 => {
            if is_utf16_le_bom(&source_code[0..2]) {
                Some(ffi::TSInputEncodingUTF16LE)
            } else if is_utf16_be_bom(&source_code[0..2]) {
                Some(ffi::TSInputEncodingUTF16BE)
            } else {
                None
            }
        }
        _ => opts.encoding,
    };

    // If the `--cancel` flag was passed, then cancel the parse
    // when the user types a newline.
    //
    // Additionally, if the `--time` flag was passed, end the parse
    // after the specified number of microseconds.
    let start_time = Instant::now();
    let progress_callback = &mut |_: &ParseState| {
        if let Some(cancellation_flag) = opts.cancellation_flag {
            if cancellation_flag.load(Ordering::SeqCst) != 0 {
                return true;
            }
        }

        if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 {
            return true;
        }

        false
    };

    let parse_opts = ParseOptions::new().progress_callback(progress_callback);

    let tree = match encoding {
        Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => {
            let source_code_utf16 = source_code
                .chunks_exact(2)
                .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
                .collect::<Vec<_>>();
            parser.parse_utf16_le_with_options(
                &mut |i, _| {
                    if i < source_code_utf16.len() {
                        &source_code_utf16[i..]
                    } else {
                        &[]
                    }
                },
                None,
                Some(parse_opts),
            )
        }
        Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => {
            let source_code_utf16 = source_code
                .chunks_exact(2)
                .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
                .collect::<Vec<_>>();
            parser.parse_utf16_be_with_options(
                &mut |i, _| {
                    if i < source_code_utf16.len() {
                        &source_code_utf16[i..]
                    } else {
                        &[]
                    }
                },
                None,
                Some(parse_opts),
            )
        }
        _ => parser.parse_with_options(
            &mut |i, _| {
                if i < source_code.len() {
                    &source_code[i..]
                } else {
                    &[]
                }
            },
            None,
            Some(parse_opts),
        ),
    };
    let parse_duration = parse_time.elapsed();

    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    if let Some(mut tree) = tree {
        if opts.debug_graph && !opts.edits.is_empty() {
            println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
        }

        let edit_time = Instant::now();
        for (i, edit) in opts.edits.iter().enumerate() {
            let edit = parse_edit_flag(&source_code, edit)?;
            perform_edit(&mut tree, &mut source_code, &edit)?;
            tree = parser.parse(&source_code, Some(&tree)).unwrap();

            if opts.debug_graph {
                println!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
            }
        }
        let edit_duration = edit_time.elapsed();

        parser.stop_printing_dot_graphs();

        let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
        let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
        let mut cursor = tree.walk();

        if opts.output == ParseOutput::Normal {
            let mut needs_newline = false;
            let mut indent_level = 0;
            let mut did_visit_children = false;
            loop {
                let node = cursor.node();
                let is_named = node.is_named();
                if did_visit_children {
                    if is_named {
                        stdout.write_all(b")")?;
                        needs_newline = true;
                    }
                    if cursor.goto_next_sibling() {
                        did_visit_children = false;
                    } else if cursor.goto_parent() {
                        did_visit_children = true;
                        indent_level -= 1;
                    } else {
                        break;
                    }
                } else {
                    if is_named {
                        if needs_newline {
                            stdout.write_all(b"\n")?;
                        }
                        for _ in 0..indent_level {
                            stdout.write_all(b"  ")?;
                        }
                        let start = node.start_position();
                        let end = node.end_position();
                        if let Some(field_name) = cursor.field_name() {
                            write!(&mut stdout, "{field_name}: ")?;
                        }
                        write!(&mut stdout, "({}", node.kind())?;
                        if !opts.no_ranges {
                            write!(
                                &mut stdout,
                                " [{}, {}] - [{}, {}]",
                                start.row, start.column, end.row, end.column
                            )?;
                        }
                        needs_newline = true;
                    }
                    if cursor.goto_first_child() {
                        did_visit_children = false;
                        indent_level += 1;
                    } else {
                        did_visit_children = true;
                    }
                }
            }
            cursor.reset(tree.root_node());
            println!();
        }

        if opts.output == ParseOutput::Cst {
            let lossy_source_code = String::from_utf8_lossy(&source_code);
            let total_width = lossy_source_code
                .lines()
                .enumerate()
                .map(|(row, col)| {
                    (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1
                })
                .max()
                .unwrap_or(1);
            let mut indent_level = 1;
            let mut did_visit_children = false;
            let mut in_error = false;
            loop {
                if did_visit_children {
                    if cursor.goto_next_sibling() {
                        did_visit_children = false;
                    } else if cursor.goto_parent() {
                        did_visit_children = true;
                        indent_level -= 1;
                        if !cursor.node().has_error() {
                            in_error = false;
                        }
                    } else {
                        break;
                    }
                } else {
                    cst_render_node(
                        opts,
                        &mut cursor,
                        &source_code,
                        &mut stdout,
                        total_width,
                        indent_level,
                        in_error,
                    )?;
                    if cursor.goto_first_child() {
                        did_visit_children = false;
                        indent_level += 1;
                        if cursor.node().has_error() {
                            in_error = true;
                        }
                    } else {
                        did_visit_children = true;
                    }
                }
            }
            cursor.reset(tree.root_node());
            println!();
        }

        if opts.output == ParseOutput::Xml {
            let mut needs_newline = false;
            let mut indent_level = 0;
            let mut did_visit_children = false;
            let mut had_named_children = false;
            let mut tags = Vec::<&str>::new();
            writeln!(&mut stdout, "<?xml version=\"1.0\"?>")?;
            loop {
                let node = cursor.node();
                let is_named = node.is_named();
                if did_visit_children {
                    if is_named {
                        let tag = tags.pop();
                        if had_named_children {
                            for _ in 0..indent_level {
                                stdout.write_all(b"  ")?;
                            }
                        }
                        write!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
                        // we only write a line in the case where it's the last sibling
                        if let Some(parent) = node.parent() {
                            if parent.child(parent.child_count() as u32 - 1).unwrap() == node {
                                stdout.write_all(b"\n")?;
                            }
                        }
                        needs_newline = true;
                    }
                    if cursor.goto_next_sibling() {
                        did_visit_children = false;
                        had_named_children = false;
                    } else if cursor.goto_parent() {
                        did_visit_children = true;
                        had_named_children = is_named;
                        indent_level -= 1;
                        if !is_named && needs_newline {
                            stdout.write_all(b"\n")?;
                            for _ in 0..indent_level {
                                stdout.write_all(b"  ")?;
                            }
                        }
                    } else {
                        break;
                    }
                } else {
                    if is_named {
                        if needs_newline {
                            stdout.write_all(b"\n")?;
                        }
                        for _ in 0..indent_level {
                            stdout.write_all(b"  ")?;
                        }
                        write!(&mut stdout, "<{}", node.kind())?;
                        if let Some(field_name) = cursor.field_name() {
                            write!(&mut stdout, " field=\"{field_name}\"")?;
                        }
                        let start = node.start_position();
                        let end = node.end_position();
                        write!(&mut stdout, " srow=\"{}\"", start.row)?;
                        write!(&mut stdout, " scol=\"{}\"", start.column)?;
                        write!(&mut stdout, " erow=\"{}\"", end.row)?;
                        write!(&mut stdout, " ecol=\"{}\"", end.column)?;
                        write!(&mut stdout, ">")?;
                        tags.push(node.kind());
                        needs_newline = true;
                    }
                    if cursor.goto_first_child() {
                        did_visit_children = false;
                        had_named_children = false;
                        indent_level += 1;
                    } else {
                        did_visit_children = true;
                        let start = node.start_byte();
                        let end = node.end_byte();
                        let value =
                            std::str::from_utf8(&source_code[start..end]).expect("has a string");
                        if !is_named && needs_newline {
                            stdout.write_all(b"\n")?;
                            for _ in 0..indent_level {
                                stdout.write_all(b"  ")?;
                            }
                        }
                        write!(&mut stdout, "{}", html_escape::encode_text(value))?;
                    }
                }
            }
            cursor.reset(tree.root_node());
            println!();
        }

        if opts.output == ParseOutput::Dot {
            util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap();
        }

        let mut first_error = None;
        let mut earliest_node_with_error = None;
        'outer: loop {
            let node = cursor.node();
            if node.has_error() {
                if earliest_node_with_error.is_none() {
                    earliest_node_with_error = Some(node);
                }
                if node.is_error() || node.is_missing() {
                    first_error = Some(node);
                    break;
                }

                // If there's no more children, even though some outer node has an error,
                // then that means that the first error is hidden, but the later error could be
                // visible. So, we walk back up to the child of the first node with an error,
                // and then check its siblings for errors.
                if !cursor.goto_first_child() {
                    let earliest = earliest_node_with_error.unwrap();
                    while cursor.goto_parent() {
                        if cursor.node().parent().is_some_and(|p| p == earliest) {
                            while cursor.goto_next_sibling() {
                                let sibling = cursor.node();
                                if sibling.is_error() || sibling.is_missing() {
                                    first_error = Some(sibling);
                                    break 'outer;
                                }
                                if sibling.has_error() && cursor.goto_first_child() {
                                    continue 'outer;
                                }
                            }
                            break;
                        }
                    }
                    break;
                }
            } else if !cursor.goto_next_sibling() {
                break;
            }
        }

        if first_error.is_some() || opts.print_time {
            let path = path.to_string_lossy();
            write!(
                &mut stdout,
                "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms",
                name,
                (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(),
                width = max_path_length
            )?;
            if let Some(node) = first_error {
                let start = node.start_position();
                let end = node.end_position();
                let mut node_text = String::new();
                for c in node.kind().chars() {
                    if let Some(escaped) = escape_invisible(c) {
                        node_text += escaped;
                    } else {
                        node_text.push(c);
                    }
                }
                write!(&mut stdout, "\t(")?;
                if node.is_missing() {
                    if node.is_named() {
                        write!(&mut stdout, "MISSING {node_text}")?;
                    } else {
                        write!(&mut stdout, "MISSING \"{node_text}\"")?;
                    }
                } else {
                    write!(&mut stdout, "{node_text}")?;
                }
                write!(
                    &mut stdout,
                    " [{}, {}] - [{}, {}])",
                    start.row, start.column, end.row, end.column
                )?;
            }
            if !opts.edits.is_empty() {
                write!(
                    &mut stdout,
                    "\n{:width$}\tEdit:  {edit_duration_ms:>7.2} ms",
                    " ".repeat(path.len()),
                    width = max_path_length,
                )?;
            }
            writeln!(&mut stdout)?;
        }

        opts.stats.parse_summaries.push(ParseSummary {
            file: path.to_path_buf(),
            successful: first_error.is_none(),
            start: Some(tree.root_node().start_position().into()),
            end: Some(tree.root_node().end_position().into()),
            duration: Some(parse_duration),
            bytes: Some(source_code.len()),
        });

        return Ok(());
    }
    parser.stop_printing_dot_graphs();

    if opts.print_time {
        let duration = parse_time.elapsed();
        let duration_ms = duration.as_micros() as f64 / 1e3;
        writeln!(
            &mut stdout,
            "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)",
            path.to_str().unwrap(),
            width = max_path_length
        )?;
    }

    opts.stats.parse_summaries.push(ParseSummary {
        file: path.to_path_buf(),
        successful: false,
        start: None,
        end: None,
        duration: None,
        bytes: Some(source_code.len()),
    });

    Ok(())
}

/// Returns the backslash escape sequence used to display `c`, or `None` when
/// `c` is printed as-is.
///
/// Escaped characters are newline, carriage return, tab, NUL, backslash,
/// vertical tab and form feed.
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\n' => Some("\\n"),
        '\r' => Some("\\r"),
        '\t' => Some("\\t"),
        '\0' => Some("\\0"),
        '\\' => Some("\\\\"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        _ => None,
    }
}

/// Returns a copy of `source` in which every character that
/// `escape_invisible` recognizes is replaced by its escape sequence.
fn render_node_text(source: &str) -> String {
    let mut rendered = String::with_capacity(source.len());
    for c in source.chars() {
        match escape_invisible(c) {
            Some(esc) => rendered.push_str(esc),
            None => rendered.push(c),
        }
    }
    rendered
}

/// Writes the quoted text of a node for the CST output.
///
/// * Unnamed nodes are written on the current line as `"text"`, with the
///   quotes painted in the same color as the text.
/// * Named nodes are written line by line between backticks (painted with the
///   theme's `backtick` color). When ranges are enabled and the text spans
///   several lines, each line starts on a new output line, prefixed by its own
///   range and indentation. When ranges are disabled, every line starts on a
///   new, indented output line.
///
/// `text_info` is `(total_width, indent_level)`: the width of the range column
/// and the indentation depth of the node itself.
fn write_node_text(
    opts: &ParseFileOptions,
    stdout: &mut StdoutLock<'static>,
    cursor: &TreeCursor,
    is_named: bool,
    source: &str,
    color: Option<impl Into<Color> + Copy>,
    text_info: (usize, usize),
) -> Result<()> {
    let (total_width, indent_level) = text_info;

    if !is_named {
        let quote_color: Option<Color> = color.map(Into::into);
        let painted_quote = paint(quote_color, "\"");
        write!(
            stdout,
            "{painted_quote}{}{painted_quote}",
            paint(color, &render_node_text(source)),
        )?;
        return Ok(());
    }

    let painted_quote = paint(opts.parse_theme.backtick, "`");
    let text_indent = "  ".repeat(indent_level + 1);
    let multiline = source.contains('\n');

    for (i, line) in source.split_inclusive('\n').enumerate() {
        if line.is_empty() {
            break;
        }

        let painted_text = paint(color, &render_node_text(&render_line_feed(line, opts)));

        // Whatever precedes the opening quote of this line.
        let prefix = if opts.no_ranges {
            format!("\n{text_indent}")
        } else if multiline {
            // The range of this line alone: the node's start row shifted down
            // by `i`, ending on that same row at the length of *this* line
            // (offset by the start column on the first line only).
            let mut line_range = cursor.node().range();
            line_range.start_point.row += i;
            line_range.end_point.row = line_range.start_point.row;
            let column_offset = if i == 0 {
                line_range.start_point.column
            } else {
                0
            };
            line_range.end_point.column = line.len() + column_offset;
            format!(
                "\n{}{text_indent}",
                render_node_range(opts, cursor, is_named, true, total_width, line_range)
            )
        } else {
            String::new()
        };

        write!(
            stdout,
            "{prefix}{painted_quote}{painted_text}{painted_quote}"
        )?;
    }

    Ok(())
}

/// Returns `source` with every line terminator painted in the theme's
/// `line_feed` color. The terminator looked for is `"\r\n"` on Windows builds
/// and `"\n"` everywhere else.
fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String {
    let terminator = if cfg!(windows) { "\r\n" } else { "\n" };
    let painted_terminator = paint(opts.parse_theme.line_feed, terminator);
    source.replace(terminator, &painted_terminator)
}

fn render_node_range(
    opts: &ParseFileOptions,
    cursor: &TreeCursor,
    is_named: bool,
    is_multiline: bool,
    total_width: usize,
    range: Range,
) -> String {
    let has_field_name = cursor.field_name().is_some();
    let range_color = if is_named && !is_multiline && !has_field_name {
        opts.parse_theme.row_color_named
    } else {
        opts.parse_theme.row_color
    };

    let remaining_width_start = (total_width
        - (range.start_point.row as f64).log10() as usize
        - (range.start_point.column as f64).log10() as usize)
        .max(1);
    let remaining_width_end = (total_width
        - (range.end_point.row as f64).log10() as usize
        - (range.end_point.column as f64).log10() as usize)
        .max(1);
    paint(
        range_color,
        &format!(
            "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}",
            range.start_point.row,
            range.start_point.column,
            ' ',
            range.end_point.row,
            range.end_point.column,
            ' ',
        ),
    )
}

fn cst_render_node(
    opts: &ParseFileOptions,
    cursor: &mut TreeCursor,
    source_code: &[u8],
    stdout: &mut StdoutLock<'static>,
    total_width: usize,
    indent_level: usize,
    in_error: bool,
) -> Result<()> {
    let node = cursor.node();
    let is_named = node.is_named();
    if !opts.no_ranges {
        write!(
            stdout,
            "{}",
            render_node_range(opts, cursor, is_named, false, total_width, node.range())
        )?;
    }
    write!(
        stdout,
        "{}{}",
        "  ".repeat(indent_level),
        if in_error && !node.has_error() {
            " "
        } else {
            ""
        }
    )?;
    if is_named {
        if let Some(field_name) = cursor.field_name() {
            write!(
                stdout,
                "{}",
                paint(opts.parse_theme.field, &format!("{field_name}: "))
            )?;
        }

        if node.has_error() || node.is_error() {
            write!(stdout, "{}", paint(opts.parse_theme.error, "•"))?;
        }

        let kind_color = if node.is_error() {
            opts.parse_theme.error
        } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) {
            opts.parse_theme.extra
        } else {
            opts.parse_theme.node_kind
        };
        write!(stdout, "{} ", paint(kind_color, node.kind()))?;

        if node.child_count() == 0 {
            // Node text from a pattern or external scanner
            write_node_text(
                opts,
                stdout,
                cursor,
                is_named,
                &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
                opts.parse_theme.node_text,
                (total_width, indent_level),
            )?;
        }
    } else if node.is_missing() {
        write!(stdout, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
        write!(
            stdout,
            "\"{}\"",
            paint(opts.parse_theme.missing, node.kind())
        )?;
    } else {
        // Terminal literals, like "fn"
        write_node_text(
            opts,
            stdout,
            cursor,
            is_named,
            node.kind(),
            opts.parse_theme.literal,
            (total_width, indent_level),
        )?;
    }
    writeln!(stdout)?;

    Ok(())
}

/// Applies `edit` to both the source buffer and the syntax tree.
///
/// The bytes `edit.position .. edit.position + edit.deleted_length` of `input`
/// are replaced by `edit.inserted_text`, and `tree` is informed of the change
/// through `Tree::edit`. The `InputEdit` that was applied is returned.
///
/// # Errors
///
/// Returns an error if `position_for_offset` fails for the start, the old end
/// or the new end of the edited span.
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
    let replaced = edit.position..edit.position + edit.deleted_length;
    let inserted_end = edit.position + edit.inserted_text.len();

    // The positions of the replaced span have to be measured against the old
    // contents, i.e. before the buffer is modified.
    let start_position = position_for_offset(input, replaced.start)?;
    let old_end_position = position_for_offset(input, replaced.end)?;

    input.splice(replaced.clone(), edit.inserted_text.iter().copied());

    // The new end is measured against the updated contents.
    let new_end_position = position_for_offset(input, inserted_end)?;

    let input_edit = InputEdit {
        start_byte: replaced.start,
        old_end_byte: replaced.end,
        new_end_byte: inserted_end,
        start_position,
        old_end_position,
        new_end_position,
    };
    tree.edit(&input_edit);
    Ok(input_edit)
}

/// Parses an edit description of the form `<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>`.
///
/// * The start is either `$` (the end of `source_code`), a `row,column` pair, or a byte offset.
/// * The removed length is a byte count.
/// * Everything after the second space is the text to insert; it may be empty and may itself
///   contain spaces.
///
/// # Errors
///
/// Returns an error if a required part is missing, a number fails to parse, or a `row,column`
/// pair cannot be resolved by `offset_for_position`.
fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
    let error = || {
        anyhow!(concat!(
            "Invalid edit string '{}'. ",
            "Edit strings must match the pattern '<START_BYTE_OR_POSITION> <REMOVED_LENGTH> <NEW_TEXT>'"
        ), flag)
    };

    // Split off the first two fields; whatever remains (spaces included) is the inserted text.
    let mut fields = flag.splitn(3, ' ');
    let raw_position = fields.next().ok_or_else(error)?;
    let raw_deleted_length = fields.next().ok_or_else(error)?;
    let inserted_text = fields.next().unwrap_or("").as_bytes().to_vec();

    // The start is `$`, a comma-separated row/column pair, or a plain byte offset.
    let position = match raw_position {
        "$" => source_code.len(),
        pair if pair.contains(',') => {
            let mut coordinates = pair.split(',');
            let row = coordinates
                .next()
                .ok_or_else(error)?
                .parse::<usize>()
                .map_err(|_| error())?;
            let column = coordinates
                .next()
                .ok_or_else(error)?
                .parse::<usize>()
                .map_err(|_| error())?;
            offset_for_position(source_code, Point { row, column })?
        }
        byte_offset => byte_offset.parse::<usize>().map_err(|_| error())?,
    };

    // The removed length is always a byte count.
    let deleted_length = raw_deleted_length
        .parse::<usize>()
        .map_err(|_| error())?;

    Ok(Edit {
        position,
        deleted_length,
        inserted_text,
    })
}

/// Converts a zero-based `(row, column)` point into a byte offset into `input`.
///
/// Rows are separated by `\n`. `position.column` is a byte count from the start of the row and
/// may equal the row's length, in which case the result addresses the row's terminating
/// newline (or the end of the input for the last row). This is the inverse of
/// `position_for_offset`: converting an offset to a point and back yields the same offset.
///
/// # Errors
///
/// Returns an error if `input` has fewer than `position.row + 1` rows, or if
/// `position.column` reaches past the end of the addressed row.
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
    let mut newlines = memchr::memchr_iter(b'\n', input);

    // Skip `position.row` newlines; the requested row starts right after the last one skipped.
    // Row 0 skips nothing and therefore starts at offset 0.
    let mut row_start = 0;
    for _ in 0..position.row {
        match newlines.next() {
            Some(newline) => row_start = newline + 1,
            None => return Err(anyhow!("Failed to address a row: {}", position.row)),
        }
    }

    // The row runs up to its own terminating newline or, for the last row, the end of input.
    // `row_start` never exceeds either bound, so the subtractions below cannot underflow.
    match newlines.next() {
        Some(row_end) => {
            if position.column > row_end - row_start {
                return Err(anyhow!("Failed to address a column: {}", position.column));
            }
        }
        None => {
            if position.column > input.len() - row_start {
                return Err(anyhow!("Failed to address a column over the end"));
            }
        }
    }

    Ok(row_start + position.column)
}

/// Converts a byte `offset` into `input` to a zero-based `(row, column)` point.
///
/// The row is the number of `\n` bytes before `offset`; the column is the number of bytes
/// between the start of that row and `offset`.
///
/// # Errors
///
/// Returns an error if `offset` is greater than `input.len()`.
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
    let Some(prefix) = input.get(..offset) else {
        return Err(anyhow!("Failed to address an offset: {offset}"));
    };

    // Count the newlines before `offset` while tracking where the current row begins.
    let (row, row_start) = memchr::memchr_iter(b'\n', prefix)
        .fold((0, 0), |(rows, _), newline| (rows + 1, newline + 1));

    Ok(Point {
        row,
        column: offset - row_start,
    })
}



================================================
FILE: crates/cli/src/playground.html
================================================
<head>
  <meta charset="utf-8">
  <title>tree-sitter THE_LANGUAGE_NAME</title>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.css">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.css">
  <link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png"
    sizes="32x32" />
  <link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png"
    sizes="16x16" />
</head>

<body>
  <div id="playground-container" style="visibility: hidden;">
    <header>
      <div class="header-item">
        <span class="language-name">Language: THE_LANGUAGE_NAME</span>
      </div>

      <div class="header-item">
        <input id="logging-checkbox" type="checkbox">
        <label for="logging-checkbox">log</label>
      </div>

      <div class="header-item">
        <input id="anonymous-nodes-checkbox" type="checkbox">
        <label for="anonymous-nodes-checkbox">show anonymous nodes</label>
      </div>

      <div class="header-item">
        <input id="query-checkbox" type="checkbox">
        <label for="query-checkbox">query</label>
      </div>

      <div class="header-item">
        <input id="accessibility-checkbox" type="checkbox">
        <label for="accessibility-checkbox">accessibility</label>
      </div>

      <div class="header-item">
        <label for="update-time">parse time: </label>
        <span id="update-time"></span>
      </div>

      <div class="header-item">
        <a href="https://tree-sitter.github.io/tree-sitter/7-playground.html#about">(?)</a>
      </div>

      <select id="language-select" style="display: none;">
        <option value="parser">Parser</option>
      </select>

      <div class="header-item">
        <button id="theme-toggle" class="theme-toggle" aria-label="Toggle theme">
          <svg class="sun-icon" viewBox="0 0 24 24" width="16" height="16">
            <path fill="currentColor"
              d="M12 17.5a5.5 5.5 0 1 0 0-11 5.5 5.5 0 0 0 0 11zm0 1.5a7 7 0 1 1 0-14 7 7 0 0 1 0 14zm0-16a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0V4a1 1 0 0 1 1-1zm0 15a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0v-2a1 1 0 0 1 1-1zm9-9a1 1 0 0 1-1 1h-2a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1zM4 12a1 1 0 0 1-1 1H1a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1z" />
          </svg>
          <svg class="moon-icon" viewBox="0 0 24 24" width="16" height="16">
            <path fill="currentColor"
              d="M12.1 22c-5.5 0-10-4.5-10-10s4.5-10 10-10c.2 0 .3 0 .5.1-1.3 1.4-2 3.2-2 5.2 0 4.1 3.4 7.5 7.5 7.5 2 0 3.8-.7 5.2-2 .1.2.1.3.1.5 0 5.4-4.5 9.7-10 9.7z" />
          </svg>
        </button>
      </div>
    </header>

    <main>
      <div id="input-pane">
        <div class="panel-header">Code</div>
        <div id="code-container">
          <textarea id="code-input"></textarea>
        </div>

        <div id="query-container" style="visibility: hidden; position: absolute;">
          <div class="panel-header">Query</div>
          <textarea id="query-input"></textarea>
        </div>
      </div>

      <div id="output-container-scroll">
        <div class="panel-header">Tree</div>
        <pre id="output-container" class="highlight"></pre>
      </div>
    </main>
  </div>

  <script src="https://code.jquery.com/jquery-3.3.1.min.js" crossorigin="anonymous">
  </script>

  <script src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.js"></script>

  <!-- Global read by the playground script; an empty string is used for this local page. -->
  <script>LANGUAGE_BASE_URL = "";</script>
  <!-- Both module URLs below are relative, so they are requested from the server that served this page. -->
  <script type="module" src="playground.js"></script>
  <script type="module">
    // Expose the web-tree-sitter bindings as a global for the playground script.
    import * as TreeSitter from './web-tree-sitter.js';
    window.TreeSitter = TreeSitter;
    // Deferred by a tick; `initializePlayground` is presumably defined by playground.js (confirm there).
    setTimeout(() => window.initializePlayground({local: true}), 1)
  </script>

  <style>
    /* Base Variables */
    :root {
      --light-bg: #f9f9f9;
      --light-border: #e0e0e0;
      --light-text: #333;
      --light-hover-border: #c1c1c1;
      --light-scrollbar-track: #f1f1f1;
      --light-scrollbar-thumb: #c1c1c1;
      --light-scrollbar-thumb-hover: #a8a8a8;

      --dark-bg: #1d1f21;
      --dark-border: #2d2d2d;
      --dark-text: #c5c8c6;
      --dark-panel-bg: #252526;
      --dark-code-bg: #1e1e1e;
      --dark-scrollbar-track: #25282c;
      --dark-scrollbar-thumb: #4a4d51;
      --dark-scrollbar-thumb-hover: #5a5d61;

      --primary-color: #0550ae;
      --primary-color-alpha: rgba(5, 80, 174, 0.1);
      --primary-color-alpha-dark: rgba(121, 192, 255, 0.1);
      --selection-color: rgba(39, 95, 255, 0.3);
    }

    /* Theme Colors */
    [data-theme="dark"] {
      --bg-color: var(--dark-bg);
      --border-color: var(--dark-border);
      --text-color: var(--dark-text);
      --panel-bg: var(--dark-panel-bg);
      --code-bg: var(--dark-code-bg);
    }

    [data-theme="light"] {
      --bg-color: var(--light-bg);
      --border-color: var(--light-border);
      --text-color: var(--light-text);
      --panel-bg: white;
      --code-bg: white;
    }

    /* Base Styles */
    body {
      margin: 0;
      padding: 0;
      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      background-color: var(--bg-color);
      color: var(--text-color);
    }

    /* Layout */
    #playground-container {
      width: 100%;
      height: 100vh;
      display: flex;
      flex-direction: column;
      background-color: var(--bg-color);
    }

    header {
      padding: 16px 24px;
      border-bottom: 1px solid var(--border-color);
      display: flex;
      align-items: center;
      gap: 20px;
      background-color: var(--panel-bg);
      font-size: 14px;
    }

    .header-item {
      display: flex;
      align-items: center;
      gap: 8px;
    }

    .language-name {
      font-weight: 600;
    }

    main {
      flex: 1;
      display: flex;
      overflow: hidden;
    }

    #input-pane {
      width: 50%;
      display: flex;
      flex-direction: column;
      border-right: 1px solid var(--border-color);
      background-color: var(--panel-bg);
      overflow: hidden;
    }

    #code-container {
      flex: 1;
      min-height: 0;
      position: relative;
      border-bottom: 1px solid var(--border-color);
      display: flex;
      flex-direction: column;
    }

    #query-container:not([style*="visibility: hidden"]) {
      flex: 1;
      min-height: 0;
      display: flex;
      flex-direction: column;
    }

    #query-container .panel-header {
      flex: 0 0 auto;
    }

    #query-container .CodeMirror {
      flex: 1;
      position: relative;
      min-height: 0;
    }

    #output-container-scroll {
      width: 50%;
      overflow: auto;
      background-color: var(--panel-bg);
      padding: 0;
      display: flex;
      flex-direction: column;
    }

    #output-container {
      font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
      line-height: 1.5;
      margin: 0;
      padding: 16px;
    }

    .panel-header {
      padding: 8px 16px;
      font-weight: 600;
      font-size: 14px;
      border-bottom: 1px solid var(--border-color);
      background-color: var(--panel-bg);
    }

    .CodeMirror {
      position: absolute;
      top: 0;
      left: 0;
      right: 0;
      bottom: 0;
      height: 100%;
      font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
      font-size: 14px;
      line-height: 1.6;
      background-color: var(--code-bg) !important;
      color: var(--text-color) !important;
    }

    .query-error {
      text-decoration: underline red dashed;
      -webkit-text-decoration: underline red dashed;
    }

    /* Scrollbars */
    ::-webkit-scrollbar {
      width: 8px;
      height: 8px;
    }

    ::-webkit-scrollbar-track {
      border-radius: 4px;
      background: var(--light-scrollbar-track);
    }

    ::-webkit-scrollbar-thumb {
      border-radius: 4px;
      background: var(--light-scrollbar-thumb);
    }

    ::-webkit-scrollbar-thumb:hover {
      background: var(--light-scrollbar-thumb-hover);
    }

    [data-theme="dark"] {
      ::-webkit-scrollbar-track {
        background: var(--dark-scrollbar-track) !important;
      }

      ::-webkit-scrollbar-thumb {
        background: var(--dark-scrollbar-thumb) !important;
      }

      ::-webkit-scrollbar-thumb:hover {
        background: var(--dark-scrollbar-thumb-hover) !important;
      }
    }

    /* Theme Toggle */
    .theme-toggle {
      background: none;
      border: 1px solid var(--border-color);
      border-radius: 4px;
      padding: 6px;
      cursor: pointer;
      color: var(--text-color);
    }

    .theme-toggle:hover {
      background-color: var(--primary-color-alpha);
    }

    [data-theme="light"] .moon-icon,
    [data-theme="dark"] .sun-icon {
      display: none;
    }

    /* Form Elements */
    input[type="checkbox"] {
      margin-right: 6px;
      vertical-align: middle;
    }

    label {
      font-size: 14px;
      margin-right: 16px;
      cursor: pointer;
    }

    #output-container a {
      cursor: pointer;
      text-decoration: none;
      color: #040404;
      padding: 2px;
    }

    #output-container a:hover {
      text-decoration: underline;
    }

    #output-container a.node-link.named {
      color: #0550ae;
    }

    #output-container a.node-link.anonymous {
      color: #116329;
    }

    #output-container a.node-link.anonymous:before {
      content: '"';
    }

    #output-container a.node-link.anonymous:after {
      content: '"';
    }

    #output-container a.node-link.error {
      color: #cf222e;
    }

    #output-container a.highlighted {
      background-color: #d9d9d9;
      color: red;
      border-radius: 3px;
      text-decoration: underline;
    }

    /* Dark Theme Node Colors */
    [data-theme="dark"] {
      & #output-container a {
        color: #d4d4d4;
      }

      & #output-container a.node-link.named {
        color: #79c0ff;
      }

      & #output-container a.node-link.anonymous {
        color: #7ee787;
      }

      & #output-container a.node-link.error {
        color: #ff7b72;
      }

      & #output-container a.highlighted {
        background-color: #373b41;
        color: red;
      }

      & .CodeMirror {
        background-color: var(--dark-code-bg) !important;
        color: var(--dark-text) !important;
      }

      & .CodeMirror-gutters {
        background-color: var(--dark-panel-bg) !important;
        border-color: var(--dark-border) !important;
      }

      & .CodeMirror-cursor {
        border-color: var(--dark-text) !important;
      }

      & .CodeMirror-selected {
        background-color: rgba(255, 255, 255, 0.1) !important;
      }
    }
  </style>
</body>



================================================
FILE: crates/cli/src/playground.rs
================================================
use std::{
    borrow::Cow,
    env, fs,
    net::TcpListener,
    path::{Path, PathBuf},
    str::{self, FromStr as _},
};

use anyhow::{anyhow, Context, Result};
use tiny_http::{Header, Response, Server};

use super::wasm;

// Generates a function `$name(tree_sitter_dir)` that returns the bytes of the asset at `$path`.
//
// * When `tree_sitter_dir` is `Some`, the asset is read from disk at `tree_sitter_dir.join($path)`
//   and the function panics if the read fails.
// * When it is `None`, the result depends on the `TREE_SITTER_EMBED_WASM_BINDING` cfg:
//   with the cfg set, the bytes embedded at compile time are returned; without it, an empty
//   slice is returned so the caller can detect that the asset is not bundled.
macro_rules! optional_resource {
    ($name:tt, $path:tt) => {
        // Variant with the asset embedded into the binary at compile time.
        #[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                // `include_bytes!` resolves this path relative to the current source file.
                Cow::Borrowed(include_bytes!(concat!("../../", $path)))
            }
        }

        // Variant without an embedded copy: only the on-disk override can supply data.
        #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                // Empty means "not bundled".
                Cow::Borrowed(&[])
            }
        }
    };
}

// Loaders for the three web assets the playground page requests. `serve` answers with a
// redirect to a hosted copy whenever one of these returns an empty slice.
optional_resource!(get_playground_js, "docs/src/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/web-tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/web-tree-sitter.wasm");

/// Returns the playground HTML template.
///
/// With no `tree_sitter_dir`, the copy of `playground.html` embedded at compile time is
/// returned. Otherwise the template is read from disk under `tree_sitter_dir`, which lets the
/// page be edited without rebuilding the CLI.
///
/// The file lives at `crates/cli/src/playground.html` in the repository; the older
/// `cli/src/playground.html` location is still tried as a fallback so that checkouts with the
/// previous layout keep working.
///
/// # Panics
///
/// Panics if `tree_sitter_dir` is given and the template cannot be read from either location.
fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
    match tree_sitter_dir {
        None => Cow::Borrowed(include_bytes!("playground.html")),
        Some(tree_sitter_dir) => Cow::Owned(
            fs::read(tree_sitter_dir.join("crates/cli/src/playground.html"))
                .or_else(|_| fs::read(tree_sitter_dir.join("cli/src/playground.html")))
                .unwrap(),
        ),
    }
}

pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
    let server = get_server()?;
    let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?;
    let url = format!("http://{}", server.server_addr());
    println!("Started playground on: {url}");
    if open_in_browser && webbrowser::open(&url).is_err() {
        eprintln!("Failed to open '{url}' in a web browser");
    }

    let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
    let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref()))
        .unwrap()
        .replace("THE_LANGUAGE_NAME", &grammar_name)
        .into_bytes();
    let playground_js = get_playground_js(tree_sitter_dir.as_deref());
    let lib_js = get_lib_js(tree_sitter_dir.as_deref());
    let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref());

    let html_header = Header::from_str("Content-Type: text/html").unwrap();
    let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
    let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap();

    for request in server.incoming_requests() {
        let res = match request.url() {
            "/" => response(&main_html, &html_header),
            "/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
            "/playground.js" => {
                if playground_js.is_empty() {
                    redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
                } else {
                    response(&playground_js, &js_header)
                }
            }
            "/web-tree-sitter.js" => {
                if lib_js.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.js")
                } else {
                    response(&lib_js, &js_header)
                }
            }
            "/web-tree-sitter.wasm" => {
                if lib_wasm.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.wasm")
                } else {
                    response(&lib_wasm, &wasm_header)
                }
            }
            _ => response(b"Not found", &html_header).with_status_code(404),
        };
        request
            .respond(res)
            .with_context(|| "Failed to write HTTP response")?;
    }

    Ok(())
}

/// Builds a `302` response with an empty body whose `Location` header is `url`.
fn redirect(url: &str) -> Response<&[u8]> {
    let empty_body: &[u8] = "".as_bytes();
    let without_location = Response::empty(302).with_data(empty_body, Some(0));
    let location = Header::from_bytes("Location", url.as_bytes()).unwrap();
    without_location.with_header(location)
}

/// Builds a `200` response that sends `data` as its body, declares the body length, and
/// carries a copy of `header`.
fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
    let body_length = Some(data.len());
    let with_body = Response::empty(200).with_data(data, body_length);
    with_body.with_header(header.clone())
}

/// Creates the playground HTTP server.
///
/// The bind address comes from `TREE_SITTER_PLAYGROUND_ADDR` and defaults to `127.0.0.1`.
/// If `TREE_SITTER_PLAYGROUND_PORT` is set, exactly that port is used; otherwise
/// `get_listener_on_available_port` picks one.
///
/// # Errors
///
/// Returns an error if the port variable is not a valid `u16`, if no listener can be bound,
/// or if the server cannot be created from the listener.
fn get_server() -> Result<Server> {
    let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());

    let listener = if let Ok(requested_port) = env::var("TREE_SITTER_PLAYGROUND_PORT") {
        // An explicitly requested port is used as-is; there is no fallback to scanning.
        let port = requested_port
            .parse::<u16>()
            .with_context(|| "Invalid port specification")?;
        bind_to(&addr, port).with_context(|| "Failed to bind to the specified port")?
    } else {
        get_listener_on_available_port(&addr)
            .with_context(|| "Failed to find a free port to bind to it")?
    };

    Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))
}

/// Tries ports 8000 through 11999 on `addr` in ascending order and returns a listener bound
/// to the first one that can be bound, or `None` if none of them can.
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
    for port in 8000..12000 {
        if let Some(listener) = bind_to(addr, port) {
            return Some(listener);
        }
    }
    None
}

/// Attempts to bind a TCP listener to `addr:port`, returning `None` on any failure.
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
    let endpoint = format!("{addr}:{port}");
    match TcpListener::bind(endpoint) {
        Ok(listener) => Some(listener),
        Err(_) => None,
    }
}



================================================
FILE: crates/cli/src/query.rs
================================================
use std::{
    fs,
    io::{self, Write},
    ops::Range,
    path::Path,
    time::Instant,
};

use anstyle::AnsiColor;
use anyhow::{Context, Result};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};

use crate::{
    query_testing::{self, to_utf8_point},
    test::paint,
};

/// Runs the query in `query_path` against the source file at `path` and reports the results.
///
/// * `name` is printed as a heading unless `should_test` or `stdin` is set.
/// * `ordered_captures` chooses between iterating individual captures and iterating whole
///   matches.
/// * `byte_range` / `point_range` restrict the query cursor when given.
/// * `should_test` suppresses the capture listing and instead checks the collected captures
///   against the assertion comments in the file.
/// * `quiet` suppresses the capture listing; `print_time` prints the time spent querying.
///
/// # Errors
///
/// Returns an error if a file cannot be read, the query does not compile, the language cannot
/// be set, writing to stdout fails, or an assertion fails while testing.
///
/// # Panics
///
/// Panics if the parser produces no tree for the source.
#[allow(clippy::too_many_arguments)]
pub fn query_file_at_path(
    language: &Language,
    path: &Path,
    name: &str,
    query_path: &Path,
    ordered_captures: bool,
    byte_range: Option<Range<usize>>,
    point_range: Option<Range<Point>>,
    should_test: bool,
    quiet: bool,
    print_time: bool,
    stdin: bool,
) -> Result<()> {
    let mut stdout = io::stdout().lock();

    let query_source = fs::read_to_string(query_path)
        .with_context(|| format!("Error reading query file {}", query_path.display()))?;
    let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;

    let mut query_cursor = QueryCursor::new();
    if let Some(range) = byte_range {
        query_cursor.set_byte_range(range);
    }
    if let Some(range) = point_range {
        query_cursor.set_point_range(range);
    }

    let mut parser = Parser::new();
    parser.set_language(language)?;

    if !(should_test || stdin) {
        writeln!(&mut stdout, "{name}")?;
    }

    let source_code =
        fs::read(path).with_context(|| format!("Error reading source file {}", path.display()))?;
    let source = source_code.as_slice();
    let tree = parser.parse(source, None).unwrap();

    // The per-capture listing is shown only for a plain, non-quiet run.
    let print_captures = !(quiet || should_test);

    // Every capture is recorded for the assertion check, whether or not it is printed.
    let capture_info = |capture_name: &str, start: Point, end: Point| query_testing::CaptureInfo {
        name: capture_name.to_string(),
        start: to_utf8_point(start, source),
        end: to_utf8_point(end, source),
    };
    let mut results = Vec::new();

    let timer = Instant::now();
    if ordered_captures {
        let mut captures = query_cursor.captures(&query, tree.root_node(), source);
        while let Some((mat, capture_index)) = captures.next() {
            let capture = mat.captures[*capture_index];
            let node = capture.node;
            let capture_name: &str = &query.capture_names()[capture.index as usize];
            if print_captures {
                writeln!(
                    &mut stdout,
                    "    pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`",
                    mat.pattern_index,
                    capture.index,
                    node.start_position(),
                    node.end_position(),
                    node.utf8_text(source).unwrap_or("")
                )?;
            }
            results.push(capture_info(
                capture_name,
                node.start_position(),
                node.end_position(),
            ));
        }
    } else {
        let mut matches = query_cursor.matches(&query, tree.root_node(), source);
        while let Some(m) = matches.next() {
            if print_captures {
                writeln!(&mut stdout, "  pattern: {}", m.pattern_index)?;
            }
            for capture in m.captures {
                let node = capture.node;
                let start = node.start_position();
                let end = node.end_position();
                let capture_name: &str = &query.capture_names()[capture.index as usize];
                if print_captures {
                    // The node text is included only for captures that fit on one row.
                    if start.row == end.row {
                        writeln!(
                            &mut stdout,
                            "    capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`",
                            capture.index,
                            node.utf8_text(source).unwrap_or("")
                        )?;
                    } else {
                        writeln!(
                            &mut stdout,
                            "    capture: {capture_name}, start: {start}, end: {end}",
                        )?;
                    }
                }
                results.push(capture_info(
                    capture_name,
                    node.start_position(),
                    node.end_position(),
                ));
            }
        }
    }

    if query_cursor.did_exceed_match_limit() {
        writeln!(
            &mut stdout,
            "  WARNING: Query exceeded maximum number of in-progress captures!"
        )?;
    }

    if should_test {
        let path_name = if stdin {
            "stdin"
        } else {
            path.file_name().unwrap().to_str().unwrap()
        };
        let assertion_count =
            match query_testing::assert_expected_captures(&results, path, &mut parser, language) {
                Ok(count) => count,
                Err(e) => {
                    println!("  ✗ {}", paint(Some(AnsiColor::Red), path_name));
                    return Err(e);
                }
            };
        println!(
            "  ✓ {} ({} assertions)",
            paint(Some(AnsiColor::Green), path_name),
            assertion_count
        );
    }

    if print_time {
        writeln!(&mut stdout, "{:?}", timer.elapsed())?;
    }

    Ok(())
}



================================================
FILE: crates/cli/src/query_testing.rs
================================================
use std::{fs, path::Path, sync::LazyLock};

use anyhow::{anyhow, Result};
use bstr::{BStr, ByteSlice};
use regex::Regex;
use tree_sitter::{Language, Parser, Point};

// Matches a run of word characters, `_`, `-` and `.`. `parse_position_comments` uses it to
// pull the expected capture name out of the text that follows an assertion arrow.
// Compiled lazily on first use.
static CAPTURE_NAME_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new("[\\w_\\-.]+").unwrap());

/// A zero-based row/column pair, as produced by [`to_utf8_point`].
///
/// Values order by `row` first and then by `column`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Utf8Point {
    /// Zero-based row.
    pub row: usize,
    /// Zero-based column within the row.
    pub column: usize,
}

impl Utf8Point {
    /// Creates a point from its `row` and `column`.
    #[must_use]
    pub const fn new(row: usize, column: usize) -> Self {
        Utf8Point { row, column }
    }
}

/// Formats the point as `(row, column)`.
impl std::fmt::Display for Utf8Point {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self { row, column } = self;
        write!(f, "({}, {})", row, column)
    }
}

#[must_use]
pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
    if point.column == 0 {
        return Utf8Point::new(point.row, 0);
    }

    let bstr = BStr::new(source);
    let line = bstr.lines_with_terminator().nth(point.row).unwrap();
    let mut utf8_column = 0;

    for (_, grapheme_end, _) in line.grapheme_indices() {
        utf8_column += 1;
        if grapheme_end >= point.column {
            break;
        }
    }

    Utf8Point {
        row: point.row,
        column: utf8_column,
    }
}

/// A single query capture: the capture's name and the range its node covers.
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
    /// Name of the capture.
    pub name: String,
    /// Where the captured node starts.
    pub start: Utf8Point,
    /// Where the captured node ends; `assert_expected_captures` treats the end column as
    /// exclusive.
    pub end: Utf8Point,
}

/// An expectation parsed from an assertion comment in a test file.
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
    /// Position the assertion points at.
    pub position: Utf8Point,
    /// Number of columns covered by the assertion, starting at `position`.
    pub length: usize,
    /// Whether the comment marked the assertion as negated with `!`.
    pub negative: bool,
    /// Capture name written in the comment.
    pub expected_capture_name: String,
}

impl Assertion {
    /// Creates an assertion that targets `(row, col)` and spans `length` columns.
    #[must_use]
    pub const fn new(
        row: usize,
        col: usize,
        length: usize,
        negative: bool,
        expected_capture_name: String,
    ) -> Self {
        let position = Utf8Point::new(row, col);
        Self {
            position,
            length,
            negative,
            expected_capture_name,
        }
    }
}

/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return the parsed [`Assertion`]s, sorted by the
/// position each one targets.
///
/// An assertion comment contains either an up arrow (`^`, optionally repeated to
/// cover several columns) or a left arrow (`<-`), optionally followed by `!` to
/// negate it, followed by the expected capture name. Comments on the first row
/// are ignored, since there is no line above them to refer to.
///
/// # Errors
///
/// Returns an error if no line above an assertion comment can be found that the
/// assertion could refer to.
///
/// # Panics
///
/// Panics if the language cannot be set on `parser` or if parsing yields no tree.
pub fn parse_position_comments(
    parser: &mut Parser,
    language: &Language,
    source: &[u8],
) -> Result<Vec<Assertion>> {
    let mut result = Vec::new();
    let mut assertion_ranges = Vec::new();

    // Parse the code.
    parser.set_included_ranges(&[]).unwrap();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Walk the tree, finding comment nodes that contain assertions.
    let mut ascending = false;
    let mut cursor = tree.root_node().walk();
    loop {
        if ascending {
            let node = cursor.node();

            // Find every comment node.
            if node.kind().to_lowercase().contains("comment") {
                if let Ok(text) = node.utf8_text(source) {
                    let mut position = node.start_position();
                    if position.row > 0 {
                        // Find the arrow character ("^" or "<-") in the comment. A left arrow
                        // refers to the column where the comment node starts. An up arrow refers
                        // to its own column.
                        let mut has_left_caret = false;
                        let mut has_arrow = false;
                        let mut negative = false;
                        let mut arrow_end = 0;
                        let mut arrow_count = 1;
                        for (i, c) in text.char_indices() {
                            // Advance by the full encoded width of the character so that
                            // `arrow_end` is always a valid char boundary, even when the
                            // comment contains multi-byte characters.
                            arrow_end = i + c.len_utf8();
                            if c == '-' && has_left_caret {
                                has_arrow = true;
                                break;
                            }
                            if c == '^' {
                                has_arrow = true;
                                position.column += i;
                                // Continue counting remaining arrows and update their end column
                                for (_, c) in text[arrow_end..].char_indices() {
                                    if c != '^' {
                                        arrow_end += arrow_count - 1;
                                        break;
                                    }
                                    arrow_count += 1;
                                }
                                break;
                            }
                            has_left_caret = c == '<';
                        }

                        // find any ! after arrows but before capture name
                        if has_arrow {
                            for (i, c) in text[arrow_end..].char_indices() {
                                if c == '!' {
                                    negative = true;
                                    arrow_end += i + 1;
                                    break;
                                } else if !c.is_whitespace() {
                                    break;
                                }
                            }
                        }

                        // Only look for a capture name when an arrow was actually found, so
                        // that ordinary comments are never sliced or searched.
                        let capture_name = if has_arrow {
                            CAPTURE_NAME_REGEX.find(&text[arrow_end..])
                        } else {
                            None
                        };

                        // If the comment node contains an arrow and a highlight name, record the
                        // highlight name and the position.
                        if let Some(mat) = capture_name {
                            assertion_ranges.push((node.start_position(), node.end_position()));
                            result.push(Assertion {
                                position: to_utf8_point(position, source),
                                length: arrow_count,
                                negative,
                                expected_capture_name: mat.as_str().to_string(),
                            });
                        }
                    }
                }
            }

            // Continue walking the tree.
            if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        } else if !cursor.goto_first_child() {
            ascending = true;
        }
    }

    // Adjust the row number in each assertion's position to refer to the line of
    // code *above* the assertion. There can be multiple lines of assertion comments and empty
    // lines, so the positions may have to be decremented by more than one row.
    let mut i = 0;
    let lines = source.lines_with_terminator().collect::<Vec<_>>();
    for assertion in &mut result {
        let original_position = assertion.position;
        loop {
            let on_assertion_line = assertion_ranges[i..]
                .iter()
                .any(|(start, _)| start.row == assertion.position.row);
            let on_empty_line = lines[assertion.position.row].len() <= assertion.position.column;
            if on_assertion_line || on_empty_line {
                if assertion.position.row > 0 {
                    assertion.position.row -= 1;
                } else {
                    return Err(anyhow!(
                        "Error: could not find a line that corresponds to the assertion `{}` located at {original_position}",
                        assertion.expected_capture_name
                    ));
                }
            } else {
                // Skip past assertion comments that start above the resolved row; they cannot
                // match any later assertion.
                while i < assertion_ranges.len()
                    && assertion_ranges[i].0.row < assertion.position.row
                {
                    i += 1;
                }
                break;
            }
        }
    }

    // The assertions can end up out of order due to the line adjustments.
    result.sort_unstable_by_key(|a| a.position);

    Ok(result)
}

/// Checks the captures in `infos` against the assertion comments found in the file at `path`.
///
/// For each assertion, the first capture whose range contains the asserted span is looked up.
/// The assertion fails if there is no such capture, or if that capture's name differs from
/// the expected one (a capture literally named `name` is never reported as a mismatch).
///
/// Returns the number of assertions that were checked.
///
/// # Errors
///
/// Returns an error if the file cannot be read, the assertion comments cannot be parsed, or
/// an assertion fails.
pub fn assert_expected_captures(
    infos: &[CaptureInfo],
    path: &Path,
    parser: &mut Parser,
    language: &Language,
) -> Result<usize> {
    let contents = fs::read_to_string(path)?;
    let assertions = parse_position_comments(parser, language, contents.as_bytes())?;

    for assertion in &assertions {
        // First capture that starts at or before the assertion and extends past its last column.
        let covering = infos.iter().find(|info| {
            let starts_in_time = assertion.position >= info.start;
            starts_in_time
                && (assertion.position.row < info.end.row
                    || assertion.position.column + assertion.length - 1 < info.end.column)
        });

        match covering {
            None => {
                return Err(anyhow!(
                    "Assertion failed: could not match {} at row {}, column {}",
                    assertion.expected_capture_name,
                    assertion.position.row,
                    assertion.position.column + assertion.length - 1,
                ));
            }
            Some(found)
                if assertion.expected_capture_name != found.name && found.name != "name" =>
            {
                return Err(anyhow!(
                    "Assertion failed: at {}, found {}, expected {}",
                    found.start,
                    found.name,
                    assertion.expected_capture_name,
                ));
            }
            Some(_) => {}
        }
    }

    Ok(assertions.len())
}



================================================
FILE: crates/cli/src/tags.rs
================================================
use std::{
    fs,
    io::{self, Write},
    path::Path,
    str,
    sync::{atomic::AtomicUsize, Arc},
    time::Instant,
};

use anyhow::Result;
use tree_sitter_tags::{TagsConfiguration, TagsContext};

/// Options controlling how `generate_tags` runs and what it prints.
pub struct TagsOptions {
    /// Not read by `generate_tags` in this file.
    // NOTE(review): presumably a language scope used by the caller to pick a
    // configuration — confirm against the call site.
    pub scope: Option<String>,
    /// When `true`, the per-tag lines (and the file-name header) are not printed.
    pub quiet: bool,
    /// When `true`, the elapsed time in milliseconds is printed after the tags.
    pub print_time: bool,
    /// Flag handed to the tags context so that tag generation can be cancelled.
    pub cancellation_flag: Arc<AtomicUsize>,
}

/// Generates tags for the file at `path` and prints them to stdout.
///
/// * `name` - label printed as a header line when `indent` is set and output is
///   not quiet.
/// * `config` - tags configuration used to generate the tags and to resolve
///   syntax type names.
/// * `indent` - when `true`, every output line is prefixed with a tab.
/// * `opts` - see [`TagsOptions`].
///
/// Each tag is printed as its name, syntax type, `def`/`ref`, span, and source
/// line, optionally followed by its docs (truncated to at most 120 bytes).
///
/// # Errors
///
/// Returns an error if the file cannot be read, if tag generation fails, or if
/// writing to stdout fails.
pub fn generate_tags(
    path: &Path,
    name: &str,
    config: &TagsConfiguration,
    indent: bool,
    opts: &TagsOptions,
) -> Result<()> {
    /// Maximum number of bytes of a tag's docs that are printed.
    const MAX_DOCS_LEN: usize = 120;

    let mut context = TagsContext::new();
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    let indent_str = if indent {
        if !opts.quiet {
            writeln!(&mut stdout, "{name}")?;
        }
        "\t"
    } else {
        ""
    };

    let source = fs::read(path)?;
    let start = Instant::now();
    for tag in context
        .generate_tags(config, &source, Some(&opts.cancellation_flag))?
        .0
    {
        let tag = tag?;
        if !opts.quiet {
            write!(
                &mut stdout,
                "{indent_str}{:<10}\t | {:<8}\t{} {} - {} `{}`",
                str::from_utf8(&source[tag.name_range]).unwrap_or(""),
                &config.syntax_type_name(tag.syntax_type_id),
                if tag.is_definition { "def" } else { "ref" },
                tag.span.start,
                tag.span.end,
                str::from_utf8(&source[tag.line_range]).unwrap_or(""),
            )?;
            if let Some(docs) = tag.docs {
                if docs.len() > MAX_DOCS_LEN {
                    // Slicing at a fixed byte offset can land inside a multi-byte
                    // character; back up to the nearest character boundary so the
                    // docs are truncated instead of being replaced by an empty
                    // string. Index 0 is always a boundary, so this terminates.
                    let mut end = MAX_DOCS_LEN;
                    while !docs.is_char_boundary(end) {
                        end -= 1;
                    }
                    write!(&mut stdout, "\t{:?}...", &docs[..end])?;
                } else {
                    write!(&mut stdout, "\t{:?}", &docs)?;
                }
            }
            writeln!(&mut stdout)?;
        }
    }

    if opts.print_time {
        writeln!(
            &mut stdout,
            "{indent_str}time: {}ms",
            start.elapsed().as_millis(),
        )?;
    }

    Ok(())
}



================================================
FILE: crates/cli/src/test_highlight.rs
================================================
use std::{fs, path::Path};

use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter::Point;
use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
use tree_sitter_loader::{Config, Loader};

use super::{
    query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
    test::paint,
    util,
};

/// A highlight assertion that did not hold, carrying enough information to
/// render a failure message.
#[derive(Debug)]
pub struct Failure {
    /// Row of the failing assertion's position.
    row: usize,
    /// Column reported for the failing assertion.
    column: usize,
    /// Highlight name the assertion expected.
    expected_highlight: String,
    /// Highlight names collected while checking the assertion.
    actual_highlights: Vec<String>,
}

// Marker impl so that `Failure` can be used as a standard error value.
impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Formats the failure as a single line: the position, the expected
    /// highlight, and then either `none.` or a comma-separated list of the
    /// quoted actual highlights.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ",
            self.row, self.column, self.expected_highlight
        )?;

        let mut remaining = self.actual_highlights.iter();
        let Some(actual_highlight) = remaining.next() else {
            return write!(f, "none.");
        };

        // The first entry has no separator; every later one is preceded by ", ".
        write!(f, "'{actual_highlight}'")?;
        for actual_highlight in remaining {
            write!(f, ", ")?;
            write!(f, "'{actual_highlight}'")?;
        }
        Ok(())
    }
}

/// Runs all highlight tests found under `directory`, printing a report to stdout.
///
/// Prints the `syntax highlighting:` heading and then delegates to
/// `test_highlights_indented`, starting at indent level 2.
pub fn test_highlights(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    let initial_indent_level = 2;

    println!("syntax highlighting:");
    test_highlights_indented(
        loader,
        loader_config,
        highlighter,
        directory,
        use_color,
        initial_indent_level,
    )
}

/// Runs the highlight tests in `directory`, recursing into non-empty
/// subdirectories, and prints one line per entry indented by
/// `indent_level * 2` spaces.
///
/// A failing test file or subdirectory does not stop the run; the function
/// keeps going and returns an (empty-message) error at the end if anything
/// failed. Errors from reading the directory or a file, or from resolving the
/// language or highlight configuration, are returned immediately.
fn test_highlights_indented(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;

    for highlight_test_file in fs::read_dir(directory)? {
        let highlight_test_file = highlight_test_file?;
        let test_file_path = highlight_test_file.path();
        let test_file_name = highlight_test_file.file_name();

        // The indentation is printed before it is known what kind of entry this is.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );

        // Non-empty directories are reported by name and then tested recursively.
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            println!("{}:", test_file_name.to_string_lossy());
            let nested_result = test_highlights_indented(
                loader,
                loader_config,
                highlighter,
                &test_file_path,
                use_color,
                indent_level + 1,
            );
            if nested_result.is_err() {
                failed = true;
            }
            continue;
        }

        let Some((language, language_config)) =
            loader.language_configuration_for_file_name(&test_file_path)?
        else {
            return Err(anyhow!(
                "{}",
                util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
            ));
        };
        let Some(highlight_config) = language_config.highlight_config(language, None)? else {
            return Err(anyhow!("No highlighting config found for {test_file_path:?}"));
        };

        let source = fs::read(&test_file_path)?;
        match test_highlight(loader, highlighter, highlight_config, source.as_slice()) {
            Ok(assertion_count) => {
                println!(
                    "✓ {} ({assertion_count} assertions)",
                    paint(
                        use_color.then_some(AnsiColor::Green),
                        test_file_name.to_string_lossy().as_ref()
                    ),
                );
            }
            Err(e) => {
                failed = true;
                println!(
                    "✗ {}",
                    paint(
                        use_color.then_some(AnsiColor::Red),
                        test_file_name.to_string_lossy().as_ref()
                    )
                );
                println!(
                    "{indent:indent_level$}  {e}",
                    indent = "",
                    indent_level = indent_level * 2
                );
            }
        }
    }

    if failed {
        return Err(anyhow!(""));
    }
    Ok(())
}
/// Checks every assertion against the actual highlight ranges.
///
/// * `assertions` - the assertions to check; the scan relies on them being
///   ordered by position.
/// * `highlights` - `(start, end, highlight)` ranges; the scan relies on them
///   being ordered by position as well.
/// * `highlight_names` - names indexed by the numeric value of a `Highlight`.
///
/// Returns the number of assertions when all of them pass.
///
/// # Errors
///
/// Returns a [`Failure`] for the first assertion that does not hold, listing
/// the highlights that were examined for it.
pub fn iterate_assertions(
    assertions: &[Assertion],
    highlights: &[(Utf8Point, Utf8Point, Highlight)],
    highlight_names: &[String],
) -> Result<usize> {
    // Index of the first highlight that may still cover an assertion. It only
    // ever moves forward because the assertions are ordered by position.
    let mut i = 0;
    let mut actual_highlights = Vec::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_highlight,
    } in assertions
    {
        let mut passed = false;
        let end_column = position.column + length - 1;
        actual_highlights.clear();

        // Skip past all of the highlights that end at or before this assertion's
        // position.
        while let Some(highlight) = highlights.get(i) {
            if highlight.1 > *position {
                break;
            }
            i += 1;
        }

        // Examine the highlights that start at or before the end of the asserted
        // span, looking for one that satisfies the assertion. Iterating over the
        // remaining slice guarantees termination once the highlights run out.
        for highlight in &highlights[i..] {
            // Compare (row, column) lexicographically: a highlight that starts on
            // a later row is past the assertion no matter what its column is.
            let starts_after_assertion = highlight.0.row > position.row
                || (highlight.0.row == position.row && highlight.0.column > end_column);
            if starts_after_assertion {
                break;
            }

            // If the highlight matches the assertion, or if the highlight doesn't
            // match the assertion but it's negative, this test passes. Otherwise,
            // add this highlight to the list of actual highlights that span the
            // assertion's position, in order to generate an error message in the event
            // of a failure.
            let highlight_name = &highlight_names[(highlight.2).0];
            if (*highlight_name == *expected_highlight) == *negative {
                actual_highlights.push(highlight_name);
            } else {
                passed = true;
                break;
            }
        }

        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_highlight: expected_highlight.clone(),
                actual_highlights: actual_highlights.into_iter().cloned().collect(),
            }
            .into());
        }
    }

    Ok(assertions.len())
}

/// Highlights `source` and checks the highlight assertions embedded in it.
///
/// The highlight names are taken from `loader`, the actual highlight ranges
/// from `get_highlight_positions`, and the assertions from
/// `parse_position_comments`; the three are then handed to
/// `iterate_assertions`, whose result is returned unchanged.
pub fn test_highlight(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<usize> {
    let names = loader.highlight_names();
    let actual_ranges = get_highlight_positions(loader, highlighter, highlight_config, source)?;

    let language = &highlight_config.language;
    let expectations = parse_position_comments(highlighter.parser(), language, source)?;

    iterate_assertions(&expectations, &actual_ranges, &names)
}

/// Highlights `source` and returns the highlighted ranges as
/// `(start, end, highlight)` triples.
///
/// One triple is produced for every source event that is emitted while at
/// least one highlight is active; the highlight recorded is the innermost
/// (most recently started) one.
///
/// # Errors
///
/// Returns an error if highlighting cannot be started or if any highlight
/// event is an error.
pub fn get_highlight_positions(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, Highlight)>> {
    // `row`, `column` and `byte_offset` describe the character most recently
    // taken from `char_indices`; `column` is accumulated from byte offsets.
    let mut row = 0;
    let mut column = 0;
    let mut byte_offset = 0;
    // Set when the most recently consumed character was `\n`, so that the row is
    // only advanced once the following character is consumed.
    let mut was_newline = false;
    let mut result = Vec::new();
    // Highlights that have started but not yet ended, innermost last.
    let mut highlight_stack = Vec::new();
    // Invalid UTF-8 is replaced rather than rejected; the highlighter is run on
    // the converted text so that offsets agree with `char_indices`.
    let source = String::from_utf8_lossy(source);
    let mut char_indices = source.char_indices();
    for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
        loader.highlight_config_for_injection_string(string)
    })? {
        match event? {
            HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                let mut start_position = Point::new(row, column);
                // Walk the characters until the tracked offset reaches `end`,
                // remembering the position last seen at or before `start`.
                while byte_offset < end {
                    if byte_offset <= start {
                        start_position = Point::new(row, column);
                    }
                    if let Some((i, c)) = char_indices.next() {
                        if was_newline {
                            row += 1;
                            column = 0;
                        } else {
                            column += i - byte_offset;
                        }
                        was_newline = c == '\n';
                        byte_offset = i;
                    } else {
                        break;
                    }
                }
                // Source text outside of any highlight produces no entry.
                if let Some(highlight) = highlight_stack.last() {
                    // NOTE(review): `to_utf8_point` presumably converts the
                    // byte-based column into a character-based one — confirm in
                    // `query_testing`.
                    let utf8_start_position = to_utf8_point(start_position, source.as_bytes());
                    let utf8_end_position =
                        to_utf8_point(Point::new(row, column), source.as_bytes());
                    result.push((utf8_start_position, utf8_end_position, *highlight));
                }
            }
        }
    }
    Ok(result)
}



================================================
FILE: crates/cli/src/test_tags.rs
================================================
use std::{fs, path::Path};

use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter_loader::{Config, Loader};
use tree_sitter_tags::{TagsConfiguration, TagsContext};

use super::{
    query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
    test::paint,
    util,
};

/// A tag assertion that did not hold, carrying enough information to render a
/// failure message.
#[derive(Debug)]
pub struct Failure {
    /// Row of the failing assertion's position.
    row: usize,
    /// Column reported for the failing assertion.
    column: usize,
    /// Tag name the assertion expected.
    expected_tag: String,
    /// Tag names collected while checking the assertion.
    actual_tags: Vec<String>,
}

// Marker impl so that `Failure` can be used as a standard error value.
impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Formats the failure as a single line: the position, the expected tag,
    /// and then either `none.` or a comma-separated list of the quoted actual
    /// tags.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected tag: '{}', actual tag: ",
            self.row, self.column, self.expected_tag
        )?;

        let mut remaining = self.actual_tags.iter();
        let Some(actual_tag) = remaining.next() else {
            return write!(f, "none.");
        };

        // The first entry has no separator; every later one is preceded by ", ".
        write!(f, "'{actual_tag}'")?;
        for actual_tag in remaining {
            write!(f, ", ")?;
            write!(f, "'{actual_tag}'")?;
        }
        Ok(())
    }
}

/// Runs all tag tests found under `directory`, printing a report to stdout.
///
/// Prints the `tags:` heading and then delegates to `test_tags_indented`,
/// starting at indent level 2.
pub fn test_tags(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    let initial_indent_level = 2;

    println!("tags:");
    test_tags_indented(
        loader,
        loader_config,
        tags_context,
        directory,
        use_color,
        initial_indent_level,
    )
}

/// Runs the tag tests in `directory`, recursing into non-empty subdirectories,
/// and prints one line per entry indented by `indent_level * 2` spaces.
///
/// A failing test file or subdirectory does not stop the run; the function
/// keeps going and returns an (empty-message) error at the end if anything
/// failed. Errors from reading the directory or a file, or from resolving the
/// language or tags configuration, are returned immediately.
pub fn test_tags_indented(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;

    for tag_test_file in fs::read_dir(directory)? {
        let tag_test_file = tag_test_file?;
        let test_file_path = tag_test_file.path();
        let test_file_name = tag_test_file.file_name();

        // The indentation is printed before it is known what kind of entry this is.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );

        // Non-empty directories are reported by name and then tested recursively.
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            println!("{}:", test_file_name.to_string_lossy());
            let nested_result = test_tags_indented(
                loader,
                loader_config,
                tags_context,
                &test_file_path,
                use_color,
                indent_level + 1,
            );
            if nested_result.is_err() {
                failed = true;
            }
            continue;
        }

        let Some((language, language_config)) =
            loader.language_configuration_for_file_name(&test_file_path)?
        else {
            return Err(anyhow!(
                "{}",
                util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
            ));
        };
        let Some(tags_config) = language_config.tags_config(language)? else {
            return Err(anyhow!("No tags config found for {test_file_path:?}"));
        };

        let source = fs::read(&test_file_path)?;
        match test_tag(tags_context, tags_config, source.as_slice()) {
            Ok(assertion_count) => {
                println!(
                    "✓ {} ({assertion_count} assertions)",
                    paint(
                        use_color.then_some(AnsiColor::Green),
                        test_file_name.to_string_lossy().as_ref()
                    ),
                );
            }
            Err(e) => {
                failed = true;
                println!(
                    "✗ {}",
                    paint(
                        use_color.then_some(AnsiColor::Red),
                        test_file_name.to_string_lossy().as_ref()
                    )
                );
                println!(
                    "{indent:indent_level$}  {e}",
                    indent = "",
                    indent_level = indent_level * 2
                );
            }
        }
    }

    if failed {
        return Err(anyhow!(""));
    }
    Ok(())
}

/// Generates tags for `source` and checks the tag assertions embedded in it.
///
/// The actual tags come from `get_tag_positions` and the assertions from
/// `parse_position_comments`.
///
/// Returns the number of assertions when all of them pass.
///
/// # Errors
///
/// Returns an error if tags or assertions cannot be produced, or a [`Failure`]
/// for the first assertion that does not hold, listing the tags that were
/// examined for it.
pub fn test_tag(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<usize> {
    let tags = get_tag_positions(tags_context, tags_config, source)?;
    let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?;

    // Iterate through all of the assertions, checking against the actual tags.
    let mut i = 0;
    let mut actual_tags = Vec::<&String>::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_tag,
    } in &assertions
    {
        let mut passed = false;
        let mut end_column = position.column + length - 1;
        // Start every assertion with an empty list so that a failure only reports
        // the tags examined for that assertion, not leftovers from earlier ones.
        actual_tags.clear();

        'tag_loop: while let Some(tag) = tags.get(i) {
            // Skip tags that end at or before this assertion's position.
            if tag.1 <= *position {
                i += 1;
                continue;
            }

            // Iterate through all of the tags that start at or before this assertion's
            // position, looking for one that matches the assertion
            let mut j = i;
            while let (false, Some(tag)) = (passed, tags.get(j)) {
                end_column = position.column + length - 1;
                // NOTE(review): only the column is compared here, so a tag on a
                // later row with a small column is still examined — confirm
                // whether the row should be taken into account as well.
                if tag.0.column > end_column {
                    break 'tag_loop;
                }

                // A tag satisfies the assertion when its name equals the expected
                // name (positive assertion) or differs from it (negative one).
                let tag_name = &tag.2;
                if (*tag_name == *expected_tag) == *negative {
                    actual_tags.push(tag_name);
                } else {
                    passed = true;
                    break 'tag_loop;
                }

                j += 1;
                // Stop once the last tag has been examined; otherwise the outer
                // loop would restart the scan from `i` indefinitely.
                if tag == tags.last().unwrap() {
                    break 'tag_loop;
                }
            }
        }

        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_tag: expected_tag.clone(),
                actual_tags: actual_tags.into_iter().cloned().collect(),
            }
            .into());
        }
    }

    Ok(assertions.len())
}

/// Generates tags for `source` and returns them as `(start, end, name)` triples.
///
/// The name is `definition.<type>` or `reference.<type>`, where `<type>` is the
/// syntax type name that `tags_config` reports for the tag. Tags that come back
/// from the generator as errors are skipped.
///
/// # Errors
///
/// Returns an error if tag generation cannot be started.
pub fn get_tag_positions(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, String)>> {
    let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;

    let mut tag_positions = Vec::new();
    for tag in tags_iter {
        // Individual tag errors are dropped rather than reported.
        let Ok(tag) = tag else {
            continue;
        };

        let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string();
        let tag_name = match tag.is_definition {
            true => format!("definition.{tag_postfix}"),
            false => format!("reference.{tag_postfix}"),
        };

        let start = to_utf8_point(tag.span.start, source);
        let end = to_utf8_point(tag.span.end, source);
        tag_positions.push((start, end, tag_name));
    }
    Ok(tag_positions)
}



================================================
FILE: crates/cli/src/tests.rs
================================================
mod async_context_test;
mod corpus_test;
mod detect_language;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

#[cfg(feature = "wasm")]
mod wasm_language_test;

use tree_sitter_generate::GenerateResult;

pub use crate::fuzz::{
    allocations,
    edits::{get_random_edit, invert_edit},
    random::Rand,
    ITERATION_COUNT,
};

/// Thin wrapper around [`tree_sitter_generate::generate_parser_for_grammar`] for use in tests:
/// callers supply only the grammar JSON, and a fixed `(0, 0, 0)` version is passed on their
/// behalf.
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
    let placeholder_version = Some((0, 0, 0));
    tree_sitter_generate::generate_parser_for_grammar(grammar_json, placeholder_version)
}



================================================
FILE: crates/cli/src/tree_sitter_cli.rs
================================================
#![doc = include_str!("../README.md")]

pub mod fuzz;
pub mod highlight;
pub mod init;
pub mod input;
pub mod logger;
pub mod parse;
pub mod playground;
pub mod query;
pub mod query_testing;
pub mod tags;
pub mod test;
pub mod test_highlight;
pub mod test_tags;
pub mod util;
pub mod version;
pub mod wasm;

#[cfg(test)]
mod tests;

// To run compile fail tests
#[cfg(doctest)]
mod tests;



================================================
FILE: crates/cli/src/util.rs
================================================
use std::{
    path::{Path, PathBuf},
    process::{Child, ChildStdin, Command, Stdio},
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    },
};

use anyhow::{anyhow, Context, Result};
use indoc::indoc;
use tree_sitter::{Parser, Tree};
use tree_sitter_config::Config;
use tree_sitter_loader::Config as LoaderConfig;

const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>

<style>
svg { width: 100%; }
</style>

";

#[must_use]
pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String {
    let path = path.display();
    format!(
        indoc! {"
            No language found for path `{}`

            If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n
            If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {}
        "},
        path,
        path,
        loader_config
            .parser_directories
            .iter()
            .enumerate()
            .map(|(i, d)| format!("  {}. {}", i + 1, d.display()))
            .collect::<Vec<_>>()
            .join("  \n"),
        path,
        if let Ok(Some(config_path)) = Config::find_config_file() {
            format!("located at {}", config_path.display())
        } else {
            String::from("which you need to create by running `tree-sitter init-config`")
        }
    )
}

/// Installs a Ctrl-C handler and returns the flag it sets.
///
/// The returned counter starts at 0 and is stored as 1 when the handler runs.
///
/// # Panics
///
/// Panics if the handler cannot be installed.
#[must_use]
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
    let cancellation_flag = Arc::new(AtomicUsize::new(0));

    // The handler owns its own handle to the shared flag.
    let handler_flag = Arc::clone(&cancellation_flag);
    ctrlc::set_handler(move || handler_flag.store(1, Ordering::Relaxed))
        .expect("Error setting Ctrl-C handler");

    cancellation_flag
}

/// A running `dot -Tsvg` process whose output is written to a file.
///
/// Dropping the session closes the process's stdin, waits for it to exit and,
/// if requested, opens the resulting file in a web browser.
pub struct LogSession {
    /// Path of the output file that `dot` writes to.
    path: PathBuf,
    /// Handle to the `dot` child process; taken when the session is dropped.
    dot_process: Option<Child>,
    /// Stdin of the `dot` process, to which graphs are written; taken (and
    /// thereby closed) when the session is dropped.
    dot_process_stdin: Option<ChildStdin>,
    /// Whether to open the output file in a browser when the session is dropped.
    open_log: bool,
}

/// Writes the dot graph of `tree` to a `dot` process whose output goes to `path`.
///
/// The session is dropped before returning, which waits for `dot` to finish.
// NOTE(review): `quiet` is forwarded as the session's `open_log` flag — confirm
// that callers pass the value with that meaning.
pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> {
    let session = LogSession::new(path, quiet)?;
    let dot_stdin = session.dot_process_stdin.as_ref().unwrap();
    tree.print_dot_graph(dot_stdin);
    Ok(())
}

/// Starts a `dot` session writing to `path` and directs `parser`'s dot graphs to it.
///
/// The session is returned to the caller; the output is finalized when the
/// session is dropped.
pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result<LogSession> {
    let session = LogSession::new(path, open_log)?;
    let dot_stdin = session.dot_process_stdin.as_ref().unwrap();
    parser.print_dot_graphs(dot_stdin);
    Ok(session)
}

impl LogSession {
    fn new(path: &str, open_log: bool) -> Result<Self> {
        use std::io::Write;

        let mut dot_file = std::fs::File::create(path)?;
        dot_file.write_all(HTML_HEADER)?;
        let mut dot_process = Command::new("dot")
            .arg("-Tsvg")
            .stdin(Stdio::piped())
            .stdout(dot_file)
            .spawn()
            .with_context(|| {
                "Failed to run the `dot` command. Check that graphviz is installed."
            })?;
        let dot_stdin = dot_process
            .stdin
            .take()
            .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
        Ok(Self {
            path: PathBuf::from(path),
            dot_process: Some(dot_process),
            dot_process_stdin: Some(dot_stdin),
            open_log,
        })
    }
}

impl Drop for LogSession {
    /// Finishes the session: closes `dot`'s stdin, waits for the process to
    /// exit, and opens the output file in a browser when `open_log` is set and
    /// the file contains more than the HTML header.
    ///
    /// Failures are reported on stderr instead of panicking, because a panic
    /// inside `drop` aborts the process if it happens while another panic is
    /// already unwinding.
    fn drop(&mut self) {
        use std::fs;

        // Closing stdin signals end-of-input so that `dot` can finish.
        drop(self.dot_process_stdin.take());

        let Some(dot_process) = self.dot_process.take() else {
            return;
        };
        let output = match dot_process.wait_with_output() {
            Ok(output) => output,
            Err(e) => {
                eprintln!("Failed to wait for the `dot` process: {e}");
                return;
            }
        };

        if !output.status.success() {
            eprintln!(
                "Dot failed: {} {}",
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            );
            return;
        }

        if !self.open_log {
            return;
        }

        // Only open the file when `dot` actually added something after the header.
        match fs::metadata(&self.path) {
            Ok(metadata) if metadata.len() > HTML_HEADER.len() as u64 => {
                if let Err(e) = webbrowser::open(&self.path.to_string_lossy()) {
                    eprintln!("Failed to open {} in a browser: {e}", self.path.display());
                }
            }
            Ok(_) => {}
            Err(e) => {
                eprintln!("Failed to read metadata of {}: {e}", self.path.display());
            }
        }
    }
}



================================================
FILE: crates/cli/src/version.rs
================================================
use std::{fs, path::PathBuf, process::Command};

use anyhow::{anyhow, Context, Result};
use regex::Regex;
use tree_sitter_loader::TreeSitterJSON;

/// A version-bump operation: the version string to write and the directory
/// whose project files should be updated.
pub struct Version {
    /// Version string written into each updated file.
    pub version: String,
    /// Directory in which the files to update are looked up.
    pub current_dir: PathBuf,
}

impl Version {
    /// Creates a `Version` that will write `version` into the files found in
    /// `current_dir`.
    #[must_use]
    pub const fn new(version: String, current_dir: PathBuf) -> Self {
        Self {
            version,
            current_dir,
        }
    }

    /// Writes the new version into every supported file in `current_dir`.
    ///
    /// `tree-sitter.json` is parsed first to find out whether the project has
    /// more than one grammar; then `tree-sitter.json`, `Cargo.toml`,
    /// `package.json`, the Makefile, `CMakeLists.txt` and `pyproject.toml` are
    /// updated in that order.
    ///
    /// # Errors
    ///
    /// Returns an error if `tree-sitter.json` cannot be read or parsed, or as
    /// soon as one of the updates fails (with the file name added as context).
    pub fn run(self) -> Result<()> {
        let manifest_path = self.current_dir.join("tree-sitter.json");
        let manifest_text = fs::read_to_string(manifest_path)?;
        let manifest = serde_json::from_str::<TreeSitterJSON>(&manifest_text)?;
        let is_multigrammar = manifest.grammars.len() > 1;

        // Shared by all of the error contexts below.
        let dir = self.current_dir.display();

        self.update_treesitter_json()
            .with_context(|| format!("Failed to update tree-sitter.json at {}", dir))?;
        self.update_cargo_toml()
            .with_context(|| format!("Failed to update Cargo.toml at {}", dir))?;
        self.update_package_json()
            .with_context(|| format!("Failed to update package.json at {}", dir))?;
        self.update_makefile(is_multigrammar)
            .with_context(|| format!("Failed to update Makefile at {}", dir))?;
        self.update_cmakelists_txt()
            .with_context(|| format!("Failed to update CMakeLists.txt at {}", dir))?;
        self.update_pyproject_toml()
            .with_context(|| format!("Failed to update pyproject.toml at {}", dir))?;

        Ok(())
    }

    /// Rewrites `tree-sitter.json`, replacing the quoted value on every line
    /// that contains `"version":` with the new version.
    ///
    /// The file is edited line by line so that everything else is preserved;
    /// the result is written back with `\n` line endings and a trailing newline.
    ///
    /// # Panics
    ///
    /// Panics if a line containing `"version":` has no quoted value after it.
    fn update_treesitter_json(&self) -> Result<()> {
        let json_path = self.current_dir.join("tree-sitter.json");
        let original = fs::read_to_string(&json_path)?;

        let mut updated_lines = Vec::new();
        for line in original.lines() {
            let Some(key_index) = line.find("\"version\":") else {
                updated_lines.push(line.to_string());
                continue;
            };

            // `start_quote` is the index just past the opening quote of the value;
            // `end_quote` is the index of the closing quote.
            let prefix_index = key_index + "\"version\":".len();
            let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1;
            let end_quote = line[start_quote + 1..].find('"').unwrap() + start_quote + 1;

            updated_lines.push(format!(
                "{}{}{}",
                &line[..start_quote],
                self.version,
                &line[end_quote..]
            ));
        }

        let updated = updated_lines.join("\n") + "\n";
        fs::write(json_path, updated)?;

        Ok(())
    }

    /// Updates the package version in `Cargo.toml`, if that file exists, and
    /// refreshes `Cargo.lock` when one is present.
    ///
    /// Only `version = ...` lines inside the `[package]` or
    /// `[workspace.package]` table are rewritten, so that `version` keys of
    /// other tables (for example `[dependencies.<name>]`) are left untouched.
    /// The file is written back with `\n` line endings and a trailing newline.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or written, or if
    /// `cargo generate-lockfile --offline` runs but exits unsuccessfully. A
    /// `cargo` binary that cannot be started at all is ignored.
    fn update_cargo_toml(&self) -> Result<()> {
        let cargo_toml_path = self.current_dir.join("Cargo.toml");
        if !cargo_toml_path.exists() {
            return Ok(());
        }

        let cargo_toml = fs::read_to_string(&cargo_toml_path)?;

        // Tracks whether the current line belongs to a table that carries the
        // package version.
        let mut in_package_table = false;
        let cargo_toml = cargo_toml
            .lines()
            .map(|line| {
                // A `[name]` line (optionally followed by a comment) starts a new
                // table. Array-of-tables headers such as `[[bin]]` yield a name
                // that still contains brackets and therefore never match.
                let header = line.split('#').next().unwrap_or("").trim();
                if let Some(name) = header
                    .strip_prefix('[')
                    .and_then(|rest| rest.strip_suffix(']'))
                {
                    let name = name.trim();
                    in_package_table = name == "package" || name == "workspace.package";
                }

                if in_package_table && line.starts_with("version =") {
                    format!("version = \"{}\"", self.version)
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n";

        fs::write(&cargo_toml_path, cargo_toml)?;

        if self.current_dir.join("Cargo.lock").exists() {
            let Ok(cmd) = Command::new("cargo")
                .arg("generate-lockfile")
                .arg("--offline")
                .current_dir(&self.current_dir)
                .output()
            else {
                return Ok(()); // cargo is not `executable`, ignore
            };

            if !cmd.status.success() {
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                return Err(anyhow!(
                    "Failed to run `cargo generate-lockfile`:\n{stderr}"
                ));
            }
        }

        Ok(())
    }

    /// Rewrites `package.json`, if it exists, replacing the quoted value on
    /// every line that contains `"version":` with the new version, and
    /// refreshes `package-lock.json` when one is present.
    ///
    /// The file is written back with `\n` line endings and a trailing newline.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or written, or if
    /// `npm install --package-lock-only` runs but exits unsuccessfully. An
    /// `npm` binary that cannot be started at all is ignored.
    ///
    /// # Panics
    ///
    /// Panics if a line containing `"version":` has no quoted value after it.
    fn update_package_json(&self) -> Result<()> {
        let package_json_path = self.current_dir.join("package.json");
        if !package_json_path.exists() {
            return Ok(());
        }

        let original = fs::read_to_string(&package_json_path)?;

        let mut updated_lines = Vec::new();
        for line in original.lines() {
            let Some(key_index) = line.find("\"version\":") else {
                updated_lines.push(line.to_string());
                continue;
            };

            // `start_quote` is the index just past the opening quote of the value;
            // `end_quote` is the index of the closing quote.
            let prefix_index = key_index + "\"version\":".len();
            let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1;
            let end_quote = line[start_quote + 1..].find('"').unwrap() + start_quote + 1;

            updated_lines.push(format!(
                "{}{}{}",
                &line[..start_quote],
                self.version,
                &line[end_quote..]
            ));
        }

        let updated = updated_lines.join("\n") + "\n";
        fs::write(package_json_path, updated)?;

        if !self.current_dir.join("package-lock.json").exists() {
            return Ok(());
        }

        let Ok(cmd) = Command::new("npm")
            .arg("install")
            .arg("--package-lock-only")
            .current_dir(&self.current_dir)
            .output()
        else {
            return Ok(()); // npm is not `executable`, ignore
        };

        if !cmd.status.success() {
            let stderr = String::from_utf8_lossy(&cmd.stderr);
            return Err(anyhow!("Failed to run `npm install`:\n{stderr}"));
        }

        Ok(())
    }

    /// Updates the `VERSION` line of the project's makefile, if it exists.
    ///
    /// For multi-grammar projects the makefile is `common/common.mak`;
    /// otherwise it is `Makefile`. The same file that is checked for existence
    /// is the one that is read and written back, so a multi-grammar project
    /// without a top-level `Makefile` no longer fails. Every line starting with
    /// `VERSION` is replaced by `VERSION := <version>`, and the file is written
    /// back with `\n` line endings and a trailing newline.
    ///
    /// # Errors
    ///
    /// Returns an error if the makefile cannot be read or written.
    fn update_makefile(&self, is_multigrammar: bool) -> Result<()> {
        let makefile_path = if is_multigrammar {
            self.current_dir.join("common").join("common.mak")
        } else {
            self.current_dir.join("Makefile")
        };

        if !makefile_path.exists() {
            return Ok(());
        }

        let makefile = fs::read_to_string(&makefile_path)?;

        let makefile = makefile
            .lines()
            .map(|line| {
                if line.starts_with("VERSION") {
                    format!("VERSION := {}", self.version)
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n";

        fs::write(&makefile_path, makefile)?;

        Ok(())
    }

    /// Updates the first quoted `VERSION "x.y.z"` occurrence in
    /// `CMakeLists.txt`, if that file exists.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or written, or if the
    /// pattern fails to compile.
    fn update_cmakelists_txt(&self) -> Result<()> {
        let cmake_path = self.current_dir.join("CMakeLists.txt");
        if !cmake_path.exists() {
            return Ok(());
        }

        let contents = fs::read_to_string(&cmake_path)?;

        // Group 1 keeps the `VERSION` keyword and surrounding whitespace; only
        // the quoted number is replaced.
        let version_pattern = Regex::new(r#"(\s*VERSION\s+)"[0-9]+\.[0-9]+\.[0-9]+""#)?;
        let replacement = format!(r#"$1"{}""#, self.version);
        let updated = version_pattern.replace(&contents, replacement);

        fs::write(cmake_path, updated.as_bytes())?;

        Ok(())
    }

    /// Rewrites `pyproject.toml`, if it exists, replacing every line that
    /// starts with `version =` with the new version.
    ///
    /// The file is written back with `\n` line endings and a trailing newline.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or written.
    fn update_pyproject_toml(&self) -> Result<()> {
        let pyproject_path = self.current_dir.join("pyproject.toml");
        if !pyproject_path.exists() {
            return Ok(());
        }

        let original = fs::read_to_string(&pyproject_path)?;

        let mut updated_lines = Vec::new();
        for line in original.lines() {
            let updated_line = if line.starts_with("version =") {
                format!("version = \"{}\"", self.version)
            } else {
                line.to_string()
            };
            updated_lines.push(updated_line);
        }

        let updated = updated_lines.join("\n") + "\n";
        fs::write(pyproject_path, updated)?;

        Ok(())
    }
}



================================================
FILE: crates/cli/src/wasm.rs
================================================
use std::{
    fs,
    path::{Path, PathBuf},
};

use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_generate::parse_grammar::GrammarJSON;
use tree_sitter_loader::Loader;
use wasmparser::Parser;

/// Loads the compiled wasm module for the grammar in `language_dir`.
///
/// The grammar name is read from `src/grammar.json`, and the module is expected
/// at `language_dir/tree-sitter-<name>.wasm`.
///
/// Returns the grammar name together with the raw bytes of the wasm file.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined or if the wasm
/// file cannot be read. Both failures are reported to the caller instead of
/// panicking.
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
    let grammar_name =
        get_grammar_name(language_dir).with_context(|| "Failed to get wasm filename")?;
    let wasm_filename = format!("tree-sitter-{grammar_name}.wasm");
    let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
        format!("Failed to read {wasm_filename}. Run `tree-sitter build --wasm` first.",)
    })?;
    Ok((grammar_name, contents))
}

pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
    let src_dir = language_dir.join("src");
    let grammar_json_path = src_dir.join("grammar.json");
    let grammar_json = fs::read_to_string(&grammar_json_path).with_context(|| {
        format!(
            "Failed to read grammar file {}",
            grammar_json_path.display()
        )
    })?;
    let grammar: GrammarJSON = serde_json::from_str(&grammar_json).with_context(|| {
        format!(
            "Failed to parse grammar file {}",
            grammar_json_path.display()
        )
    })?;
    Ok(grammar.name)
}

/// Compiles the grammar in `language_dir` to a wasm module and checks that the
/// module only imports symbols that are available to wasm parsers.
///
/// The grammar name is read from `src/grammar.json`. The module is written to
/// `output_file` when given, otherwise to
/// `output_dir/tree-sitter-<name>.wasm`. `root_dir` is forwarded unchanged to
/// the loader.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be read, if compilation fails,
/// if the produced module cannot be read or parsed, or if the module imports a
/// symbol that is not in any of the allowed symbol lists. In the last case the
/// error lists the offending symbols and the available standard-library ones.
pub fn compile_language_to_wasm(
    loader: &Loader,
    root_dir: Option<&Path>,
    language_dir: &Path,
    output_dir: &Path,
    output_file: Option<PathBuf>,
) -> Result<()> {
    let grammar_name = get_grammar_name(language_dir)?;
    let output_filename =
        output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm")));
    let src_path = language_dir.join("src");
    let scanner_path = loader.get_scanner_path(&src_path);
    loader.compile_parser_to_wasm(
        &grammar_name,
        root_dir,
        &src_path,
        // Only the scanner's file name is passed on, not its full path.
        scanner_path
            .as_ref()
            .and_then(|p| Some(Path::new(p.file_name()?))),
        &output_filename,
    )?;

    // Exit with an error if the external scanner uses symbols from the
    // C or C++ standard libraries that aren't available to wasm parsers.
    let stdlib_symbols = wasm_stdlib_symbols().collect::<Vec<_>>();
    let dylink_symbols = [
        "__indirect_function_table",
        "__memory_base",
        "__stack_pointer",
        "__table_base",
        "memory",
    ];
    let builtin_symbols = [
        "__assert_fail",
        "__cxa_atexit",
        "abort",
        "emscripten_notify_memory_growth",
        "tree_sitter_debug_message",
        "proc_exit",
    ];

    // Collect every import of the compiled module that is in none of the
    // three allow-lists above.
    let mut missing_symbols = Vec::new();
    let wasm_bytes = fs::read(&output_filename)?;
    let parser = Parser::new(0);
    for payload in parser.parse_all(&wasm_bytes) {
        if let wasmparser::Payload::ImportSection(imports) = payload? {
            for import in imports {
                let import = import?.name;
                if !builtin_symbols.contains(&import)
                    && !stdlib_symbols.contains(&import)
                    && !dylink_symbols.contains(&import)
                {
                    missing_symbols.push(import);
                }
            }
        }
    }

    if !missing_symbols.is_empty() {
        return Err(anyhow!(
            concat!(
                "This external scanner uses a symbol that isn't available to wasm parsers.\n",
                "\n",
                "Missing symbols:\n",
                "    {}\n",
                "\n",
                "Available symbols:\n",
                "    {}",
            ),
            missing_symbols.join("\n    "),
            stdlib_symbols.join("\n    ")
        ));
    }

    Ok(())
}



================================================
FILE: crates/cli/src/fuzz/allocations.rs
================================================
use std::{
    collections::HashMap,
    os::raw::c_void,
    sync::{
        atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
        Mutex,
    },
};

/// Installs the recording wrappers defined in this file as tree-sitter's
/// malloc/calloc/realloc/free implementations.
///
/// The `#[ctor::ctor]` attribute registers this function as a constructor, so
/// it is not called explicitly anywhere in this file.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}

/// Hashable wrapper around a raw allocation address, used as the key of the
/// outstanding-allocations map.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
// Raw pointers are neither `Send` nor `Sync`, so both are opted into manually.
// Within this file the wrapped pointer is only stored, hashed and compared;
// it is never dereferenced.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}

/// Bookkeeping for allocations made while recording is switched on.
#[derive(Default)]
struct AllocationRecorder {
    // When false, the allocator hooks forward to libc without recording.
    enabled: AtomicBool,
    // Number of allocations recorded so far; each recorded allocation is
    // tagged with the value this counter had when it was made.
    allocation_count: AtomicUsize,
    // Live allocations, mapped to the index they were tagged with.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}

// Each thread gets its own recorder, so `record` and the allocator hooks only
// observe allocations made on the thread that calls them.
thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}

// Declarations of the C allocator functions that the recording wrappers
// below forward to.
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}

/// Runs `f` with allocation recording switched on for the current thread and
/// reports any allocation that is still outstanding once `f` has returned.
///
/// The recorder is reset before `f` runs and recording is switched off again
/// afterwards.
///
/// # Errors
///
/// Returns `Err` with the indices of the leaked allocations (in map iteration
/// order) if any allocation recorded during `f` was not released.
pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
    RECORDER.with(|rec| {
        rec.enabled.store(true, SeqCst);
        rec.allocation_count.store(0, SeqCst);
        rec.outstanding_allocations.lock().unwrap().clear();
    });

    let value = f();

    let outstanding_allocation_indices: Vec<usize> = RECORDER.with(|rec| {
        rec.enabled.store(false, SeqCst);
        rec.allocation_count.store(0, SeqCst);
        let mut outstanding = rec.outstanding_allocations.lock().unwrap();
        let indices = outstanding.drain().map(|(_, index)| index).collect();
        indices
    });

    if outstanding_allocation_indices.is_empty() {
        Ok(value)
    } else {
        Err(format!(
            "Leaked allocation indices: {outstanding_allocation_indices:?}",
        ))
    }
}

/// Registers `ptr` as an outstanding allocation, tagged with the next
/// allocation index. Does nothing while recording is switched off.
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|rec| {
        if !rec.enabled.load(SeqCst) {
            return;
        }
        let index = rec.allocation_count.fetch_add(1, SeqCst);
        let mut outstanding = rec.outstanding_allocations.lock().unwrap();
        outstanding.insert(Allocation(ptr), index);
    });
}

/// Removes `ptr` from the set of outstanding allocations. Does nothing while
/// recording is switched off or when `ptr` was never recorded.
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|rec| {
        if !rec.enabled.load(SeqCst) {
            return;
        }
        let mut outstanding = rec.outstanding_allocations.lock().unwrap();
        outstanding.remove(&Allocation(ptr));
    });
}

/// `malloc` replacement: forwards to libc and records the returned pointer.
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let allocated = malloc(size);
    record_alloc(allocated);
    allocated
}

/// `calloc` replacement: forwards to libc and records the returned pointer.
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let allocated = calloc(count, size);
    record_alloc(allocated);
    allocated
}

/// `realloc` replacement: forwards to libc and keeps the recorder in sync.
///
/// A null input pointer is treated as a fresh allocation. If the block moved,
/// the old address is released and the new one recorded. If it stayed in
/// place, the existing record is left untouched.
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let new_ptr = realloc(ptr, size);
    let moved = !core::ptr::eq(ptr, new_ptr);
    match (ptr.is_null(), moved) {
        (true, _) => record_alloc(new_ptr),
        (false, true) => {
            record_dealloc(ptr);
            record_alloc(new_ptr);
        }
        (false, false) => {}
    }
    new_ptr
}

/// `free` replacement: drops the pointer from the recorder, then forwards to
/// libc.
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    record_dealloc(ptr);
    free(ptr);
}



================================================
FILE: crates/cli/src/fuzz/corpus_test.rs
================================================
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};

use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
use crate::util;

/// Asserts that every node in `tree` has mutually consistent byte offsets,
/// row/column positions, child ordering, named-child counts and change flags.
///
/// `input` is the text the tree was parsed from; it is used to translate
/// row/column positions back into byte offsets.
///
/// # Panics
///
/// Panics on the first inconsistency found.
pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
    /// Checks `node` and, recursively, all of its descendants.
    fn check(node: Node, line_offsets: &[usize]) {
        let (start_byte, end_byte) = (node.start_byte(), node.end_byte());
        let (start_point, end_point) = (node.start_position(), node.end_position());

        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        // A position must map back to the same byte offset via its line start.
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let child_count = node.child_count();
        let mut prev_end_byte = start_byte;
        let mut prev_end_point = start_point;
        let mut any_child_changed = false;
        let mut named_children_seen = 0;
        for index in 0..child_count {
            let child = node.child(index as u32).unwrap();
            // Children must be ordered and must not overlap.
            assert!(child.start_byte() >= prev_end_byte);
            assert!(child.start_position() >= prev_end_point);
            check(child, line_offsets);
            any_child_changed |= child.has_changes();
            if child.is_named() {
                named_children_seen += 1;
            }
            prev_end_byte = child.end_byte();
            prev_end_point = child.end_position();
        }

        assert_eq!(named_children_seen, node.named_child_count());

        if child_count > 0 {
            assert!(end_byte >= prev_end_byte);
            assert!(end_point >= prev_end_point);
        }

        // A changed child implies a changed parent.
        if any_child_changed {
            assert!(node.has_changes());
        }
    }

    // Byte offset at which each line starts: 0, then one past every newline.
    let line_offsets: Vec<usize> = std::iter::once(0)
        .chain(
            input
                .iter()
                .enumerate()
                .filter(|(_, byte)| **byte == b'\n')
                .map(|(index, _)| index + 1),
        )
        .collect();

    check(tree.root_node(), &line_offsets);
}

/// Verifies the changed ranges reported between `old_tree` and `new_tree`.
///
/// Two checks are made: no reported range may end beyond the end of both
/// trees, and every difference between the trees' scope sequences must be
/// covered by a reported range (delegated to `ScopeSequence::check_changes`).
///
/// # Errors
///
/// Returns a description of the first violation found.
pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
    let changed_ranges: Vec<_> = old_tree.changed_ranges(new_tree).collect();
    let old_scope_sequence = ScopeSequence::new(old_tree);
    let new_scope_sequence = ScopeSequence::new(new_tree);

    let old_range = old_tree.root_node().range();
    let new_range = new_tree.root_node().range();

    // Only the upper bounds of the combined extent take part in the check.
    let max_end_byte = old_range.end_byte.max(new_range.end_byte);
    let max_end_point = old_range.end_point.max(new_range.end_point);

    let out_of_bounds = changed_ranges
        .iter()
        .find(|candidate| {
            candidate.end_byte > max_end_byte || candidate.end_point > max_end_point
        });
    if let Some(range) = out_of_bounds {
        return Err(format!(
            "changed range extends outside of the old and new trees {range:?}",
        ));
    }

    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
}

/// Restricts `parser` to the parts of `input` enclosed by a pair of delimiters.
///
/// With `delimiters = Some((start, end))`, every region that follows an
/// occurrence of `start` and runs up to the next occurrence of `end` (or to
/// the end of `input` when `end` does not occur again) becomes an included
/// range. The delimiters themselves are not part of the ranges. Delimiters of
/// any non-empty length are supported; an empty delimiter never matches.
///
/// With `delimiters = None`, the included ranges are reset to an empty list.
///
/// # Panics
///
/// Panics if the parser rejects the computed ranges.
pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
    /// Offset of the first occurrence of `needle` in `haystack`. An empty
    /// needle yields `None`, which also keeps `windows` from being called
    /// with a size of zero.
    fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        if needle.is_empty() {
            return None;
        }
        haystack
            .windows(needle.len())
            .position(|win| win == needle)
    }

    let Some((start, end)) = delimiters else {
        parser.set_included_ranges(&[]).unwrap();
        return;
    };
    let (start, end) = (start.as_bytes(), end.as_bytes());

    let mut ranges = Vec::new();
    let mut ix = 0;
    while ix < input.len() {
        // The range begins right after the start delimiter.
        let Some(start_ix) = find(&input[ix..], start).map(|offset| ix + offset + start.len())
        else {
            break;
        };
        // The range ends where the end delimiter begins, or at end of input.
        let end_ix = find(&input[start_ix..], end).map_or(input.len(), |offset| start_ix + offset);
        ix = end_ix;
        ranges.push(Range {
            start_byte: start_ix,
            end_byte: end_ix,
            start_point: point_for_offset(input, start_ix),
            end_point: point_for_offset(input, end_ix),
        });
    }

    parser.set_included_ranges(&ranges).unwrap();
}

/// Computes the row/column position of byte `offset` within `text`.
///
/// Each `\n` before `offset` starts a new row and resets the column to zero.
/// Every other byte advances the column by one.
///
/// # Panics
///
/// Panics if `offset` is greater than `text.len()`.
fn point_for_offset(text: &[u8], offset: usize) -> Point {
    text[..offset]
        .iter()
        .fold(Point::default(), |mut point, &byte| {
            if byte == b'\n' {
                point.row += 1;
                point.column = 0;
            } else {
                point.column += 1;
            }
            point
        })
}

/// Creates a parser configured according to the fuzzing log settings.
///
/// When `LOG_ENABLED` is set, log messages are printed to stderr, with lexing
/// messages indented by two spaces. When `LOG_GRAPH_ENABLED` is set, graph
/// logging is started for `log_filename` and the resulting session is stored
/// in `session`.
///
/// # Panics
///
/// Panics if graph logging cannot be started.
pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| match log_type {
            LogType::Lex => eprintln!("  {msg}"),
            _ => eprintln!("{msg}"),
        })));
    }

    if *LOG_GRAPH_ENABLED {
        let graph_session = util::log_graphs(&mut parser, log_filename, false).unwrap();
        *session = Some(graph_session);
    }

    parser
}



================================================
FILE: crates/cli/src/fuzz/edits.rs
================================================
use super::random::Rand;

/// A single text edit: delete `deleted_length` bytes starting at `position`,
/// then insert `inserted_text` at that same position.
#[derive(Debug)]
pub struct Edit {
    /// Byte offset at which the edit starts.
    pub position: usize,
    /// Number of bytes removed, starting at `position`.
    pub deleted_length: usize,
    /// Bytes inserted at `position`.
    pub inserted_text: Vec<u8>,
}

/// Builds the edit that undoes `edit`.
///
/// `input` must be the text as it was *before* `edit` was applied: the bytes
/// that `edit` deletes are copied out of it and become the inverse edit's
/// inserted text, while the length of `edit`'s inserted text becomes the
/// number of bytes the inverse deletes.
///
/// # Panics
///
/// Panics if the deleted span of `edit` does not lie within `input`.
#[must_use]
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
    let start = edit.position;
    let end = start + edit.deleted_length;
    Edit {
        position: start,
        deleted_length: edit.inserted_text.len(),
        inserted_text: input[start..end].to_vec(),
    }
}

/// Draws a random edit that is valid for `input`.
///
/// One of four kinds is chosen from a single draw in `0..=10`:
/// * `0..=1`  - insert up to three words at the end of the input,
/// * `2..=4`  - delete up to 30 bytes from the end of the input,
/// * `5..=7`  - insert words at a random position,
/// * `8..=10` - replace a random span with words.
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
    let input_len = input.len();
    match rand.unsigned(10) {
        // Insert text at end
        0..=1 => {
            let inserted_text = rand.words(3);
            Edit {
                position: input_len,
                deleted_length: 0,
                inserted_text,
            }
        }
        // Delete text from the end
        2..=4 => {
            let deleted_length = rand.unsigned(30).min(input_len);
            Edit {
                position: input_len - deleted_length,
                deleted_length,
                inserted_text: Vec::new(),
            }
        }
        // Insert at a random position
        5..=7 => {
            let position = rand.unsigned(input_len);
            let word_count = 1 + rand.unsigned(3);
            let inserted_text = rand.words(word_count);
            Edit {
                position,
                deleted_length: 0,
                inserted_text,
            }
        }
        // Replace at random position
        _ => {
            let position = rand.unsigned(input_len);
            let deleted_length = rand.unsigned(input_len - position);
            let word_count = 1 + rand.unsigned(3);
            let inserted_text = rand.words(word_count);
            Edit {
                position,
                deleted_length,
                inserted_text,
            }
        }
    }
}



================================================
FILE: crates/cli/src/fuzz/random.rs
================================================
use rand::{
    distributions::Alphanumeric,
    prelude::{Rng, SeedableRng, StdRng},
};

/// Single-character operator tokens that `Rand::words` may emit in place of
/// an alphanumeric word.
const OPERATORS: &[char] = &[
    '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
];

/// Seedable random source used to generate fuzzing input.
pub struct Rand(StdRng);

impl Rand {
    /// Creates a generator whose output is fully determined by `seed`.
    #[must_use]
    pub fn new(seed: usize) -> Self {
        let rng = StdRng::seed_from_u64(seed as u64);
        Self(rng)
    }

    /// Returns a random value in the inclusive range `0..=max`.
    pub fn unsigned(&mut self, max: usize) -> usize {
        let bounds = 0..=max;
        self.0.gen_range(bounds)
    }

    /// Generates between zero and `max_count` random "words" as raw bytes.
    ///
    /// Consecutive words are separated by a space or, less often, a newline.
    /// Each word is either a single operator character or a run of zero to
    /// eight alphanumeric characters.
    pub fn words(&mut self, max_count: usize) -> Vec<u8> {
        let word_count = self.unsigned(max_count);
        let mut bytes = Vec::with_capacity(2 * word_count);

        for word_index in 0..word_count {
            if word_index > 0 {
                let separator = if self.unsigned(5) == 0 { b'\n' } else { b' ' };
                bytes.push(separator);
            }

            let emit_operator = self.unsigned(3) == 0;
            if emit_operator {
                let operator_index = self.unsigned(OPERATORS.len() - 1);
                bytes.push(OPERATORS[operator_index] as u8);
            } else {
                let letter_count = self.unsigned(8);
                for _ in 0..letter_count {
                    bytes.push(self.0.sample(Alphanumeric));
                }
            }
        }

        bytes
    }
}



================================================
FILE: crates/cli/src/fuzz/scope_sequence.rs
================================================
use tree_sitter::{Point, Range, Tree};

/// For every byte offset covered by a tree, the stack of node kinds that
/// enclose that byte (outermost first).
#[derive(Debug)]
pub struct ScopeSequence(Vec<ScopeStack>);

/// Node kinds enclosing a single byte, outermost first.
type ScopeStack = Vec<&'static str>;

impl ScopeSequence {
    /// Builds the scope sequence of `tree` by walking it with a cursor and
    /// recording, for each byte, the kinds of the nodes currently entered.
    #[must_use]
    pub fn new(tree: &Tree) -> Self {
        let mut result = Self(Vec::new());
        let mut scope_stack = Vec::new();

        let mut cursor = tree.walk();
        // True once the current node's children have been handled (or it has
        // none), i.e. the walk is on its way back out of the node.
        let mut visited_children = false;
        loop {
            let node = cursor.node();
            // Bytes before this node's start keep the current stack.
            for _ in result.0.len()..node.start_byte() {
                result.0.push(scope_stack.clone());
            }
            if visited_children {
                // Leaving the node: fill the bytes up to its end, then drop
                // its kind from the stack.
                for _ in result.0.len()..node.end_byte() {
                    result.0.push(scope_stack.clone());
                }
                scope_stack.pop();
                if cursor.goto_next_sibling() {
                    visited_children = false;
                } else if !cursor.goto_parent() {
                    // No sibling and no parent: the walk is back at the root.
                    break;
                }
            } else {
                // Entering the node: push its kind and descend if possible.
                scope_stack.push(cursor.node().kind());
                if !cursor.goto_first_child() {
                    visited_children = true;
                }
            }
        }

        result
    }

    /// Checks that every byte whose scope stack differs between `self` and
    /// `other` lies inside one of `known_changed_ranges`.
    ///
    /// Differences on `\r` and `\n` bytes are ignored. On the first uncovered
    /// difference, returns an error describing the position, the offending
    /// line, both scope stacks and the known ranges.
    ///
    /// NOTE(review): `text` is indexed up to the longer of the two sequences,
    /// so it is presumably at least that long - confirm against callers.
    pub fn check_changes(
        &self,
        other: &Self,
        text: &[u8],
        known_changed_ranges: &[Range],
    ) -> Result<(), String> {
        // Row/column of byte `i`, maintained incrementally as `i` advances.
        let mut position = Point { row: 0, column: 0 };
        for i in 0..(self.0.len().max(other.0.len())) {
            // `get` yields `None` past the end of the shorter sequence, which
            // then compares unequal to any stack in the longer one.
            let stack = &self.0.get(i);
            let other_stack = &other.0.get(i);
            if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
                let containing_range = known_changed_ranges
                    .iter()
                    .find(|range| range.start_point <= position && position < range.end_point);
                if containing_range.is_none() {
                    // Slice out the line containing byte `i` for the report.
                    let line = &text[(i - position.column)..]
                        .split(|c| *c == b'\n')
                        .next()
                        .unwrap();
                    return Err(format!(
                        concat!(
                            "Position: {}\n",
                            "Byte offset: {}\n",
                            "Line: {}\n",
                            "{}^\n",
                            "Old scopes: {:?}\n",
                            "New scopes: {:?}\n",
                            "Invalidated ranges: {:?}",
                        ),
                        position,
                        i,
                        String::from_utf8_lossy(line),
                        // Pads the caret so it lines up under the column in
                        // the printed "Line: ..." row.
                        String::from(" ").repeat(position.column + "Line: ".len()),
                        stack,
                        other_stack,
                        known_changed_ranges,
                    ));
                }
            }

            if text[i] == b'\n' {
                position.row += 1;
                position.column = 0;
            } else {
                position.column += 1;
            }
        }
        Ok(())
    }
}



================================================
FILE: crates/cli/src/templates/__init__.py
================================================
"""PARSER_DESCRIPTION"""

from importlib.resources import files as _files

from ._binding import language


def _get_query(name, file):
    """Load a query file from this package's ``queries`` resources.

    The text of ``file`` is stored in the module globals under ``name`` and
    then returned.
    """
    query = _files(f"{__package__}.queries") / file
    globals()[name] = query.read_text()
    return globals()[name]


def __getattr__(name):
    """Module-level attribute hook for the optional query constants.

    With every branch below commented out, any name that reaches this hook
    raises ``AttributeError``.
    """
    # NOTE: uncomment these to include any queries that this grammar contains:

    # if name == "HIGHLIGHTS_QUERY":
    #     return _get_query("HIGHLIGHTS_QUERY", "highlights.scm")
    # if name == "INJECTIONS_QUERY":
    #     return _get_query("INJECTIONS_QUERY", "injections.scm")
    # if name == "LOCALS_QUERY":
    #     return _get_query("LOCALS_QUERY", "locals.scm")
    # if name == "TAGS_QUERY":
    #     return _get_query("TAGS_QUERY", "tags.scm")

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


# Public names of this package; `__dir__` below also reports them.
__all__ = [
    "language",
    # "HIGHLIGHTS_QUERY",
    # "INJECTIONS_QUERY",
    # "LOCALS_QUERY",
    # "TAGS_QUERY",
]


def __dir__():
    """Return the sorted names in ``__all__`` plus the listed module dunders."""
    return sorted(__all__ + [
        "__all__", "__builtins__", "__cached__", "__doc__", "__file__",
        "__loader__", "__name__", "__package__", "__path__", "__spec__",
    ])



================================================
FILE: crates/cli/src/templates/__init__.pyi
================================================
from typing import Final

# NOTE: uncomment these to include any queries that this grammar contains:

# HIGHLIGHTS_QUERY: Final[str]
# INJECTIONS_QUERY: Final[str]
# LOCALS_QUERY: Final[str]
# TAGS_QUERY: Final[str]

# Typed only as `object`: this stub does not expose a more specific return type.
def language() -> object: ...



================================================
FILE: crates/cli/src/templates/_cargo.toml
================================================
[package]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
authors = ["PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL"]
license = "PARSER_LICENSE"
readme = "README.md"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
categories = ["parser-implementations", "parsing", "text-editors"]
repository = "PARSER_URL"
edition = "2021"
autoexamples = false

build = "bindings/rust/build.rs"
include = [
  "bindings/rust/*",
  "grammar.js",
  "queries/*",
  "src/*",
  "tree-sitter.json",
  "/LICENSE",
]

[lib]
path = "bindings/rust/lib.rs"

[dependencies]
tree-sitter-language = "0.1"

[build-dependencies]
cc = "1.2"

[dev-dependencies]
tree-sitter = "RUST_BINDING_VERSION"



================================================
FILE: crates/cli/src/templates/binding.go
================================================
package tree_sitter_LOWER_PARSER_NAME

// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #if __has_include("../../src/scanner.c")
// #include "../../src/scanner.c"
// #endif
import "C"

import "unsafe"

// Get the tree-sitter Language for this grammar.
//
// The value returned by the C parser entry point is handed back as an
// unsafe.Pointer, without any further wrapping.
func Language() unsafe.Pointer {
	return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME())
}



================================================
FILE: crates/cli/src/templates/binding.gyp
================================================
{
  "targets": [
    {
      "target_name": "tree_sitter_PARSER_NAME_binding",
      "dependencies": [
        "<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
      ],
      "include_dirs": [
        "src",
      ],
      "sources": [
        "bindings/node/binding.cc",
        "src/parser.c",
      ],
      "variables": {
        "has_scanner": "<!(node -p \"fs.existsSync('src/scanner.c')\")"
      },
      "conditions": [
        ["has_scanner=='true'", {
          "sources+": ["src/scanner.c"],
        }],
        ["OS!='win'", {
          "cflags_c": [
            "-std=c11",
          ],
        }, { # OS == "win"
          "cflags_c": [
            "/std:c11",
            "/utf-8",
          ],
        }],
      ],
    }
  ]
}



================================================
FILE: crates/cli/src/templates/binding_test.go
================================================
package tree_sitter_LOWER_PARSER_NAME_test

import (
	"testing"

	tree_sitter "github.com/tree-sitter/go-tree-sitter"
	tree_sitter_LOWER_PARSER_NAME "PARSER_URL_STRIPPED/bindings/go"
)

// TestCanLoadGrammar checks that wrapping the grammar's Language pointer with
// go-tree-sitter's NewLanguage does not yield nil.
func TestCanLoadGrammar(t *testing.T) {
	language := tree_sitter.NewLanguage(tree_sitter_LOWER_PARSER_NAME.Language())
	if language == nil {
		t.Errorf("Error loading TITLE_PARSER_NAME grammar")
	}
}



================================================
FILE: crates/cli/src/templates/binding_test.js
================================================
const assert = require("node:assert");
const { test } = require("node:test");

const Parser = require("tree-sitter");

// Smoke test: requiring this package and handing the result to
// `Parser#setLanguage` must not throw.
test("can load grammar", () => {
  const parser = new Parser();
  assert.doesNotThrow(() => parser.setLanguage(require(".")));
});



================================================
FILE: crates/cli/src/templates/build.rs
================================================
/// Build script: compiles the parser in `src/` (and the external scanner,
/// when one exists) with the `cc` crate.
fn main() {
    let src_dir = std::path::Path::new("src");

    let mut c_config = cc::Build::new();
    c_config.std("c11").include(src_dir);

    // This flag is only added when the target environment is MSVC.
    #[cfg(target_env = "msvc")]
    c_config.flag("-utf-8");

    let parser_path = src_dir.join("parser.c");
    c_config.file(&parser_path);
    // Ask Cargo to re-run this script whenever the parser source changes.
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

    // The scanner is optional: it is compiled only if `src/scanner.c` exists.
    let scanner_path = src_dir.join("scanner.c");
    if scanner_path.exists() {
        c_config.file(&scanner_path);
        println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    }

    c_config.compile("tree-sitter-KEBAB_PARSER_NAME");
}



================================================
FILE: crates/cli/src/templates/build.zig
================================================
const std = @import("std");

/// Build entry point: defines the parser library (shared or static), installs
/// it together with `node-types.json` and the query files, exposes the Zig
/// module, and registers a `test` step.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // User-facing build options and their defaults.
    const shared = b.option(bool, "build-shared", "Build a shared library") orelse true;
    const reuse_alloc = b.option(bool, "reuse-allocator", "Reuse the library allocator") orelse false;

    const lib: *std.Build.Step.Compile = if (shared) b.addSharedLibrary(.{
        .name = "tree-sitter-PARSER_NAME",
        .pic = true,
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    }) else b.addStaticLibrary(.{
        .name = "tree-sitter-PARSER_NAME",
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    });

    lib.addCSourceFile(.{
        .file = b.path("src/parser.c"),
        .flags = &.{"-std=c11"},
    });
    // The external scanner is optional and only compiled when present.
    if (hasScanner(b.build_root.handle)) {
        lib.addCSourceFile(.{
            .file = b.path("src/scanner.c"),
            .flags = &.{"-std=c11"},
        });
    }

    if (reuse_alloc) {
        lib.root_module.addCMacro("TREE_SITTER_REUSE_ALLOCATOR", "");
    }
    // Debug builds additionally define TREE_SITTER_DEBUG.
    if (optimize == .Debug) {
        lib.root_module.addCMacro("TREE_SITTER_DEBUG", "");
    }

    lib.addIncludePath(b.path("src"));

    b.installArtifact(lib);
    b.installFile("src/node-types.json", "node-types.json");
    // Only `.scm` files from `queries/` are installed.
    b.installDirectory(.{ .source_dir = b.path("queries"), .install_dir = .prefix, .install_subdir = "queries", .include_extensions = &.{"scm"} });

    const module = b.addModule("tree-sitter-PARSER_NAME", .{
        .root_source_file = b.path("bindings/zig/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    module.linkLibrary(lib);

    const ts_dep = b.dependency("tree-sitter", .{});
    const ts_mod = ts_dep.module("tree-sitter");
    module.addImport("tree-sitter", ts_mod);

    // ╭─────────────────╮
    // │      Tests      │
    // ╰─────────────────╯

    const tests = b.addTest(.{
        .root_source_file = b.path("bindings/zig/root.zig"),
        .target = target,
        .optimize = optimize,
    });
    tests.linkLibrary(lib);
    tests.root_module.addImport("tree-sitter", ts_mod);

    const run_tests = b.addRunArtifact(tests);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_tests.step);
}

/// Reports whether `src/scanner.c` is accessible relative to `dir`; any
/// access error is treated as "no scanner".
inline fn hasScanner(dir: std.fs.Dir) bool {
    dir.access("src/scanner.c", .{}) catch return false;
    return true;
}



================================================
FILE: crates/cli/src/templates/build.zig.zon
================================================
.{
    .name = "tree-sitter-PARSER_NAME",
    .version = "PARSER_VERSION",
    .dependencies = .{ .@"tree-sitter" = .{
        .url = "https://github.com/tree-sitter/zig-tree-sitter/archive/refs/tags/v0.25.0.tar.gz",
        .hash = "12201a8d5e840678bbbf5128e605519c4024af422295d68e2ba2090e675328e5811d",
    } },
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "bindings/zig",
        "src",
        "queries",
        "LICENSE",
        "README.md",
    },
}



================================================
FILE: crates/cli/src/templates/cmakelists.cmake
================================================
cmake_minimum_required(VERSION 3.13)

project(tree-sitter-KEBAB_PARSER_NAME
        VERSION "PARSER_VERSION"
        DESCRIPTION "PARSER_DESCRIPTION"
        HOMEPAGE_URL "PARSER_URL"
        LANGUAGES C)

option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
option(TREE_SITTER_REUSE_ALLOCATOR "Reuse the library allocator" OFF)

# The ABI version must be a plain integer. An invalid value is removed from
# the cache before aborting so that it is not kept for the next configure run.
set(TREE_SITTER_ABI_VERSION ABI_VERSION_MAX CACHE STRING "Tree-sitter ABI version")
if(NOT ${TREE_SITTER_ABI_VERSION} MATCHES "^[0-9]+$")
    unset(TREE_SITTER_ABI_VERSION CACHE)
    message(FATAL_ERROR "TREE_SITTER_ABI_VERSION must be an integer")
endif()

include(GNUInstallDirs)

find_program(TREE_SITTER_CLI tree-sitter DOC "Tree-sitter CLI")

# Generation happens in two stages: grammar.js -> src/grammar.json, and then
# src/grammar.json -> src/parser.c for the selected ABI version.
add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
                   DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/grammar.js"
                   COMMAND "${TREE_SITTER_CLI}" generate grammar.js
                            --stage=json
                   WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                   COMMENT "Generating grammar.json")

add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c"
                   DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
                   COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json
                            --stage=parser --abi=${TREE_SITTER_ABI_VERSION}
                   WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                   COMMENT "Generating parser.c")

# The external scanner is optional and only added when src/scanner.c exists.
add_library(tree-sitter-KEBAB_PARSER_NAME src/parser.c)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/scanner.c)
  target_sources(tree-sitter-KEBAB_PARSER_NAME PRIVATE src/scanner.c)
endif()
target_include_directories(tree-sitter-KEBAB_PARSER_NAME
                           PRIVATE src
                           INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/bindings/c>
                                     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)

# TREE_SITTER_REUSE_ALLOCATOR follows the option above; TREE_SITTER_DEBUG is
# defined for the Debug configuration only.
target_compile_definitions(tree-sitter-KEBAB_PARSER_NAME PRIVATE
                           $<$<BOOL:${TREE_SITTER_REUSE_ALLOCATOR}>:TREE_SITTER_REUSE_ALLOCATOR>
                           $<$<CONFIG:Debug>:TREE_SITTER_DEBUG>)

# The shared-object version combines the ABI version with the project's major
# version.
set_target_properties(tree-sitter-KEBAB_PARSER_NAME
                      PROPERTIES
                      C_STANDARD 11
                      POSITION_INDEPENDENT_CODE ON
                      SOVERSION "${TREE_SITTER_ABI_VERSION}.${PROJECT_VERSION_MAJOR}"
                      DEFINE_SYMBOL "")

# Generate the pkg-config file, substituting @VAR@ references only.
configure_file(bindings/c/tree-sitter-KEBAB_PARSER_NAME.pc.in
               "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc" @ONLY)

install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter"
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
        FILES_MATCHING PATTERN "*.h")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc"
        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(TARGETS tree-sitter-KEBAB_PARSER_NAME
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")

# Install every query file found in queries/.
file(GLOB QUERIES queries/*.scm)
install(FILES ${QUERIES}
        DESTINATION "${CMAKE_INSTALL_DATADIR}/tree-sitter/queries/KEBAB_PARSER_NAME")

# Convenience target that runs `tree-sitter test` from the source directory.
add_custom_target(ts-test "${TREE_SITTER_CLI}" test
                  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                  COMMENT "tree-sitter test")



================================================
FILE: crates/cli/src/templates/gitattributes
================================================
* text=auto eol=lf

# Generated source files
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated

# C bindings
bindings/c/** linguist-generated
CMakeLists.txt linguist-generated
Makefile linguist-generated

# Rust bindings
bindings/rust/* linguist-generated
Cargo.toml linguist-generated
Cargo.lock linguist-generated

# Node.js bindings
bindings/node/* linguist-generated
binding.gyp linguist-generated
package.json linguist-generated
package-lock.json linguist-generated

# Python bindings
bindings/python/** linguist-generated
setup.py linguist-generated
pyproject.toml linguist-generated

# Go bindings
bindings/go/* linguist-generated
go.mod linguist-generated
go.sum linguist-generated

# Swift bindings
bindings/swift/** linguist-generated
Package.swift linguist-generated
Package.resolved linguist-generated

# Zig bindings
bindings/zig/* linguist-generated
build.zig linguist-generated
build.zig.zon linguist-generated



================================================
FILE: crates/cli/src/templates/gitignore
================================================
# Rust artifacts
target/
Cargo.lock

# Node artifacts
build/
prebuilds/
node_modules/
package-lock.json

# Swift artifacts
.build/
Package.resolved

# Go artifacts
_obj/

# Python artifacts
.venv/
dist/
*.egg-info
*.whl

# C artifacts
*.a
*.so
*.so.*
*.dylib
*.dll
*.pc
*.exp
*.lib

# Zig artifacts
.zig-cache/
zig-cache/
zig-out/

# Example dirs
/examples/*/

# Grammar volatiles
*.wasm
*.obj
*.o

# Archives
*.tar.gz
*.tgz
*.zip



================================================
FILE: crates/cli/src/templates/go.mod
================================================
// Module path of the Go binding.
// NOTE(review): presumably a placeholder replaced when the template is rendered; confirm.
module PARSER_URL_STRIPPED

// Minimum Go version required by this module.
go 1.22

// Go bindings for the tree-sitter runtime.
require github.com/tree-sitter/go-tree-sitter v0.24.0



================================================
FILE: crates/cli/src/templates/grammar.js
================================================
/**
 * @file PARSER_DESCRIPTION
 * @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL
 * @license PARSER_LICENSE
 */

/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

// Grammar definition; `grammar` is provided by the DSL referenced in the
// triple-slash directive above.
module.exports = grammar({
  name: "LOWER_PARSER_NAME",

  rules: {
    // TODO: add the actual grammar rules
    // Placeholder rule: matches only the literal string "hello".
    source_file: $ => "hello"
  }
});



================================================
FILE: crates/cli/src/templates/index.d.ts
================================================
/** A node type reference: its `type` name and its `named` flag. */
type BaseNode = {
  type: string;
  named: boolean;
};

/**
 * Describes a child slot of a node: which node `types` may appear there,
 * and the `multiple` / `required` flags for that slot.
 */
type ChildNode = {
  multiple: boolean;
  required: boolean;
  types: BaseNode[];
};

/**
 * One entry of the node type information. An entry either lists `subtypes`,
 * or lists its `fields` (keyed by field name) together with its `children`.
 */
type NodeInfo =
  | (BaseNode & {
      subtypes: BaseNode[];
    })
  | (BaseNode & {
      fields: { [name: string]: ChildNode };
      children: ChildNode[];
    });

/** Shape of the object exported by this module. */
type Language = {
  language: unknown;
  nodeTypeInfo: NodeInfo[];
};

// CommonJS-style export of a single value of type `Language`.
declare const language: Language;
export = language;



================================================
FILE: crates/cli/src/templates/index.js
================================================
// Package root: two directories above this file. Passed to node-gyp-build below.
const root = require("path").join(__dirname, "..", "..");

// Load the native addon: under Bun via a direct path into prebuilds/,
// otherwise by letting node-gyp-build resolve it from the package root.
module.exports =
  typeof process.versions.bun === "string"
    // Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time
    ? require(`../../prebuilds/${process.platform}-${process.arch}/tree-sitter-KEBAB_PARSER_NAME.node`)
    : require("node-gyp-build")(root);

// Attach the static node type information when it can be loaded; any failure
// to load the JSON file is deliberately ignored and the property stays unset.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}



================================================
FILE: crates/cli/src/templates/js-binding.cc
================================================
#include <napi.h>

// Opaque forward declaration; only pointers to the language are handled here.
typedef struct TSLanguage TSLanguage;

// Parser entry point, declared with C linkage.
extern "C" TSLanguage *tree_sitter_PARSER_NAME();

// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
    0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};

// Module initializer: wraps the language pointer in a Napi::External, tags it
// with LANGUAGE_TYPE_TAG, and exposes it as the `language` export.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
    auto language = Napi::External<TSLanguage>::New(env, tree_sitter_PARSER_NAME());
    language.TypeTag(&LANGUAGE_TYPE_TAG);
    exports["language"] = language;
    return exports;
}

// Registers Init as the initializer of this native module.
NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init)



================================================
FILE: crates/cli/src/templates/lib.rs
================================================
//! This crate provides TITLE_PARSER_NAME language support for the [tree-sitter] parsing library.
//!
//! Typically, you will use the [`LANGUAGE`] constant to add this language to a
//! tree-sitter [`Parser`], and then use the parser to parse some code:
//!
//! ```
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! let language = tree_sitter_PARSER_NAME::LANGUAGE;
//! parser
//!     .set_language(&language.into())
//!     .expect("Error loading TITLE_PARSER_NAME parser");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [`Parser`]: https://docs.rs/tree-sitter/RUST_BINDING_VERSION/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/

use tree_sitter_language::LanguageFn;

extern "C" {
    // Parser entry point with C linkage; returns an untyped pointer.
    // NOTE(review): presumably provided by the compiled src/parser.c; confirm in the build script.
    fn tree_sitter_PARSER_NAME() -> *const ();
}

/// The tree-sitter [`LanguageFn`] for this grammar.
// `LanguageFn::from_raw` is unsafe; the extern function above is passed as the raw entry point.
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_PARSER_NAME) };

/// The content of the [`node-types.json`] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
// Embedded at compile time from the path relative to this source file.
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");

// NOTE: uncomment these to include any queries that this grammar contains:

// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");

#[cfg(test)]
mod tests {
    // Smoke test: the language constant must be accepted by a fresh parser.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&super::LANGUAGE.into())
            .expect("Error loading TITLE_PARSER_NAME parser");
    }
}



================================================
FILE: crates/cli/src/templates/makefile
================================================
# Base name of the library, header and pkg-config file.
LANGUAGE_NAME := tree-sitter-KEBAB_PARSER_NAME
# Written into the generated pkg-config file.
HOMEPAGE_URL := PARSER_URL
VERSION := PARSER_VERSION

# repository
SRC_DIR := src

# tree-sitter CLI used by the generate and test targets (overridable)
TS ?= tree-sitter

# install directory layout
PREFIX ?= /usr/local
DATADIR ?= $(PREFIX)/share
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
BINDIR ?= $(PREFIX)/bin
PCLIBDIR ?= $(LIBDIR)/pkgconfig

# source/object files
# EXTRAS is every .c file in SRC_DIR other than the parser itself.
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))

# flags
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC

# ABI versioning
# SONAME_MAJOR is the value of the LANGUAGE_VERSION define in the parser source;
# SONAME_MINOR is the first dot-separated component of VERSION.
SONAME_MAJOR = $(shell sed -n 's/\#define LANGUAGE_VERSION //p' $(PARSER))
SONAME_MINOR = $(word 1,$(subst ., ,$(VERSION)))

# OS-specific bits
# The target reported by the compiler selects the shared-library naming scheme
# and link flags: darwin, mingw32, or the generic ELF-style default.
MACHINE := $(shell $(CC) -dumpmachine)

ifneq ($(findstring darwin,$(MACHINE)),)
	SOEXT = dylib
	SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
	LINKSHARED = -dynamiclib -Wl,-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SOEXTVER),-rpath,@executable_path/../Frameworks
else ifneq ($(findstring mingw32,$(MACHINE)),)
	SOEXT = dll
	LINKSHARED += -s -shared -Wl,--out-implib,lib$(LANGUAGE_NAME).dll.a
else
	SOEXT = so
	SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
	SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
	LINKSHARED = -shared -Wl,-soname,lib$(LANGUAGE_NAME).$(SOEXTVER)
# These BSDs keep pkg-config files under libdata/ instead of lib/.
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
	PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
endif

# Default target: static library, shared library and pkg-config file.
all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc

# Static library: archive of all object files.
lib$(LANGUAGE_NAME).a: $(OBJS)
	$(AR) $(ARFLAGS) $@ $^

# Shared library; it is stripped afterwards only when STRIP is set.
lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
	$(STRIP) $@
endif

# On mingw32 the import library is written while linking the DLL (the
# --out-implib linker option), so it only needs to depend on the DLL.
ifneq ($(findstring mingw32,$(MACHINE)),)
lib$(LANGUAGE_NAME).dll.a: lib$(LANGUAGE_NAME).$(SOEXT)
endif

# Render the pkg-config file from its template. LIBDIR and INCLUDEDIR are
# written with the leading PREFIX/ removed.
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
	sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
		-e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \
		-e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \
		-e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \
		-e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
		-e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@

# Regenerate grammar.json from grammar.js.
$(SRC_DIR)/grammar.json: grammar.js
	$(TS) generate --stage=json $^

# Regenerate the parser source from grammar.json.
$(PARSER): $(SRC_DIR)/grammar.json
	$(TS) generate --stage=parser $^

# Install the header, pkg-config file, static library, shared library and
# query files below $(DESTDIR).
#
# The shared library is installed only inside the platform branch:
# - mingw32: the DLL goes to BINDIR and its import library to LIBDIR. No
#   versioned name is used, because SOEXTVER is not defined on that platform.
# - otherwise: the library is installed under its fully versioned name, with
#   the major-version and unversioned names created as symlinks to it.
install: all
	install -d '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
	install -m644 bindings/c/tree_sitter/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
	install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
	install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
ifneq ($(findstring mingw32,$(MACHINE)),)
	install -d '$(DESTDIR)$(BINDIR)'
	install -m755 lib$(LANGUAGE_NAME).dll '$(DESTDIR)$(BINDIR)'/lib$(LANGUAGE_NAME).dll
	install -m755 lib$(LANGUAGE_NAME).dll.a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).dll.a
else
	install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
	cd '$(DESTDIR)$(LIBDIR)' && ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
	cd '$(DESTDIR)$(LIBDIR)' && ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) lib$(LANGUAGE_NAME).$(SOEXT)
endif
ifneq ($(wildcard queries/*.scm),)
	install -m644 queries/*.scm '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME
endif

# Remove the files that `install` puts in place. $(RM) does not fail on
# missing files, so paths that were never installed are skipped silently.
uninstall:
	$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
		'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
ifneq ($(findstring mingw32,$(MACHINE)),)
	$(RM) '$(DESTDIR)$(BINDIR)'/lib$(LANGUAGE_NAME).dll \
		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).dll.a
endif
	$(RM) -r '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME

# Remove build outputs, including the import library written next to the DLL
# when linking on mingw32.
clean:
	$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) lib$(LANGUAGE_NAME).lib lib$(LANGUAGE_NAME).dll.a

# Run the grammar's test suite through the tree-sitter CLI.
test:
	$(TS) test

.PHONY: all install uninstall clean test



================================================
FILE: crates/cli/src/templates/package.json
================================================
{
  "name": "tree-sitter-PARSER_NAME",
  "version": "PARSER_VERSION",
  "description": "PARSER_DESCRIPTION",
  "repository": "PARSER_URL",
  "funding": "FUNDING_URL",
  "license": "PARSER_LICENSE",
  "author": {
    "name": "PARSER_AUTHOR_NAME",
    "email": "PARSER_AUTHOR_EMAIL",
    "url": "PARSER_AUTHOR_URL"
  },
  "main": "bindings/node",
  "types": "bindings/node",
  "keywords": [
    "incremental",
    "parsing",
    "tree-sitter",
    "LOWER_PARSER_NAME"
  ],
  "files": [
    "grammar.js",
    "tree-sitter.json",
    "binding.gyp",
    "prebuilds/**",
    "bindings/node/*",
    "queries/*",
    "src/**",
    "*.wasm"
  ],
  "dependencies": {
    "node-addon-api": "^8.3.1",
    "node-gyp-build": "^4.8.4"
  },
  "devDependencies": {
    "prebuildify": "^6.0.1",
    "tree-sitter-cli": "^CLI_VERSION"
  },
  "peerDependencies": {
    "tree-sitter": "^0.22.4"
  },
  "peerDependenciesMeta": {
    "tree-sitter": {
      "optional": true
    }
  },
  "scripts": {
    "install": "node-gyp-build",
    "prestart": "tree-sitter build --wasm",
    "start": "tree-sitter playground",
    "test": "node --test bindings/node/*_test.js"
  }
}



================================================
FILE: crates/cli/src/templates/package.swift
================================================
// swift-tools-version:5.3

import Foundation
import PackageDescription

// C sources compiled into the library target; the scanner is added only
// when src/scanner.c exists.
var sources = ["src/parser.c"]
if FileManager.default.fileExists(atPath: "src/scanner.c") {
    sources.append("src/scanner.c")
}

// Package manifest: one library product backed by a C target rooted at the
// repository top level, plus a test target that depends on SwiftTreeSitter.
let package = Package(
    name: "PARSER_CLASS_NAME",
    products: [
        .library(name: "PARSER_CLASS_NAME", targets: ["PARSER_CLASS_NAME"]),
    ],
    dependencies: [
        .package(name: "SwiftTreeSitter", url: "https://github.com/tree-sitter/swift-tree-sitter", from: "0.9.0"),
    ],
    targets: [
        .target(
            name: "PARSER_CLASS_NAME",
            dependencies: [],
            path: ".",
            sources: sources,
            // The queries directory is copied into the bundle as-is.
            resources: [
                .copy("queries")
            ],
            publicHeadersPath: "bindings/swift",
            // Lets the C sources include headers that live under src/.
            cSettings: [.headerSearchPath("src")]
        ),
        .testTarget(
            name: "PARSER_CLASS_NAMETests",
            dependencies: [
                "SwiftTreeSitter",
                "PARSER_CLASS_NAME",
            ],
            path: "bindings/swift/PARSER_CLASS_NAMETests"
        )
    ],
    cLanguageStandard: .c11
)



================================================
FILE: crates/cli/src/templates/PARSER_NAME.h
================================================
/* Public C header declaring the parser's entry point. */
#ifndef TREE_SITTER_UPPER_PARSER_NAME_H_
#define TREE_SITTER_UPPER_PARSER_NAME_H_

/* Opaque forward declaration; callers only handle pointers to it. */
typedef struct TSLanguage TSLanguage;

/* Give the declaration C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Returns a pointer to the language definition for this grammar. */
const TSLanguage *tree_sitter_PARSER_NAME(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_UPPER_PARSER_NAME_H_



================================================
FILE: crates/cli/src/templates/PARSER_NAME.pc.in
================================================
prefix=@CMAKE_INSTALL_PREFIX@
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@

Name: tree-sitter-PARSER_NAME
Description: @PROJECT_DESCRIPTION@
URL: @PROJECT_HOMEPAGE_URL@
Version: @PROJECT_VERSION@
# The -l name must match the file name of the installed library.
Libs: -L${libdir} -ltree-sitter-KEBAB_PARSER_NAME
Cflags: -I${includedir}



================================================
FILE: crates/cli/src/templates/py-binding.c
================================================
#include <Python.h>

/* Opaque forward declaration; only pointers to the language are handled here. */
typedef struct TSLanguage TSLanguage;

/* Parser entry point. */
TSLanguage *tree_sitter_LOWER_PARSER_NAME(void);

/* Implements `language()`: returns the language pointer wrapped in a
 * PyCapsule named "tree_sitter.Language" (no destructor). */
static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
    return PyCapsule_New(tree_sitter_LOWER_PARSER_NAME(), "tree_sitter.Language", NULL);
}

/* Module slots; the GIL slot is only compiled in when Py_GIL_DISABLED is defined. */
static struct PyModuleDef_Slot slots[] = {
#ifdef Py_GIL_DISABLED
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
    {0, NULL}
};

/* Method table: a single no-argument function, terminated by a sentinel entry. */
static PyMethodDef methods[] = {
    {"language", _binding_language, METH_NOARGS,
     "Get the tree-sitter language for this grammar."},
    {NULL, NULL, 0, NULL}
};

/* Module definition for `_binding`; m_size 0 means no per-module state. */
static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_binding",
    .m_doc = NULL,
    .m_size = 0,
    .m_methods = methods,
    .m_slots = slots,
};

/* Import hook: uses multi-phase initialization via PyModuleDef_Init. */
PyMODINIT_FUNC PyInit__binding(void) {
    return PyModuleDef_Init(&module);
}



================================================
FILE: crates/cli/src/templates/pyproject.toml
================================================
# Build backend: setuptools, with wheel available at build time.
[build-system]
requires = ["setuptools>=62.4.0", "wheel"]
build-backend = "setuptools.build_meta"

# Package metadata.
[project]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
classifiers = [
  "Intended Audience :: Developers",
  "Topic :: Software Development :: Compilers",
  "Topic :: Text Processing :: Linguistic",
  "Typing :: Typed",
]
authors = [{ name = "PARSER_AUTHOR_NAME", email = "PARSER_AUTHOR_EMAIL" }]
requires-python = ">=3.10"
license.text = "PARSER_LICENSE"
readme = "README.md"

[project.urls]
Homepage = "PARSER_URL"
Funding = "FUNDING_URL"

# Optional extra that pulls in the tree-sitter runtime package.
[project.optional-dependencies]
core = ["tree-sitter~=0.24"]

# cibuildwheel builds wheels for CPython 3.10 only.
[tool.cibuildwheel]
build = "cp310-*"
build-frontend = "build"



================================================
FILE: crates/cli/src/templates/root.zig
================================================
const testing = @import("std").testing;

const ts = @import("tree-sitter");
const Language = ts.Language;
const Parser = ts.Parser;

// Parser entry point, declared with the C calling convention.
pub extern fn tree_sitter_PARSER_NAME() callconv(.C) *const Language;

// Exported wrapper that returns the language pointer from the entry point above.
pub export fn language() *const Language {
    return tree_sitter_PARSER_NAME();
}

// Smoke test: a fresh parser accepts the language and then reports the same
// pointer back from getLanguage().
test "can load grammar" {
    const parser = Parser.create();
    defer parser.destroy();
    try testing.expectEqual(parser.setLanguage(language()), void{});
    try testing.expectEqual(parser.getLanguage(), tree_sitter_PARSER_NAME());
}




================================================
FILE: crates/cli/src/templates/setup.py
================================================
from os import name as os_name, path
from sysconfig import get_config_var

from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from setuptools.command.egg_info import egg_info
from wheel.bdist_wheel import bdist_wheel

# C sources compiled into the extension; the scanner is added only when
# src/scanner.c exists.
sources = [
    "bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c",
    "src/parser.c",
]
if path.exists("src/scanner.c"):
    sources.append("src/scanner.c")

# Preprocessor macros passed to the compiler as (name, value) pairs.
macros: list[tuple[str, str | None]] = [
    ("PY_SSIZE_T_CLEAN", None),
    ("TREE_SITTER_HIDE_SYMBOLS", None),
]
# Use the limited API (0x030A0000 is the hex version for Python 3.10) unless
# the interpreter was built with Py_GIL_DISABLED. `limited_api` is reused
# further down when the Extension is declared.
if limited_api := not get_config_var("Py_GIL_DISABLED"):
    macros.append(("Py_LIMITED_API", "0x030A0000"))

# Compiler flags: Unix-style flags everywhere except Windows (os.name "nt"),
# which gets MSVC-style flags.
if os_name != "nt":
    cflags = ["-std=c11", "-fvisibility=hidden"]
else:
    cflags = ["/std:c11", "/utf-8"]


class Build(build):
    """Build command that also ships the grammar's query files."""

    def run(self):
        """Copy ``queries/`` into the built package, then run the normal build.

        The destination uses the same lowercase package directory name as the
        extension source path and the ``package_data`` keys in this file, so
        the copied queries land inside the package that is actually built.
        """
        if path.isdir("queries"):
            dest = path.join(self.build_lib, "tree_sitter_LOWER_PARSER_NAME", "queries")
            self.copy_tree("queries", dest)
        super().run()


class BdistWheel(bdist_wheel):
    """Wheel command that tags limited-API CPython wheels as ``cp310-abi3``."""

    def get_tag(self):
        """Return the ``(python, abi, platform)`` tag triple for the wheel.

        The ``cp310``/``abi3`` override is applied only when the extension is
        actually built against the limited API (``limited_api`` is true).
        Otherwise the tag computed by the base class is returned unchanged,
        so the wheel is not advertised as abi3 when it is not.
        """
        python, abi, platform = super().get_tag()
        if limited_api and python.startswith("cp"):
            python, abi = "cp310", "abi3"
        return python, abi, platform


class EggInfo(egg_info):
    """egg_info command that adds extra files to the source file list."""

    def find_sources(self):
        """Collect the default sources, then add the query files and headers."""
        super().find_sources()
        # All *.scm files anywhere under queries/.
        self.filelist.recursive_include("queries", "*.scm")
        # Headers located in src/tree_sitter/.
        self.filelist.include("src/tree_sitter/*.h")


# Package definition: Python packages live under bindings/python, and the
# C extension is built as the `_binding` module inside the package.
setup(
    packages=find_packages("bindings/python"),
    package_dir={"": "bindings/python"},
    # Non-Python files shipped with the package: type stubs, the typing
    # marker, and the query files.
    package_data={
        "tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"],
        "tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"],
    },
    ext_package="tree_sitter_LOWER_PARSER_NAME",
    ext_modules=[
        Extension(
            name="_binding",
            sources=sources,
            extra_compile_args=cflags,
            define_macros=macros,
            include_dirs=["src"],
            py_limited_api=limited_api,
        )
    ],
    # Custom commands defined above replace the stock ones.
    cmdclass={
        "build": Build,
        "bdist_wheel": BdistWheel,
        "egg_info": EggInfo,
    },
    zip_safe=False
)



================================================
FILE: crates/cli/src/templates/test_binding.py
================================================
from unittest import TestCase

from tree_sitter import Language, Parser
import tree_sitter_LOWER_PARSER_NAME


class TestLanguage(TestCase):
    """Smoke test for the Python binding."""

    def test_can_load_grammar(self):
        """Building a Parser from the binding's language must not raise."""
        try:
            Parser(Language(tree_sitter_LOWER_PARSER_NAME.language()))
        except Exception:
            self.fail("Error loading TITLE_PARSER_NAME grammar")



================================================
FILE: crates/cli/src/templates/tests.swift
================================================
import XCTest
import SwiftTreeSitter
import PARSER_CLASS_NAME

/// Smoke test for the Swift binding.
final class PARSER_CLASS_NAMETests: XCTestCase {
    /// Setting the grammar's language on a fresh parser must not throw.
    func testCanLoadGrammar() throws {
        let parser = Parser()
        let language = Language(language: tree_sitter_LOWER_PARSER_NAME())
        XCTAssertNoThrow(try parser.setLanguage(language),
                         "Error loading TITLE_PARSER_NAME grammar")
    }
}



================================================
FILE: crates/cli/src/templates/.editorconfig
================================================
# Top-most EditorConfig file: editors stop searching parent directories here.
root = true

# Default for every file
[*]
charset = utf-8

[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2

[*.js]
indent_style = space
indent_size = 2

[*.scm]
indent_style = space
indent_size = 2

[*.{c,cc,h}]
indent_style = space
indent_size = 4

[*.rs]
indent_style = space
indent_size = 4

[*.{py,pyi}]
indent_style = space
indent_size = 4

[*.swift]
indent_style = space
indent_size = 4

[*.go]
indent_style = tab
indent_size = 8

[Makefile]
indent_style = tab
indent_size = 8

# The two sections below override the 4-space C/header default above for
# these specific file names.
[parser.c]
indent_size = 2

[{alloc,array,parser}.h]
indent_size = 2



================================================
FILE: crates/cli/src/tests/async_context_test.rs
================================================
use std::{
    future::Future,
    pin::{pin, Pin},
    ptr,
    task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker},
};

use tree_sitter::Parser;

use super::helpers::fixtures::get_language;

// Uses a `Node` both by value and by reference inside async closures and async
// blocks, across await points, and checks that both routes see the same child kind.
// `pended` is the number of times the outer future returned `Poll::Pending`;
// each `yield_now().await` that runs contributes exactly one.
#[test]
fn test_node_in_fut() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("bash");
        parser.set_language(&language).unwrap();

        let tree = parser.parse("#", None).unwrap();

        let root = tree.root_node();
        let root_ref = &root;

        // Closure returning a future that uses the node by value.
        let fut_val_fn = || async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };

        yield_now().await;

        // Closure returning a future that uses the node through a reference.
        let fut_ref_fn = || async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };

        let f1 = fut_val_fn().await;
        let f2 = fut_ref_fn().await;
        assert_eq!(f1, f2);

        // Same again with plain async blocks instead of closures.
        let fut_val = async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };

        let fut_ref = async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };

        let f1 = fut_val.await;
        let f2 = fut_ref.await;
        assert_eq!(f1, f2);

        f1
    })
    .join();
    assert_eq!(ret, "comment");
    // One direct yield plus one inside each of the four awaited futures.
    assert_eq!(pended, 5);
}

// Holds a `Node` (by value and by reference) and a mutable reference to a tree
// cursor across await points inside async blocks.
#[test]
fn test_node_and_cursor_ref_in_fut() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("c");
        parser.set_language(&language).unwrap();

        let tree = parser.parse("#", None).unwrap();

        let root = tree.root_node();
        let root_ref = &root;

        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;

        cursor_ref.goto_first_child();

        let fut_val = async {
            yield_now().await;
            let _ = root.to_sexp();
        };

        yield_now().await;

        let fut_ref = async {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };

        fut_val.await;
        fut_ref.await;

        // The cursor reference is usable again once `fut_ref` has completed.
        cursor_ref.goto_first_child();
    })
    .join();
    // One direct yield plus one inside each of the two awaited futures.
    assert_eq!(pended, 3);
}

// Same idea as the async-block test, but the futures are produced by closures
// ("future factories"), one of which is invoked twice.
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("javascript");
        parser.set_language(&language).unwrap();

        let tree = parser.parse("#", None).unwrap();

        let root = tree.root_node();
        let root_ref = &root;

        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;

        cursor_ref.goto_first_child();

        let fut_val = || async {
            yield_now().await;
            let _ = root.to_sexp();
        };

        yield_now().await;

        // `async move` moves the node reference and the mutable cursor
        // reference into the future.
        let fut_ref = || async move {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };

        fut_val().await;
        fut_val().await;
        fut_ref().await;
    })
    .join();
    // One direct yield plus one inside each of the three awaited futures.
    assert_eq!(pended, 4);
}

// Futures that own a clone of the tree are run both through nested
// `tokio_like_spawn` calls and by awaiting them directly in the outer future.
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("rust");
        parser.set_language(&language).unwrap();

        let tree = parser.parse("#", None).unwrap();

        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;

        cursor_ref.goto_first_child();

        // Each produced future owns its own clone of the tree.
        let fut_val = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root.to_sexp();
                cursor_ref.goto_first_child();
            }
        };

        yield_now().await;

        let fut_ref = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let root_ref = &root;
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root_ref.to_sexp();
                cursor_ref.goto_first_child();
            }
        };

        // The nested spawns poll their futures to completion themselves, so
        // their yields are counted in p1/p2 and not in the outer `pended`.
        let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap();
        let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap();

        cursor_ref.goto_first_child();

        fut_val().await;
        fut_val().await;
        fut_ref().await;

        cursor_ref.goto_first_child();

        p1 + p2
    })
    .join();
    // Outer future: one direct yield plus three directly awaited futures.
    assert_eq!(pended, 4);
    // Each nested spawn observed exactly one pending poll.
    assert_eq!(ret, 2);
}

/// Polls `future` to completion on the current thread using a no-op waker.
///
/// Returns a [`JoinHandle`] holding the future's output together with the
/// number of times the future reported `Poll::Pending` before finishing.
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
    T: Future + Send + 'static,
    T::Output: Send + 'static,
{
    // There is no runtime here: the future is simply re-polled until ready.
    let waker = noop_waker();
    let mut cx = task::Context::from_waker(&waker);
    let mut future = pin!(future);

    let mut pending = 0;
    loop {
        if let Poll::Ready(output) = future.as_mut().poll(&mut cx) {
            return JoinHandle::new((output, pending));
        }
        pending += 1;
    }
}

/// Suspends the calling future exactly once: the first poll returns
/// `Poll::Pending`, every later poll returns `Poll::Ready(())`.
async fn yield_now() {
    /// Future that is pending on its first poll only.
    struct YieldOnce {
        polled_before: bool,
    }

    impl Future for YieldOnce {
        type Output = ();

        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
            // The waker is notified on every poll, before deciding the result.
            cx.waker().wake_by_ref();
            // Record that a poll happened and branch on the previous state.
            if std::mem::replace(&mut self.polled_before, true) {
                Poll::Ready(())
            } else {
                Poll::Pending
            }
        }
    }

    YieldOnce {
        polled_before: false,
    }
    .await;
}

/// Builds a [`Waker`] whose clone, wake, wake-by-ref and drop operations all
/// do nothing; its data pointer is null.
pub const fn noop_waker() -> Waker {
    /// Cloning hands out the same no-op raw waker again.
    fn clone_raw(_: *const ()) -> RawWaker {
        NOOP_RAW
    }

    /// Shared by `wake`, `wake_by_ref` and `drop`: nothing is allocated, so
    /// there is nothing to notify or release.
    fn ignore(_: *const ()) {}

    const NOOP_VTABLE: RawWakerVTable = RawWakerVTable::new(clone_raw, ignore, ignore, ignore);
    const NOOP_RAW: RawWaker = RawWaker::new(ptr::null(), &NOOP_VTABLE);

    unsafe { Waker::from_raw(NOOP_RAW) }
}

/// Holder for an already-computed result that can be taken out exactly once,
/// either synchronously with [`JoinHandle::join`] or by awaiting the handle.
struct JoinHandle<T> {
    data: Option<T>,
}

impl<T> JoinHandle<T> {
    /// Wraps a finished result.
    #[must_use]
    const fn new(data: T) -> Self {
        let data = Some(data);
        Self { data }
    }

    /// Takes the stored result out of the handle.
    ///
    /// Panics if the result has already been taken.
    fn join(&mut self) -> T {
        let taken = self.data.take();
        taken.unwrap()
    }
}

impl<T: Unpin> Future for JoinHandle<T> {
    type Output = std::result::Result<T, ()>;

    /// Resolves immediately with `Ok` of the stored result.
    ///
    /// Panics if the result has already been taken.
    fn poll(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
        let taken = self.data.take();
        Poll::Ready(Ok(taken.unwrap()))
    }
}



================================================
FILE: crates/cli/src/tests/corpus_test.rs
================================================
use std::{collections::HashMap, env, fs};

use tree_sitter::Parser;
use tree_sitter_proc_macro::test_with_seed;

use crate::{
    fuzz::{
        corpus_test::{
            check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
        },
        edits::{get_random_edit, invert_edit},
        flatten_tests, new_seed,
        random::Rand,
        EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
        LOG_GRAPH_ENABLED, START_SEED,
    },
    parse::perform_edit,
    test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
    tests::{
        allocations,
        helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
    },
};

// Per-language corpus tests. Each one forwards to `test_language_corpus` with
// the language name, the seed supplied by `test_with_seed`, an optional list
// of test names passed as `skipped`, and an optional `language_dir`.

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash_language(seed: usize) {
    test_language_corpus(
        "bash",
        seed,
        Some(&[
            // Fragile tests where edit customization changes
            // lead to significant parse tree structure changes.
            "bash - corpus - commands - Nested Heredocs",
            "bash - corpus - commands - Quoted Heredocs",
            "bash - corpus - commands - Heredocs with weird characters",
        ]),
        None,
    );
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c_language(seed: usize) {
    test_language_corpus("c", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp_language(seed: usize) {
    test_language_corpus("cpp", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template_language(seed: usize) {
    test_language_corpus("embedded-template", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go_language(seed: usize) {
    test_language_corpus("go", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html_language(seed: usize) {
    test_language_corpus("html", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_java_language(seed: usize) {
    test_language_corpus(
        "java",
        seed,
        Some(&["java - corpus - expressions - switch with unnamed pattern variable"]),
        None,
    );
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript_language(seed: usize) {
    test_language_corpus("javascript", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json_language(seed: usize) {
    test_language_corpus("json", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php_language(seed: usize) {
    test_language_corpus("php", seed, None, Some("php"));
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python_language(seed: usize) {
    test_language_corpus("python", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby_language(seed: usize) {
    test_language_corpus("ruby", seed, None, None);
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust_language(seed: usize) {
    test_language_corpus("rust", seed, None, None);
}

// The typescript and tsx tests share the "typescript" language name and
// differ only in the `language_dir` argument.
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_typescript_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("typescript"));
}

#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_tsx_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("tsx"));
}

/// Runs the corpus examples of one language fixture and then stress-tests each
/// passing example with random edits and incremental re-parses.
///
/// * `language_name` - fixture name; the corpus is read from
///   `<fixtures>/grammars/<language_name>/test/corpus`. If `LANGUAGE_FILTER` is
///   set and differs from this name, the function returns without doing anything.
/// * `start_seed` - base seed; trial `i` of every example uses `start_seed + i`.
/// * `skipped` - optional list of full labels (`"<language_name> - <test name>"`)
///   to skip. Every entry must match at least one example, otherwise the
///   function panics at the end.
/// * `language_dir` - optional sub-directory of the fixture; when given, the
///   language is loaded from `"<language_name>/<language_dir>"` and examples are
///   filtered by their `languages` list.
///
/// # Panics
///
/// Panics if any example fails, if a skip entry never matched, or if any of the
/// `unwrap()`ed operations below (parsing the corpus, setting the language,
/// parsing, applying edits, writing edit dumps) fails.
pub fn test_language_corpus(
    language_name: &str,
    start_seed: usize,
    skipped: Option<&[&str]>,
    language_dir: Option<&str>,
) {
    // Honor the optional language filter: other languages are silently skipped.
    if let Some(filter) = LANGUAGE_FILTER.as_ref() {
        if language_name != filter {
            return;
        }
    }

    // `None` becomes the empty string, which is treated below as "no sub-directory".
    let language_dir = language_dir.unwrap_or_default();

    let grammars_dir = fixtures_dir().join("grammars");
    let error_corpus_dir = fixtures_dir().join("error_corpus");
    let template_corpus_dir = fixtures_dir().join("template_corpus");
    let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");

    println!("Testing {language_name} corpus @ {}", corpus_dir.display());

    let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
    let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
    // The main corpus is mandatory; the error and template corpora fall back to
    // their default value when `parse_tests` returns an error.
    let main_tests = parse_tests(&corpus_dir).unwrap();
    let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
    let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
    let mut tests = flatten_tests(
        main_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    );
    tests.extend(flatten_tests(
        error_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    ));
    // Examples from the template corpus are tagged with `<%` / `%>` delimiters,
    // which are later handed to `set_included_ranges`.
    tests.extend(
        flatten_tests(
            template_tests,
            EXAMPLE_INCLUDE.as_ref(),
            EXAMPLE_EXCLUDE.as_ref(),
        )
        .into_iter()
        .map(|mut t| {
            t.template_delimiters = Some(("<%", "%>"));
            t
        }),
    );

    // Keep an example if its first `languages` entry is empty or if its
    // `languages` list contains `language_dir`.
    // NOTE(review): `t.languages[0]` assumes the list is never empty - confirm
    // against `flatten_tests`.
    tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));

    // Map each skip label to the number of examples it matched (initially 0).
    let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::<HashMap<&str, usize>>());

    let language_path = if language_dir.is_empty() {
        language_name.to_string()
    } else {
        format!("{language_name}/{language_dir}")
    };
    let language = get_language(&language_path);
    let mut failure_count = 0;

    // Only the presence of these environment variables matters, not their value.
    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();

    if log_seed {
        println!("  start seed: {start_seed}");
    }

    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = format!("{language_name} - {}", test.name);
        // Skipped examples are counted so that unused skip entries can be
        // reported after the loop.
        if let Some(skipped) = skipped.as_mut() {
            if let Some(counter) = skipped.get_mut(test_name.as_str()) {
                println!("  {test_index}. {test_name} - SKIPPED");
                *counter += 1;
                continue;
            }
        }

        println!("  {test_index}. {test_name}");

        // Step 1: parse the unmodified input from scratch and compare the
        // S-expression with the expected output from the corpus.
        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(&language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

            let tree = parser.parse(&test.input, None).unwrap();
            let mut actual_output = tree.root_node().to_sexp();
            // The expected output only contains field names when `has_fields` is set.
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }

            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }

            true
        })
        .unwrap();

        // A failing initial parse counts once; the edit trials are not run for it.
        if !passed {
            failure_count += 1;
            continue;
        }

        // Baseline tree that every trial clones and edits. It is produced by a
        // plain `Parser`, outside of `allocations::record`.
        let mut parser = Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        // Step 2: `ITERATION_COUNT` randomized edit / undo round trips.
        for trial in 0..*ITERATION_COUNT {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(&language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();

                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // Perform a random series of edits and reparse.
                // The range is inclusive, so `edit_count + 1` edits are applied
                // (one more than the capacity reserved for `undo_stack`).
                let edit_count = rand.unsigned(*EDIT_COUNT);
                let mut undo_stack = Vec::with_capacity(edit_count);
                for _ in 0..=edit_count {
                    let edit = get_random_edit(&mut rand, &input);
                    // The inverse must be computed before `input` is modified.
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }

                if log_seed {
                    println!("   {test_index}.{trial:<2} seed: {seed}");
                }

                // Optionally write the edited input to the scratch directory.
                if dump_edits {
                    fs::write(
                        SCRATCH_BASE_DIR
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }

                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // Incremental re-parse of the edited input, using the edited tree.
                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }

                // Undo all of the edits and re-parse again.
                // Popping applies the inverse edits in reverse order.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // NOTE(review): the ranges are computed from `test.input` while
                // `input` is parsed; presumably both are identical again after
                // all edits were undone - confirm.
                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }

                if actual_output != test.output {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }

                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }

                true
            }).unwrap();

            // Stop at the first failing trial so each example is counted at most once.
            if !passed {
                failure_count += 1;
                break;
            }
        }
    }

    assert!(
        failure_count == 0,
        "{failure_count} {language_name} corpus tests failed"
    );

    // Report skip entries that never matched any example: only entries whose
    // counter stayed at zero are kept, and any such entry is an error.
    if let Some(skipped) = skipped.as_mut() {
        skipped.retain(|_, v| *v == 0);

        if !skipped.is_empty() {
            println!("Non matchable skip definitions:");
            for k in skipped.keys() {
                println!("  {k}");
            }
            panic!("Non matchable skip definitions needs to be removed");
        }
    }
}

// Generates a parser for every grammar directory under the `test_grammars`
// fixture directory. A directory containing `expected_error.txt` must fail
// generation with exactly that message; every other directory must generate
// successfully and pass all examples in its `corpus.txt`.
#[test]
fn test_feature_corpus_files() {
    let grammars_root = fixtures_dir().join("test_grammars");

    let mut failure_count = 0;
    for dir_entry in fs::read_dir(grammars_root).unwrap() {
        let dir_entry = dir_entry.unwrap();
        if !dir_entry.metadata().unwrap().is_dir() {
            continue;
        }
        let dir_name = dir_entry.file_name();
        let language_name = dir_name.to_str().unwrap();

        if let Some(filter) = LANGUAGE_FILTER.as_ref() {
            if language_name != filter {
                continue;
            }
        }

        let test_path = dir_entry.path();

        // Prefer `grammar.js`; fall back to `grammar.json` when it is absent.
        let js_grammar_path = test_path.join("grammar.js");
        let grammar_path = if js_grammar_path.exists() {
            js_grammar_path
        } else {
            test_path.join("grammar.json")
        };
        let error_message_path = test_path.join("expected_error.txt");
        let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
        let generate_result =
            tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));

        // Negative case: generation is expected to fail with a known message.
        if error_message_path.exists() {
            // Error-message grammars are not run when an example filter is active.
            if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {
                continue;
            }

            eprintln!("test language: {language_name:?}");

            // Line endings are normalized on both sides before comparing.
            let expected_message = fs::read_to_string(&error_message_path)
                .unwrap()
                .replace("\r\n", "\n");
            match generate_result {
                Err(e) => {
                    let actual_message = e.to_string().replace("\r\n", "\n");
                    if expected_message != actual_message {
                        eprintln!(
                            "Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n",
                        );
                        failure_count += 1;
                    }
                }
                Ok(_) => {
                    eprintln!("Expected error message but got none for test grammar '{language_name}'",);
                    failure_count += 1;
                }
            }
            continue;
        }

        // Positive case: generation must succeed; the second element of the
        // result is used as the generated C code.
        let c_code = match generate_result {
            Ok(generated) => generated.1,
            Err(e) => {
                eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",);
                failure_count += 1;
                continue;
            }
        };

        let corpus_path = test_path.join("corpus.txt");
        let language = get_test_language(language_name, &c_code, Some(&test_path));
        let parsed_corpus = parse_tests(&corpus_path).unwrap();
        let examples = flatten_tests(
            parsed_corpus,
            EXAMPLE_INCLUDE.as_ref(),
            EXAMPLE_EXCLUDE.as_ref(),
        );

        if !examples.is_empty() {
            eprintln!("test language: {language_name:?}");
        }

        for test in examples {
            eprintln!("  example: {:?}", test.name);

            let passed = allocations::record(|| {
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(&language).unwrap();
                let tree = parser.parse(&test.input, None).unwrap();

                let sexp = tree.root_node().to_sexp();
                let actual_output = if test.has_fields {
                    sexp
                } else {
                    strip_sexp_fields(&sexp)
                };

                let matches = actual_output == test.output;
                if !matches {
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                }
                matches
            })
            .unwrap();

            if !passed {
                failure_count += 1;
            }
        }
    }

    assert!(failure_count == 0, "{failure_count} corpus tests failed");
}



================================================
FILE: crates/cli/src/tests/detect_language.rs
================================================
use std::{fs, path::Path};

use tree_sitter_loader::Loader;

use crate::tests::helpers::fixtures::scratch_dir;

// Checks detection through `first-line-regex`, and that a matching file type
// wins over a matching first line.
#[test]
fn detect_language_by_first_line_regex() {
    let strace_json = r#"{
  "grammars": [
    {
      "name": "strace",
      "path": ".",
      "scope": "source.strace",
      "file-types": [
        "strace"
      ],
      "first-line-regex":  "[0-9:.]* *execve"
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#;
    let strace_dir = tree_sitter_dir(strace_json, "strace");

    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let configs = loader
        .find_language_configurations_at_path(strace_dir.path(), false)
        .unwrap();

    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(configs[0].scope.as_ref().unwrap(), "source.strace");

    // The same file is rewritten with different contents for each case. Its
    // `.log` extension is not a registered file type, so only the first line
    // can identify it.
    let log_path = strace_dir.path().join("strace.log");
    let strace_scope = Some(String::from("source.strace"));

    // First line starts with `execve`.
    fs::write(&log_path, "execve\nworld").unwrap();
    assert_eq!(get_lang_scope(&loader, &log_path), strace_scope);

    // First line has a numeric prefix before `execve`.
    fs::write(&log_path, "447845 execve\nworld").unwrap();
    assert_eq!(get_lang_scope(&loader, &log_path), strace_scope);

    // `execve` only appears on the second line.
    fs::write(&log_path, "hello\nexecve").unwrap();
    assert!(get_lang_scope(&loader, &log_path).is_none());

    // Empty file.
    fs::write(&log_path, "").unwrap();
    assert!(get_lang_scope(&loader, &log_path).is_none());

    let dummy_json = r#"{
  "grammars": [
    {
      "name": "dummy",
      "scope": "source.dummy",
      "path": ".",
      "file-types": [
        "dummy"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#;
    let dummy_dir = tree_sitter_dir(dummy_json, "dummy");

    // file-type takes precedence over first-line-regex
    loader
        .find_language_configurations_at_path(dummy_dir.path(), false)
        .unwrap();
    let dummy_path = dummy_dir.path().join("strace.dummy");
    fs::write(&dummy_path, "execve").unwrap();
    let dummy_scope = Some(String::from("source.dummy"));
    assert_eq!(get_lang_scope(&loader, &dummy_path), dummy_scope);
}

// A file type made of two extension components (`blade.php`) must match a
// file named `foo.blade.php`.
#[test]
fn detect_langauge_by_double_barrel_file_extension() {
    let blade_json = r#"{
  "grammars": [
    {
      "name": "blade",
      "path": ".",
      "scope": "source.blade",
      "file-types": [
        "blade.php"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#;
    let blade_dir = tree_sitter_dir(blade_json, "blade");

    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let configs = loader
        .find_language_configurations_at_path(blade_dir.path(), false)
        .unwrap();

    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(configs[0].scope.as_ref().unwrap(), "source.blade");

    let blade_path = blade_dir.path().join("foo.blade.php");
    fs::write(&blade_path, "").unwrap();

    let detected_scope = get_lang_scope(&loader, &blade_path);
    assert_eq!(detected_scope, Some("source.blade".into()));
}

// A file type that is a complete dot-file name (`.gitignore`) must match a
// file with exactly that name.
#[test]
fn detect_language_without_filename() {
    let gitignore_json = r#"{
  "grammars": [
    {
      "name": "gitignore",
      "path": ".",
      "scope": "source.gitignore",
      "file-types": [
        ".gitignore"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#;
    let gitignore_dir = tree_sitter_dir(gitignore_json, "gitignore");

    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let configs = loader
        .find_language_configurations_at_path(gitignore_dir.path(), false)
        .unwrap();

    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(configs[0].scope.as_ref().unwrap(), "source.gitignore");

    let gitignore_path = gitignore_dir.path().join(".gitignore");
    fs::write(&gitignore_path, "").unwrap();

    let detected_scope = get_lang_scope(&loader, &gitignore_path);
    assert_eq!(detected_scope, Some("source.gitignore".into()));
}

// A file type that is a bare file name (`ssh_config`) must match a file with
// that name and no extension.
#[test]
fn detect_language_without_file_extension() {
    let ssh_config_json = r#"{
  "grammars": [
    {
      "name": "ssh_config",
      "path": ".",
      "scope": "source.ssh_config",
      "file-types": [
        "ssh_config"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#;
    let ssh_config_dir = tree_sitter_dir(ssh_config_json, "ssh_config");

    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let configs = loader
        .find_language_configurations_at_path(ssh_config_dir.path(), false)
        .unwrap();

    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(configs[0].scope.as_ref().unwrap(), "source.ssh_config");

    let ssh_config_path = ssh_config_dir.path().join("ssh_config");
    fs::write(&ssh_config_path, "").unwrap();

    let detected_scope = get_lang_scope(&loader, &ssh_config_path);
    assert_eq!(detected_scope, Some("source.ssh_config".into()));
}

// Creates a temporary grammar directory containing:
//   - `tree-sitter.json` with the given contents,
//   - `src/grammar.json` holding only the grammar name,
//   - `src/parser.c` exporting an empty `tree_sitter_<name>` function,
//   - `src/tree_sitter/parser.h` copied from the library sources at compile time.
// The returned `TempDir` handle owns the directory.
fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir {
    let temp_dir = tempfile::tempdir().unwrap();
    let root = temp_dir.path();

    fs::write(root.join("tree-sitter.json"), tree_sitter_json).unwrap();
    fs::create_dir_all(root.join("src/tree_sitter")).unwrap();

    let grammar_json = format!(r#"{{"name":"{name}"}}"#);
    fs::write(root.join("src/grammar.json"), grammar_json).unwrap();

    let parser_c = format!(
        r#"
                #include "tree_sitter/parser.h"
                #ifdef _WIN32
                #define TS_PUBLIC __declspec(dllexport)
                #else
                #define TS_PUBLIC __attribute__((visibility("default")))
                #endif
                TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}}
            "#
    );
    fs::write(root.join("src/parser.c"), parser_c).unwrap();

    let parser_h = include_str!("../../../../lib/src/parser.h");
    fs::write(root.join("src/tree_sitter/parser.h"), parser_h).unwrap();

    temp_dir
}

// Returns the scope of the language configuration detected for `file_name`.
// Getting a scope back means the file type was detected correctly.
//
// The file-name lookup is tried first. Only when it succeeds without a match
// is the first-line regex consulted; an error from the file-name lookup yields
// `None` straight away.
fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
    let by_file_name = loader.language_configuration_for_file_name(file_name).ok()?;
    if let Some((_, config)) = by_file_name {
        return config.scope.clone();
    }

    match loader.language_configuration_for_first_line_regex(file_name) {
        Ok(Some((_, config))) => config.scope.clone(),
        _ => None,
    }
}



================================================
FILE: crates/cli/src/tests/helpers.rs
================================================
pub mod allocations;
pub mod edits;
pub(super) mod fixtures;
pub(super) mod query_helpers;



================================================
FILE: crates/cli/src/tests/highlight_test.rs
================================================
use std::{
    ffi::CString,
    fs,
    os::raw::c_char,
    ptr, slice, str,
    sync::{
        atomic::{AtomicUsize, Ordering},
        LazyLock,
    },
};

use tree_sitter_highlight::{
    c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};

use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};

// Highlight configurations shared by the tests in this file. Each one is built
// on first use from a fixture language name, an optional second argument
// (presumably the injection query file name - confirm against
// `get_highlight_config`), and the shared `HIGHLIGHT_NAMES` list.

static JS_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    let injections = Some("injections.scm");
    get_highlight_config("javascript", injections, &HIGHLIGHT_NAMES)
});

static JSDOC_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    let injections = None;
    get_highlight_config("jsdoc", injections, &HIGHLIGHT_NAMES)
});

static HTML_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    let injections = Some("injections.scm");
    get_highlight_config("html", injections, &HIGHLIGHT_NAMES)
});

static EJS_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    let injections = Some("injections-ejs.scm");
    get_highlight_config("embedded-template", injections, &HIGHLIGHT_NAMES)
});

static RUST_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    let injections = Some("injections.scm");
    get_highlight_config("rust", injections, &HIGHLIGHT_NAMES)
});

// Capture names recognized by the highlight configurations in this file, as
// owned strings. The order is significant: `HTML_ATTRS` is derived from this
// list element by element.
static HIGHLIGHT_NAMES: LazyLock<Vec<String>> = LazyLock::new(|| {
    const NAMES: [&str; 28] = [
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "constant",
        "constant.builtin",
        "constructor",
        "embedded",
        "function",
        "function.builtin",
        "keyword",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ];

    NAMES.iter().map(|name| (*name).to_string()).collect()
});

static HTML_ATTRS: LazyLock<Vec<String>> = LazyLock::new(|| {
    HIGHLIGHT_NAMES
        .iter()
        .map(|s| format!("class={s}"))
        .collect()
});

// Highlights a one-line JavaScript snippet and checks every token of the
// single resulting line together with its highlight stack.
#[test]
fn test_highlighting_javascript() {
    let source = "const a = function(b) { return b + c; }";
    let actual = to_token_vector(source, &JS_HIGHLIGHT).unwrap();

    let expected = [vec![
        ("const", vec!["keyword"]),
        (" ", vec![]),
        ("a", vec!["function"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("function", vec!["keyword"]),
        ("(", vec!["punctuation.bracket"]),
        ("b", vec!["variable"]),
        (")", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("{", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("return", vec!["keyword"]),
        (" ", vec![]),
        ("b", vec!["variable"]),
        (" ", vec![]),
        ("+", vec!["operator"]),
        (" ", vec![]),
        ("c", vec!["variable"]),
        (";", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("}", vec!["punctuation.bracket"]),
    ]];

    assert_eq!(&actual, &expected);
}

// An `html` tagged template literal: tokens inside the template carry the
// outer "string" highlight plus the highlights of the injected content, and
// the `${...}` substitution is additionally marked "embedded".
#[test]
fn test_highlighting_injected_html_in_javascript() {
    let source = "const s = html `<div>${a < b}</div>`;";
    let actual = to_token_vector(source, &JS_HIGHLIGHT).unwrap();

    let expected = [vec![
        ("const", vec!["keyword"]),
        (" ", vec![]),
        ("s", vec!["variable"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("html", vec!["function"]),
        (" ", vec![]),
        ("`", vec!["string"]),
        ("<", vec!["string", "punctuation.bracket"]),
        ("div", vec!["string", "tag"]),
        (">", vec!["string", "punctuation.bracket"]),
        ("${", vec!["string", "embedded", "punctuation.special"]),
        ("a", vec!["string", "embedded", "variable"]),
        (" ", vec!["string", "embedded"]),
        ("<", vec!["string", "embedded", "operator"]),
        (" ", vec!["string", "embedded"]),
        ("b", vec!["string", "embedded", "variable"]),
        ("}", vec!["string", "embedded", "punctuation.special"]),
        ("</", vec!["string", "punctuation.bracket"]),
        ("div", vec!["string", "tag"]),
        (">", vec!["string", "punctuation.bracket"]),
        ("`", vec!["string"]),
        (";", vec!["punctuation.delimiter"]),
    ]];

    assert_eq!(&actual, &expected);
}

// Single-line HTML document whose `<script>` body must be highlighted with
// the JavaScript highlights.
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
    let source = "<script>const x = new Thing();</script>";
    let actual = to_token_vector(source, &HTML_HIGHLIGHT).unwrap();

    let expected = [vec![
        ("<", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
        ("const", vec!["keyword"]),
        (" ", vec![]),
        ("x", vec!["variable"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("new", vec!["keyword"]),
        (" ", vec![]),
        ("Thing", vec!["constructor"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
        (";", vec!["punctuation.delimiter"]),
        ("</", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
    ]];

    assert_eq!(&actual, &expected);
}

// Multi-line variant of the `<script>` injection test: the expected value has
// one token vector per source line.
#[test]
fn test_highlighting_injected_javascript_in_html() {
    let lines = [
        "<body>",
        "  <script>",
        "    const x = new Thing();",
        "  </script>",
        "</body>",
    ];
    let source = lines.join("\n");
    let actual = to_token_vector(&source, &HTML_HIGHLIGHT).unwrap();

    let expected = [
        vec![
            ("<", vec!["punctuation.bracket"]),
            ("body", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],
        vec![
            ("  ", vec![]),
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],
        vec![
            ("    ", vec![]),
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("x", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("new", vec!["keyword"]),
            (" ", vec![]),
            ("Thing", vec!["constructor"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (";", vec!["punctuation.delimiter"]),
        ],
        vec![
            ("  ", vec![]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],
        vec![
            ("</", vec!["punctuation.bracket"]),
            ("body", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],
    ];

    assert_eq!(&actual, &expected);
}

// A template string spanning several lines: in the rendered HTML every line
// is self-contained, i.e. the spans that are still open at a line break are
// closed at the end of the line and re-opened on the next one.
#[test]
fn test_highlighting_multiline_nodes_to_html() {
    let lines = [
        "const SOMETHING = `",
        "  one ${",
        "    two()",
        "  } three",
        "`",
        "",
    ];
    let source = lines.join("\n");
    let actual = to_html(&source, &JS_HIGHLIGHT).unwrap();

    let expected = [
        "<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
        "<span class=string>  one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
        "<span class=string><span class=embedded>    <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
        "<span class=string><span class=embedded>  <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
        "<span class=string>`</span>\n".to_string(),
    ];

    assert_eq!(&actual, &expected);
}

// `module` is highlighted as a builtin at the top level, but as a plain
// variable once it has been redefined locally inside the function body.
#[test]
fn test_highlighting_with_local_variable_tracking() {
    let lines = [
        "module.exports = function a(b) {",
        "  const module = c;",
        "  console.log(module, b);",
        "}",
    ];
    let source = lines.join("\n");
    let actual = to_token_vector(&source, &JS_HIGHLIGHT).unwrap();

    let expected = [
        vec![
            ("module", vec!["variable.builtin"]),
            (".", vec!["punctuation.delimiter"]),
            ("exports", vec!["function"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("function", vec!["keyword"]),
            (" ", vec![]),
            ("a", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            ("b", vec!["variable"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
        ],
        vec![
            ("  ", vec![]),
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("module", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
        ],
        vec![
            ("  ", vec![]),
            ("console", vec!["variable.builtin"]),
            (".", vec!["punctuation.delimiter"]),
            ("log", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            // Not a builtin, because `module` was defined as a variable above.
            ("module", vec!["variable"]),
            (",", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            // A parameter, because `b` was defined as a parameter above.
            ("b", vec!["variable"]),
            (")", vec!["punctuation.bracket"]),
            (";", vec!["punctuation.delimiter"]),
        ],
        vec![("}", vec!["punctuation.bracket"])],
    ];

    assert_eq!(&actual, &expected);
}

// Blank source lines must show up as bare "\n" entries in the rendered HTML,
// and the last line gets a trailing newline even though the source has none.
#[test]
fn test_highlighting_empty_lines() {
    let lines = [
        "class A {",
        "",
        "  b(c) {",
        "",
        "    d(e)",
        "",
        "  }",
        "",
        "}",
    ];
    let source = lines.join("\n");
    let actual = to_html(&source, &JS_HIGHLIGHT).unwrap();

    let expected = [
        "<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
        "\n".to_string(),
        "  <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
        "\n".to_string(),
        "    <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
        "\n".to_string(),
        "  <span class=punctuation.bracket>}</span>\n".to_string(),
        "\n".to_string(),
        "<span class=punctuation.bracket>}</span>\n".to_string(),
    ];

    assert_eq!(&actual, &expected);
}

// Carriage returns in the source are rendered as empty `carriage-return`
// spans; the "\r\n" pair in the middle produces only the line break.
#[test]
fn test_highlighting_carriage_returns() {
    let source = "a = \"a\rb\"\r\nb\r";
    let actual = to_html(source, &JS_HIGHLIGHT).unwrap();

    let expected = [
        "<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span><span class=variable>b</span>&quot;</span>\n",
        "<span class=variable>b</span><span class=carriage-return></span>\n",
    ];

    assert_eq!(&actual, &expected);
}

// EJS template: HTML markup, an embedded `<% ... %>` code section and a
// `<script>` body are all highlighted within one line of tokens.
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
    let source = "<div><% foo() %></div><script> bar() </script>";
    let actual = to_token_vector(source, &EJS_HIGHLIGHT).unwrap();

    let expected = [[
        ("<", vec!["punctuation.bracket"]),
        ("div", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
        ("<%", vec!["keyword"]),
        (" ", vec![]),
        ("foo", vec!["function"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("%>", vec!["keyword"]),
        ("</", vec!["punctuation.bracket"]),
        ("div", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
        ("<", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("bar", vec!["function"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("</", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
    ]];

    assert_eq!(&actual, &expected);
}

// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
#[test]
fn test_highlighting_javascript_with_jsdoc() {
    let source = "a /* @see a */ b; /* nothing */ c; /* @see b */";
    let actual = to_token_vector(source, &JS_HIGHLIGHT).unwrap();

    let expected = [[
        ("a", vec!["variable"]),
        (" ", vec![]),
        ("/* ", vec!["comment"]),
        ("@see", vec!["comment", "keyword"]),
        (" a */", vec!["comment"]),
        (" ", vec![]),
        ("b", vec!["variable"]),
        (";", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("/* nothing */", vec!["comment"]),
        (" ", vec![]),
        ("c", vec!["variable"]),
        (";", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("/* ", vec!["comment"]),
        ("@see", vec!["comment", "keyword"]),
        (" b */", vec!["comment"]),
    ]];

    assert_eq!(&actual, &expected);
}

#[test]
fn test_highlighting_with_content_children_included() {
    // A three-line macro invocation whose argument is ordinary Rust code.
    let macro_source = ["assert!(", "    a.b.c() < D::e::<F>()", ");"].join("\n");
    let tokens = to_token_vector(&macro_source, &RUST_HIGHLIGHT).unwrap();

    // Expected tokens, one vector per source line.
    let opening_line = vec![
        ("assert", vec!["function"]),
        ("!", vec!["function"]),
        ("(", vec!["punctuation.bracket"]),
    ];
    let argument_line = vec![
        ("    a", vec![]),
        (".", vec!["punctuation.delimiter"]),
        ("b", vec!["property"]),
        (".", vec!["punctuation.delimiter"]),
        ("c", vec!["function"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
        (" < ", vec![]),
        ("D", vec!["type"]),
        ("::", vec!["punctuation.delimiter"]),
        ("e", vec!["function"]),
        ("::", vec!["punctuation.delimiter"]),
        ("<", vec!["punctuation.bracket"]),
        ("F", vec!["type"]),
        (">", vec!["punctuation.bracket"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
    ];
    let closing_line = vec![
        (")", vec!["punctuation.bracket"]),
        (";", vec!["punctuation.delimiter"]),
    ];

    assert_eq!(tokens, [opening_line, argument_line, closing_line]);
}

#[test]
fn test_highlighting_cancellation() {
    // An HTML document whose `<script>` element wraps 500 lines of JavaScript.
    let mut source = String::from("<script>\n");
    source.push_str(&"function a() { console.log('hi'); }\n".repeat(500));
    source.push_str("</script>\n");

    // The injection callback raises the cancellation flag, so cancellation is requested
    // before the injected document gets parsed.
    let cancellation_flag = AtomicUsize::new(0);
    let injection_callback = |name: &str| {
        cancellation_flag.store(1, Ordering::SeqCst);
        test_language_for_injection_string(name)
    };

    // Creating the event iterator eagerly parses the outer document; that step is
    // expected to succeed.
    let mut highlighter = Highlighter::new();
    let events = highlighter
        .highlight(
            &HTML_HIGHLIGHT,
            source.as_bytes(),
            Some(&cancellation_flag),
            injection_callback,
        )
        .unwrap();

    // Walking the events must not panic; the first error encountered has to be the
    // cancellation error.
    let first_error = events.into_iter().find_map(Result::err);
    match first_error {
        Some(e) => assert_eq!(e, Error::Cancelled),
        None => panic!("Expected an error while iterating highlighter"),
    }
}

#[test]
fn test_highlighting_via_c_api() {
    // Drives the highlighter through its C entry points: create a highlighter, register
    // JavaScript and HTML, highlight an HTML document that contains a script element,
    // and compare the rendered lines with the expected HTML.

    // Each entry is an HTML attribute string carrying an explicit trailing NUL, because
    // raw pointers into these strings are handed to the C API below.
    let highlights = [
        "class=tag\0",
        "class=function\0",
        "class=string\0",
        "class=keyword\0",
    ];
    // Highlight names: pointers into the strings above, just past the `class=` prefix.
    let highlight_names = highlights
        .iter()
        .map(|h| h["class=".len()..].as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    // Attribute strings: pointers to the start of each full `class=...` string.
    let highlight_attrs = highlights
        .iter()
        .map(|h| h.as_bytes().as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    let highlighter = unsafe {
        c::ts_highlighter_new(
            std::ptr::addr_of!(highlight_names[0]),
            std::ptr::addr_of!(highlight_attrs[0]),
            highlights.len() as u32,
        )
    };

    let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");

    // Register JavaScript with highlights, injections and locals queries. The query
    // text is passed as pointer + length pairs.
    let js_scope = c_string("source.js");
    let js_injection_regex = c_string("^javascript");
    let language = get_language("javascript");
    let lang_name = c_string("javascript");
    let queries = get_language_queries_path("javascript");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            js_scope.as_ptr(),
            js_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            locals_query.as_ptr().cast::<c_char>(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            locals_query.len() as u32,
        );
    }

    // Register HTML. No locals query is supplied here: a null pointer with length 0
    // is passed in its place.
    let html_scope = c_string("text.html.basic");
    let html_injection_regex = c_string("^html");
    let language = get_language("html");
    let lang_name = c_string("html");
    let queries = get_language_queries_path("html");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            html_scope.as_ptr(),
            html_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            ptr::null(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            0,
        );
    }

    let buffer = c::ts_highlight_buffer_new();

    // Highlight the document using the HTML scope as the entry language.
    // NOTE(review): the trailing null pointer is presumably the optional cancellation
    // flag - confirm against the C API declaration.
    unsafe {
        c::ts_highlighter_highlight(
            highlighter,
            html_scope.as_ptr(),
            source_code.as_ptr(),
            source_code.as_bytes().len() as u32,
            buffer,
            ptr::null_mut(),
        );
    }

    // Read the rendered bytes and the per-line start offsets back out of the buffer.
    let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
    let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
    let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
    let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };

    let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
    let output_line_offsets =
        unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };

    // Slice the output into lines: each line runs from its own offset up to the next
    // line's offset, and the last line runs to the end of the output.
    let mut lines = Vec::with_capacity(output_line_count as usize);
    for i in 0..(output_line_count as usize) {
        let line_start = output_line_offsets[i] as usize;
        let line_end = output_line_offsets
            .get(i + 1)
            .map_or(output_bytes.len(), |x| *x as usize);
        lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
    }

    // Only the four highlight names registered above appear as spans; everything else
    // is expected as plain, HTML-escaped text.
    assert_eq!(
        lines,
        vec![
            "&lt;<span class=tag>script</span>&gt;\n",
            "<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
            "c.<span class=function>d</span>();\n",
            "&lt;/<span class=tag>script</span>&gt;\n",
        ]
    );

    // Release the objects obtained from the C API.
    unsafe {
        c::ts_highlighter_delete(highlighter);
        c::ts_highlight_buffer_delete(buffer);
    }
}

#[test]
fn test_highlighting_with_all_captures_applied() {
    // In this query the generic `(identifier) @variable` pattern comes before the more
    // specific function-name and parameter patterns. The expectations below require
    // the more specific captures to be the ones reported for `main`, `a` and `b`.
    let highlights_query = indoc::indoc! {"
        [
          \"fn\"
          \"let\"
        ] @keyword
        (identifier) @variable
        (function_item name: (identifier) @function)
        (parameter pattern: (identifier) @variable.parameter)
        (primitive_type) @type.builtin
        \"=\" @operator
        [ \"->\" \":\" \";\" ] @punctuation.delimiter
        [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
    "};

    // Build a dedicated configuration from the query above (no injections or locals).
    let mut config =
        HighlightConfiguration::new(get_language("rust"), "rust", highlights_query, "", "")
            .unwrap();
    config.configure(&HIGHLIGHT_NAMES);

    let rust_source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
    let tokens = to_token_vector(rust_source, &config).unwrap();

    let expected = [[
        ("fn", vec!["keyword"]),
        (" ", vec![]),
        ("main", vec!["function"]),
        ("(", vec!["punctuation.bracket"]),
        ("a", vec!["variable.parameter"]),
        (":", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("u32", vec!["type.builtin"]),
        (", ", vec![]),
        ("b", vec!["variable.parameter"]),
        (":", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("u32", vec!["type.builtin"]),
        (")", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("->", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("{", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("let", vec!["keyword"]),
        (" ", vec![]),
        ("c", vec!["variable"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("a", vec!["variable"]),
        (" + ", vec![]),
        ("b", vec!["variable"]),
        (";", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("}", vec!["punctuation.bracket"]),
    ]];
    assert_eq!(tokens, expected);
}

#[test]
fn test_decode_utf8_lossy() {
    use tree_sitter::LossyUtf8;

    // Each case pairs raw bytes with the chunks the iterator is expected to yield.
    // Every invalid byte (`\xc0`, `\xc1`) becomes its own U+FFFD replacement chunk,
    // whether it sits in the middle, at the start, or at the end of the input.
    let cases: [(&[u8], &[&str]); 4] = [
        (b"hi", &["hi"]),
        (b"hi\xc0\xc1bye", &["hi", "\u{fffd}", "\u{fffd}", "bye"]),
        (b"\xc0\xc1bye", &["\u{fffd}", "\u{fffd}", "bye"]),
        (b"hello\xc0\xc1", &["hello", "\u{fffd}", "\u{fffd}"]),
    ];

    for &(input, expected) in &cases {
        let parts = LossyUtf8::new(input).collect::<Vec<_>>();
        assert_eq!(parts, expected);
    }
}

/// Converts `s` into an owned, NUL-terminated `CString`.
///
/// Panics if `s` contains an interior NUL byte.
fn c_string(s: &str) -> CString {
    let converted = CString::new(s);
    converted.unwrap()
}

/// Injection callback shared by the highlight tests: maps an injected language name
/// to the matching test highlight configuration.
///
/// Returns `None` for any name other than `javascript`, `html`, `rust` or `jsdoc`.
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
    if string == "javascript" {
        return Some(&JS_HIGHLIGHT);
    }
    if string == "html" {
        return Some(&HTML_HIGHLIGHT);
    }
    if string == "rust" {
        return Some(&RUST_HIGHLIGHT);
    }
    if string == "jsdoc" {
        return Some(&JSDOC_HIGHLIGHT);
    }
    None
}

/// Highlights `src` with `language_config` and renders it to HTML, returning one
/// `String` per rendered line.
///
/// Injected languages are resolved through `test_language_for_injection_string`, and
/// each highlight is rendered with the matching entry of `HTML_ATTRS`. If
/// `HIGHLIGHT_NAMES` contains `carriage-return`, that highlight is registered with the
/// renderer as its carriage-return highlight.
///
/// # Errors
///
/// Returns the error from either creating the highlight event iterator or rendering
/// the events, so callers decide how to handle both kinds of failure.
fn to_html<'a>(
    src: &'a str,
    language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
    let src = src.as_bytes();
    let mut renderer = HtmlRenderer::new();
    let mut highlighter = Highlighter::new();
    let events = highlighter.highlight(
        language_config,
        src,
        None,
        &test_language_for_injection_string,
    )?;

    renderer.set_carriage_return_highlight(
        HIGHLIGHT_NAMES
            .iter()
            .position(|s| s == "carriage-return")
            .map(Highlight),
    );
    // Propagate rendering failures through the declared `Result` instead of panicking
    // here, matching how errors from `highlight` above are handled.
    renderer.render(events, src, &|highlight, output| {
        output.extend(HTML_ATTRS[highlight.0].as_bytes());
    })?;
    Ok(renderer
        .lines()
        .map(std::string::ToString::to_string)
        .collect())
}

/// Highlights `src` with `language_config` and flattens the resulting events into
/// lines of `(text, active highlight names)` tokens.
///
/// Source text is split on `\n`; a trailing `\r` is trimmed from each piece and empty
/// pieces are dropped. A final line is only emitted when it holds at least one token.
#[allow(clippy::type_complexity)]
fn to_token_vector<'a>(
    src: &'a str,
    language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
    let bytes = src.as_bytes();
    let mut highlighter = Highlighter::new();
    let events = highlighter.highlight(
        language_config,
        bytes,
        None,
        &test_language_for_injection_string,
    )?;

    let mut finished_lines = Vec::new();
    let mut current_line = Vec::new();
    // Stack of the highlight names that are open at the current position.
    let mut active_highlights = Vec::new();

    for event in events {
        match event? {
            HighlightEvent::Source { start, end } => {
                let text = str::from_utf8(&bytes[start..end]).unwrap();
                for (index, raw_piece) in text.split('\n').enumerate() {
                    // Every piece after the first follows a `\n`, which closes the
                    // line collected so far.
                    if index != 0 {
                        finished_lines.push(std::mem::take(&mut current_line));
                    }
                    let piece = raw_piece.trim_end_matches('\r');
                    if piece.is_empty() {
                        continue;
                    }
                    current_line.push((piece, active_highlights.clone()));
                }
            }
            HighlightEvent::HighlightStart(highlight) => {
                active_highlights.push(HIGHLIGHT_NAMES[highlight.0].as_str());
            }
            HighlightEvent::HighlightEnd => {
                active_highlights.pop();
            }
        }
    }

    if !current_line.is_empty() {
        finished_lines.push(current_line);
    }
    Ok(finished_lines)
}



================================================
FILE: crates/cli/src/tests/language_test.rs
================================================
use tree_sitter::{self, Parser};

use super::helpers::fixtures::get_language;

#[test]
fn test_lookahead_iterator() {
    let mut parser = Parser::new();
    let language = get_language("rust");
    parser.set_language(&language).unwrap();

    let tree = parser.parse("struct Stuff {}", None).unwrap();
    let mut cursor = tree.walk();

    // Descend from the root to the `struct` keyword token.
    assert!(cursor.goto_first_child()); // struct
    assert!(cursor.goto_first_child()); // struct keyword

    // The state recorded on the keyword node must match what the language's own
    // transition lookup reports, and must be a valid state index.
    let keyword = cursor.node();
    let next_state = keyword.next_parse_state();
    assert_ne!(next_state, 0);
    assert_eq!(
        next_state,
        language.next_state(keyword.parse_state(), keyword.grammar_id())
    );
    assert!((next_state as usize) < language.parse_state_count());

    // The following sibling is parsed in exactly that state. Its grammar name is
    // `identifier`, and its grammar id differs from its kind id.
    assert!(cursor.goto_next_sibling()); // type_identifier
    let name_node = cursor.node();
    assert_eq!(next_state, name_node.parse_state());
    assert_eq!(name_node.grammar_name(), "identifier");
    assert_ne!(name_node.grammar_id(), name_node.kind_id());

    let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"];
    let mut lookahead = language.lookahead_iterator(next_state).unwrap();
    assert_eq!(*lookahead.language(), language);
    assert!(lookahead.iter_names().eq(expected_symbols));

    // Resetting only the state must yield the same names again.
    lookahead.reset_state(next_state);
    assert!(lookahead.iter_names().eq(expected_symbols));

    // After a full reset, iterating symbol ids and resolving each through the language
    // must give the same names as well.
    lookahead.reset(&language, next_state);
    let kinds_by_id = lookahead.map(|symbol| language.node_kind_for_id(symbol).unwrap());
    assert!(kinds_by_id.eq(expected_symbols));
}

#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
    let mut parser = Parser::new();
    let language = get_language("rust");
    parser.set_language(&language).unwrap();

    let tree = parser.parse("struct Stuff {}", None).unwrap();
    let mut cursor = tree.walk();

    assert!(cursor.goto_first_child()); // struct
    assert!(cursor.goto_first_child()); // struct keyword

    let state_after_keyword = cursor.node().next_parse_state();
    assert_ne!(state_after_keyword, 0);

    // Advancing the iterator directly, and then through its name view, both go
    // through a `mut` binding. The results themselves are not inspected.
    let mut lookahead = language.lookahead_iterator(state_after_keyword).unwrap();
    let _ = lookahead.next();

    let mut name_iter = lookahead.iter_names();
    let _ = name_iter.next();
}

#[test]
fn test_symbol_metadata_checks() {
    let language = get_language("rust");

    // Walk every symbol id of the grammar and spot-check the metadata of a few
    // well-known kinds; all other kinds are ignored.
    for id in 0..language.node_kind_count() {
        let symbol = id as u16;
        let kind = language.node_kind_for_id(symbol).unwrap();

        let expect_supertype = matches!(
            kind,
            "_type"
                | "_expression"
                | "_pattern"
                | "_literal"
                | "_literal_pattern"
                | "_declaration_statement"
        );
        let expect_not_supertype = matches!(
            kind,
            "_raw_string_literal_start"
                | "_raw_string_literal_end"
                | "_line_doc_comment"
                | "_error_sentinel"
        );
        let expect_named = matches!(kind, "enum_item" | "struct_item" | "type_item");
        let expect_visible = matches!(kind, "=>" | "[" | "]" | "(" | ")" | "{" | "}");

        if expect_supertype {
            assert!(language.node_kind_is_supertype(symbol));
        } else if expect_not_supertype {
            assert!(!language.node_kind_is_supertype(symbol));
        } else if expect_named {
            assert!(language.node_kind_is_named(symbol));
        } else if expect_visible {
            assert!(language.node_kind_is_visible(symbol));
        }
    }
}

#[test]
fn test_supertypes() {
    let language = get_language("rust");
    let supertypes = language.supertypes();

    // The checks below are skipped for grammars with an ABI version below 15.
    if language.abi_version() < 15 {
        return;
    }

    assert_eq!(supertypes.len(), 5);

    let supertype_names = supertypes
        .iter()
        .filter_map(|&symbol| language.node_kind_for_id(symbol))
        .collect::<Vec<&str>>();
    assert_eq!(
        supertype_names,
        [
            "_expression",
            "_literal",
            "_literal_pattern",
            "_pattern",
            "_type"
        ]
    );

    for &supertype in supertypes {
        // Collect the names of all subtypes, sorted and with duplicates removed.
        let mut subtypes = language
            .subtypes_for_supertype(supertype)
            .iter()
            .filter_map(|symbol| language.node_kind_for_id(*symbol))
            .collect::<Vec<&str>>();
        subtypes.sort_unstable();
        subtypes.dedup();

        // Only three of the supertypes have their subtype list pinned down here.
        let expected: &[&str] = match language.node_kind_for_id(supertype) {
            Some("_literal") => &[
                "boolean_literal",
                "char_literal",
                "float_literal",
                "integer_literal",
                "raw_string_literal",
                "string_literal",
            ],
            Some("_pattern") => &[
                "_",
                "_literal_pattern",
                "captured_pattern",
                "const_block",
                "generic_pattern",
                "identifier",
                "macro_invocation",
                "mut_pattern",
                "or_pattern",
                "range_pattern",
                "ref_pattern",
                "reference_pattern",
                "remaining_field_pattern",
                "scoped_identifier",
                "slice_pattern",
                "struct_pattern",
                "tuple_pattern",
                "tuple_struct_pattern",
            ],
            Some("_type") => &[
                "abstract_type",
                "array_type",
                "bounded_type",
                "dynamic_type",
                "function_type",
                "generic_type",
                "macro_invocation",
                "metavariable",
                "never_type",
                "pointer_type",
                "primitive_type",
                "reference_type",
                "removed_trait_bound",
                "scoped_type_identifier",
                "tuple_type",
                "type_identifier",
                "unit_type",
            ],
            _ => continue,
        };
        assert_eq!(subtypes, expected);
    }
}



================================================
FILE: crates/cli/src/tests/node_test.rs
================================================
use tree_sitter::{Node, Parser, Point, Tree};
use tree_sitter_generate::load_grammar_file;

use super::{
    get_random_edit,
    helpers::fixtures::{fixtures_dir, get_language, get_test_language},
    Rand,
};
use crate::{
    parse::perform_edit,
    tests::{generate_parser, helpers::fixtures::get_test_fixture_language},
};

/// JSON document used by the node tests: an array holding a number, `false`, and an
/// object with a single `"x": null` pair.
///
/// The raw string starts with a newline followed by an empty line, so the opening `[`
/// sits on row 2. `test_node_child` asserts exact byte offsets and row/column
/// positions against this text, so any change to its whitespace or layout requires
/// updating those expectations.
const JSON_EXAMPLE: &str = r#"

[
  123,
  false,
  {
    "x": null
  }
]
"#;

/// Grammar definition (JSON) for a small test language named `aliases_and_extras`.
///
/// - Extras: the whitespace pattern `\s+` and the `comment` rule (the literal `...`).
/// - Rule `a`: a sequence of `b`, then `b` aliased to the named node `B`, then `_c`
///   aliased to the named node `C`.
/// - Rules `b` and `_c` are the string literals `b` and `c`.
const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &str = r#"{
  "name": "aliases_and_extras",

  "extras": [
    {"type": "PATTERN", "value": "\\s+"},
    {"type": "SYMBOL", "name": "comment"}
  ],

  "rules": {
    "a": {
      "type": "SEQ",
      "members": [
        {"type": "SYMBOL", "name": "b"},
        {
          "type": "ALIAS",
          "value": "B",
          "named": true,
          "content": {"type": "SYMBOL", "name": "b"}
        },
        {
          "type": "ALIAS",
          "value": "C",
          "named": true,
          "content": {"type": "SYMBOL", "name": "_c"}
        }
      ]
    },

    "b": {"type": "STRING", "value": "b"},

    "_c": {"type": "STRING", "value": "c"},

    "comment": {"type": "STRING", "value": "..."}
  }
}"#;

#[test]
fn test_node_child() {
    let tree = parse_json_example();
    let root_node = tree.root_node();
    let array_node = root_node.child(0).unwrap();

    // Byte offset of the first occurrence of `needle` in the example document.
    let offset_of = |needle: &str| JSON_EXAMPLE.find(needle).unwrap();

    // --- The top-level array -------------------------------------------------------
    assert_eq!(array_node.kind(), "array");
    assert_eq!(array_node.named_child_count(), 3);
    assert_eq!(array_node.start_byte(), offset_of("["));
    assert_eq!(array_node.end_byte(), offset_of("]") + 1);
    assert_eq!(array_node.start_position(), Point::new(2, 0));
    assert_eq!(array_node.end_position(), Point::new(8, 1));
    assert_eq!(array_node.child_count(), 7);

    let left_bracket_node = array_node.child(0).unwrap();
    let number_node = array_node.child(1).unwrap();
    let comma_node1 = array_node.child(2).unwrap();
    let false_node = array_node.child(3).unwrap();
    let comma_node2 = array_node.child(4).unwrap();
    let object_node = array_node.child(5).unwrap();
    let right_bracket_node = array_node.child(6).unwrap();

    // (node, expected kind, expected `is_named`) for every child of the array.
    let array_children = [
        (left_bracket_node, "[", false),
        (number_node, "number", true),
        (comma_node1, ",", false),
        (false_node, "false", true),
        (comma_node2, ",", false),
        (object_node, "object", true),
        (right_bracket_node, "]", false),
    ];
    for &(node, kind, _) in &array_children {
        assert_eq!(node.kind(), kind);
    }
    for &(node, _, named) in &array_children {
        assert_eq!(node.is_named(), named);
    }

    let number_start = offset_of("123");
    assert_eq!(number_node.start_byte(), number_start);
    assert_eq!(number_node.end_byte(), number_start + 3);
    assert_eq!(number_node.start_position(), Point::new(3, 2));
    assert_eq!(number_node.end_position(), Point::new(3, 5));

    let false_start = offset_of("false");
    assert_eq!(false_node.start_byte(), false_start);
    assert_eq!(false_node.end_byte(), false_start + 5);
    assert_eq!(false_node.start_position(), Point::new(4, 2));
    assert_eq!(false_node.end_position(), Point::new(4, 7));

    // --- The nested object ---------------------------------------------------------
    assert_eq!(object_node.start_byte(), offset_of("{"));
    assert_eq!(object_node.start_position(), Point::new(5, 2));
    assert_eq!(object_node.end_position(), Point::new(7, 3));

    assert_eq!(object_node.child_count(), 3);
    let left_brace_node = object_node.child(0).unwrap();
    let pair_node = object_node.child(1).unwrap();
    let right_brace_node = object_node.child(2).unwrap();

    let object_children = [
        (left_brace_node, "{", false),
        (pair_node, "pair", true),
        (right_brace_node, "}", false),
    ];
    for &(node, kind, _) in &object_children {
        assert_eq!(node.kind(), kind);
    }
    for &(node, _, named) in &object_children {
        assert_eq!(node.is_named(), named);
    }

    // --- The `"x": null` pair ------------------------------------------------------
    let key_start = offset_of("\"x\"");
    let null_start = offset_of("null");

    assert_eq!(pair_node.start_byte(), key_start);
    assert_eq!(pair_node.end_byte(), null_start + 4);
    assert_eq!(pair_node.start_position(), Point::new(6, 4));
    assert_eq!(pair_node.end_position(), Point::new(6, 13));

    assert_eq!(pair_node.child_count(), 3);
    let string_node = pair_node.child(0).unwrap();
    let colon_node = pair_node.child(1).unwrap();
    let null_node = pair_node.child(2).unwrap();

    let pair_children = [
        (string_node, "string", true),
        (colon_node, ":", false),
        (null_node, "null", true),
    ];
    for &(node, kind, _) in &pair_children {
        assert_eq!(node.kind(), kind);
    }
    for &(node, _, named) in &pair_children {
        assert_eq!(node.is_named(), named);
    }

    assert_eq!(string_node.start_byte(), key_start);
    assert_eq!(string_node.end_byte(), key_start + 3);
    assert_eq!(string_node.start_position(), Point::new(6, 4));
    assert_eq!(string_node.end_position(), Point::new(6, 7));

    assert_eq!(null_node.start_byte(), null_start);
    assert_eq!(null_node.end_byte(), null_start + 4);
    assert_eq!(null_node.start_position(), Point::new(6, 9));
    assert_eq!(null_node.end_position(), Point::new(6, 13));

    // --- Parent links ----------------------------------------------------------------
    for &(child, expected_parent) in &[
        (string_node, pair_node),
        (null_node, pair_node),
        (pair_node, object_node),
        (number_node, array_node),
        (false_node, array_node),
        (object_node, array_node),
        (array_node, root_node),
    ] {
        assert_eq!(child.parent().unwrap(), expected_parent);
    }
    assert_eq!(root_node.parent(), None);

    // --- `child_with_descendant`: walk from the root down towards `null` ---------------
    for &(ancestor, expected_child) in &[
        (root_node, array_node),
        (array_node, object_node),
        (object_node, pair_node),
        (pair_node, null_node),
    ] {
        assert_eq!(
            ancestor.child_with_descendant(null_node).unwrap(),
            expected_child
        );
    }
    // A node has no child that contains the node itself.
    assert_eq!(null_node.child_with_descendant(null_node), None);
}

#[test]
fn test_node_children() {
    let tree = parse_json_example();
    let mut cursor = tree.walk();
    let array_node = tree.root_node().child(0).unwrap();

    // All children of the array, including the anonymous punctuation tokens.
    let all_kinds: Vec<&str> = array_node
        .children(&mut cursor)
        .map(|child| child.kind())
        .collect();
    assert_eq!(
        all_kinds,
        ["[", "number", ",", "false", ",", "object", "]"]
    );

    // Only the named children.
    let named_kinds: Vec<&str> = array_node
        .named_children(&mut cursor)
        .map(|child| child.kind())
        .collect();
    assert_eq!(named_kinds, ["number", "false", "object"]);

    // The same cursor is reused to iterate the children of a nested node.
    let object_node = array_node
        .named_children(&mut cursor)
        .find(|child| child.kind() == "object")
        .unwrap();
    let object_kinds: Vec<&str> = object_node
        .children(&mut cursor)
        .map(|child| child.kind())
        .collect();
    assert_eq!(object_kinds, ["{", "pair", "}"]);
}

#[test]
fn test_node_children_by_field_name() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("python")).unwrap();

    // An `if` statement with three `elif` clauses.
    let source = "
        if one:
            a()
        elif two:
            b()
        elif three:
            c()
        elif four:
            d()
    ";
    let tree = parser.parse(source, None).unwrap();

    let if_statement = tree.root_node().child(0).unwrap();
    assert_eq!(if_statement.kind(), "if_statement");

    // Each child stored under the `alternative` field contributes the source text of
    // its `condition` field.
    let mut cursor = tree.walk();
    let conditions: Vec<&str> = if_statement
        .children_by_field_name("alternative", &mut cursor)
        .map(|clause| {
            let condition = clause.child_by_field_name("condition").unwrap();
            &source[condition.byte_range()]
        })
        .collect();
    assert_eq!(conditions, ["two", "three", "four"]);
}

#[test]
fn test_node_parent_of_child_by_field_name() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap();

    // First named child of the first named child of the root.
    let statement = tree.root_node().named_child(0).unwrap();
    let call_node = statement.named_child(0).unwrap();
    assert_eq!(call_node.kind(), "call_expression");

    // Regression test - when a field points to a hidden node (in this case, `_expression`)
    // the hidden node should not be added to the node parent cache.
    let callee = call_node.child_by_field_name("function").unwrap();
    assert_eq!(callee.parent(), Some(call_node));
}

#[test]
fn test_parent_of_zero_width_node() {
    let mut parser = Parser::new();

    // Python: a function header without a body. The child at index 4 of the function
    // definition is its `block`, which prints as an empty `(block)`.
    parser.set_language(&get_language("python")).unwrap();
    let python_tree = parser.parse("def dupa(foo):", None).unwrap();
    let module = python_tree.root_node();
    let func_def = module.child(0).unwrap();
    let body = func_def.child(4).unwrap();
    let body_parent = body.parent().unwrap();

    assert_eq!(body.to_string(), "(block)");
    assert_eq!(body_parent.kind(), "function_definition");
    assert_eq!(body_parent.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))");

    // The path from the root down to the block must be discoverable as well.
    assert_eq!(module.child_with_descendant(body).unwrap(), func_def);
    assert_eq!(func_def.child_with_descendant(body).unwrap(), body);
    assert_eq!(body.child_with_descendant(body), None);

    // HTML: the child at index 1 of an empty script element must report the script
    // element as its parent.
    parser.set_language(&get_language("html")).unwrap();
    let html_tree = parser.parse("<script></script>", None).unwrap();
    let script_element = html_tree.root_node().child(0).unwrap();
    let raw_text = script_element.child(1).unwrap();
    assert_eq!(raw_text.parent().unwrap(), script_element);
}

#[test]
fn test_next_sibling_of_zero_width_node() {
    let language = get_test_fixture_language("next_sibling_from_zwt");
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    // The input has no `c` between `b` and `d`; the child at index 2 is expected to
    // be a missing `c` node.
    let tree = parser.parse("abdef", None).unwrap();
    let root = tree.root_node();

    let missing_c = root.child(2).unwrap();
    assert!(missing_c.is_missing());
    assert_eq!(missing_c.kind(), "c");

    // Sibling navigation must work in both directions across the missing node.
    let node_d = root.child(3).unwrap();
    assert_eq!(missing_c.next_sibling().unwrap(), node_d);
    assert_eq!(node_d.prev_sibling().unwrap(), missing_c);
}

#[test]
fn test_first_child_for_offset() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("x10 + 100", None).unwrap();
    let sum_node = tree.root_node().child(0).unwrap().child(0).unwrap();

    // (byte offset, kind of the child expected from `first_child_for_byte`)
    for &(byte_offset, expected_kind) in &[
        (0, "identifier"),
        (1, "identifier"),
        (3, "+"),
        (5, "number"),
    ] {
        let child = sum_node.first_child_for_byte(byte_offset).unwrap();
        assert_eq!(child.kind(), expected_kind);
    }
}

#[test]
fn test_first_named_child_for_offset() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("x10 + 100", None).unwrap();
    let sum_node = tree.root_node().child(0).unwrap().child(0).unwrap();

    // (byte offset, kind of the named child expected from `first_named_child_for_byte`).
    // At offset 3 the anonymous `+` token is skipped and the number is returned.
    for &(byte_offset, expected_kind) in &[(0, "identifier"), (1, "identifier"), (3, "number")] {
        let child = sum_node.first_named_child_for_byte(byte_offset).unwrap();
        assert_eq!(child.kind(), expected_kind);
    }
}

#[test]
fn test_node_field_name_for_child() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("c")).unwrap();
    let tree = parser
        .parse("int w = x + /* y is special! */ y;", None)
        .unwrap();

    // Navigate to the binary expression: declaration -> declarator -> value.
    let declaration_node = tree.root_node().named_child(0).unwrap();
    let declarator_node = declaration_node
        .child_by_field_name("declarator")
        .unwrap();
    let binary_expression_node = declarator_node.child_by_field_name("value").unwrap();

    // -------------------
    // left: (identifier)  0
    // operator: "+"       1 <--- (not a named child)
    // (comment)           2 <--- (is an extra)
    // right: (identifier) 3
    // -------------------

    // (child index, expected field name)
    for &(child_index, expected_field) in &[
        (0, Some("left")),
        (1, Some("operator")),
        // The comment should not have a field name, as it's just an extra
        (2, None),
        (3, Some("right")),
        // Negative test - Not a valid child index
        (4, None),
    ] {
        assert_eq!(
            binary_expression_node.field_name_for_child(child_index),
            expected_field
        );
    }
}

#[test]
fn test_node_field_name_for_named_child() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("c")).unwrap();
    let tree = parser
        .parse("int w = x + /* y is special! */ y;", None)
        .unwrap();

    let declaration_node = tree.root_node().named_child(0).unwrap();
    let declarator_node = declaration_node.child_by_field_name("declarator").unwrap();
    let binary_expression_node = declarator_node.child_by_field_name("value").unwrap();

    // Named children of the binary expression, by named-child index
    // (the anonymous "+" operator is skipped by this numbering):
    //   0  left: (identifier)
    //   1  (comment)           (an extra, so it has no field name)
    //   2  right: (identifier)
    //   3  not a valid child index (negative test)
    let expected_fields = [
        (0, Some("left")),
        (1, None),
        (2, Some("right")),
        (3, None),
    ];
    for (named_index, expected_field) in expected_fields {
        assert_eq!(
            binary_expression_node.field_name_for_named_child(named_index),
            expected_field
        );
    }
}

#[test]
fn test_node_child_by_field_name_with_extra_hidden_children() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("python")).unwrap();

    // In the Python grammar, some fields are applied to `suite` nodes,
    // which consist of an invisible `indent` token followed by a block.
    // Check that looking up the `body` field yields the visible child at
    // index 3 (presumably the block) instead of being thrown off by the
    // hidden children.
    let tree = parser.parse("while a:\n  pass", None).unwrap();
    let while_node = tree.root_node().child(0).unwrap();
    assert_eq!(while_node.kind(), "while_statement");

    let body_by_field = while_node.child_by_field_name("body").unwrap();
    let body_by_index = while_node.child(3).unwrap();
    assert_eq!(body_by_field, body_by_index);
}

// Walks the named children of the JSON example and checks their kinds,
// byte/point extents, parent links and `child_with_descendant` lookups.
#[test]
fn test_node_named_child() {
    let tree = parse_json_example();
    let root = tree.root_node();
    let array_node = root.child(0).unwrap();

    // Byte offsets of the tokens of interest within the example source
    let number_offset = JSON_EXAMPLE.find("123").unwrap();
    let false_offset = JSON_EXAMPLE.find("false").unwrap();
    let object_offset = JSON_EXAMPLE.find('{').unwrap();
    let key_offset = JSON_EXAMPLE.find("\"x\"").unwrap();
    let null_offset = JSON_EXAMPLE.find("null").unwrap();

    let number_node = array_node.named_child(0).unwrap();
    let false_node = array_node.named_child(1).unwrap();
    let object_node = array_node.named_child(2).unwrap();

    // First array element: the number
    assert_eq!(number_node.kind(), "number");
    assert_eq!(number_node.start_byte(), number_offset);
    assert_eq!(number_node.end_byte(), number_offset + 3);
    assert_eq!(number_node.start_position(), Point::new(3, 2));
    assert_eq!(number_node.end_position(), Point::new(3, 5));

    // Second array element: the `false` literal
    assert_eq!(false_node.kind(), "false");
    assert_eq!(false_node.start_byte(), false_offset);
    assert_eq!(false_node.end_byte(), false_offset + 5);
    assert_eq!(false_node.start_position(), Point::new(4, 2));
    assert_eq!(false_node.end_position(), Point::new(4, 7));

    // Third array element: the object, which holds a single pair
    assert_eq!(object_node.kind(), "object");
    assert_eq!(object_node.start_byte(), object_offset);
    assert_eq!(object_node.start_position(), Point::new(5, 2));
    assert_eq!(object_node.end_position(), Point::new(7, 3));
    assert_eq!(object_node.named_child_count(), 1);

    let pair_node = object_node.named_child(0).unwrap();
    assert_eq!(pair_node.kind(), "pair");
    assert_eq!(pair_node.start_byte(), key_offset);
    assert_eq!(pair_node.end_byte(), null_offset + 4);
    assert_eq!(pair_node.start_position(), Point::new(6, 4));
    assert_eq!(pair_node.end_position(), Point::new(6, 13));

    // The pair's key and value
    let string_node = pair_node.named_child(0).unwrap();
    let null_node = pair_node.named_child(1).unwrap();
    assert_eq!(string_node.kind(), "string");
    assert_eq!(null_node.kind(), "null");

    assert_eq!(string_node.start_byte(), key_offset);
    assert_eq!(string_node.end_byte(), key_offset + 3);
    assert_eq!(string_node.start_position(), Point::new(6, 4));
    assert_eq!(string_node.end_position(), Point::new(6, 7));

    assert_eq!(null_node.start_byte(), null_offset);
    assert_eq!(null_node.end_byte(), null_offset + 4);
    assert_eq!(null_node.start_position(), Point::new(6, 9));
    assert_eq!(null_node.end_position(), Point::new(6, 13));

    // Every node reports the expected parent; the root has none
    let parent_links = [
        (string_node, pair_node),
        (null_node, pair_node),
        (pair_node, object_node),
        (number_node, array_node),
        (false_node, array_node),
        (object_node, array_node),
        (array_node, root),
    ];
    for (child, expected_parent) in parent_links {
        assert_eq!(child.parent().unwrap(), expected_parent);
    }
    assert_eq!(root.parent(), None);

    // Descend from the root towards `null_node` one level at a time
    let containment_chain = [
        (root, array_node),
        (array_node, object_node),
        (object_node, pair_node),
        (pair_node, null_node),
    ];
    for (ancestor, expected_child) in containment_chain {
        assert_eq!(
            ancestor.child_with_descendant(null_node).unwrap(),
            expected_child
        );
    }
    assert_eq!(null_node.child_with_descendant(null_node), None);
}

// Named-child indexing must count aliased nodes and extras (comments) alike.
#[test]
fn test_node_named_child_with_aliases_and_extras() {
    let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
    let language = get_test_language(&parser_name, &parser_code, None);

    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let tree = parser.parse("b ... b ... c", None).unwrap();
    let root = tree.root_node();
    assert_eq!(root.to_sexp(), "(a (b) (comment) (B) (comment) (C))");
    assert_eq!(root.named_child_count(), 5);

    // (named-child index, expected kind)
    let expected_kinds = [
        (0, "b"),
        (1, "comment"),
        (2, "B"),
        (3, "comment"),
        (4, "C"),
    ];
    for (index, expected_kind) in expected_kinds {
        assert_eq!(root.named_child(index).unwrap().kind(), expected_kind);
    }
}

// `descendant_count` must agree with a full traversal, and `goto_descendant`
// must land on the matching node whether indices are visited in ascending
// or descending order.
#[test]
fn test_node_descendant_count() {
    let tree = parse_json_example();
    let root = tree.root_node();
    let expected_nodes = get_all_nodes(&tree);

    assert_eq!(root.descendant_count(), expected_nodes.len());

    let mut cursor = root.walk();

    // Ascending indices
    for (i, &expected) in expected_nodes.iter().enumerate() {
        cursor.goto_descendant(i);
        assert_eq!(cursor.node(), expected, "index {i}");
    }

    // Descending indices, reusing the same cursor
    for i in (0..expected_nodes.len()).rev() {
        cursor.goto_descendant(i);
        assert_eq!(cursor.node(), expected_nodes[i], "rev index {i}");
    }
}

// A tiny tree (root plus one descendant): checks the descendant count and
// the cursor depth reported after jumping to each descendant index.
#[test]
fn test_descendant_count_single_node_tree() {
    let language = get_language("embedded-template");
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let tree = parser.parse("hello", None).unwrap();
    let root = tree.root_node();

    let nodes = get_all_nodes(&tree);
    assert_eq!(nodes.len(), 2);
    assert_eq!(root.descendant_count(), 2);

    let mut cursor = root.walk();

    // Index 0 is the root itself, at depth 0
    cursor.goto_descendant(0);
    assert_eq!((cursor.depth(), cursor.node()), (0, nodes[0]));

    // Index 1 is its only descendant, at depth 1
    cursor.goto_descendant(1);
    assert_eq!((cursor.depth(), cursor.node()), (1, nodes[1]));
}

#[test]
fn test_node_descendant_for_range() {
    // Asserts a node's kind, its (start, end) byte offsets and its start/end
    // points, the latter given as (row, column) pairs.
    fn assert_extent(
        node: Node,
        kind: &str,
        bytes: (usize, usize),
        start: (usize, usize),
        end: (usize, usize),
    ) {
        assert_eq!(node.kind(), kind);
        assert_eq!(node.start_byte(), bytes.0);
        assert_eq!(node.end_byte(), bytes.1);
        assert_eq!(node.start_position(), Point::new(start.0, start.1));
        assert_eq!(node.end_position(), Point::new(end.0, end.1));
    }

    let tree = parse_json_example();
    let array_node = tree.root_node();

    let colon_index = JSON_EXAMPLE.find(':').unwrap();
    let colon_bytes = (colon_index, colon_index + 1);

    // Leaf node exactly matches the given bounds - byte query
    let colon_node = array_node
        .descendant_for_byte_range(colon_index, colon_index + 1)
        .unwrap();
    assert_extent(colon_node, ":", colon_bytes, (6, 7), (6, 8));

    // Leaf node exactly matches the given bounds - point query
    let colon_node = array_node
        .descendant_for_point_range(Point::new(6, 7), Point::new(6, 8))
        .unwrap();
    assert_extent(colon_node, ":", colon_bytes, (6, 7), (6, 8));

    // The given point is between two adjacent leaf nodes - byte query
    let colon_node = array_node
        .descendant_for_byte_range(colon_index, colon_index)
        .unwrap();
    assert_extent(colon_node, ":", colon_bytes, (6, 7), (6, 8));

    // The given point is between two adjacent leaf nodes - point query
    let colon_node = array_node
        .descendant_for_point_range(Point::new(6, 7), Point::new(6, 7))
        .unwrap();
    assert_extent(colon_node, ":", colon_bytes, (6, 7), (6, 8));

    let string_index = JSON_EXAMPLE.find("\"x\"").unwrap();
    let string_bytes = (string_index, string_index + 3);

    // Leaf node starts at the lower bound, ends after the upper bound - byte query
    let string_node = array_node
        .descendant_for_byte_range(string_index, string_index + 2)
        .unwrap();
    assert_extent(string_node, "string", string_bytes, (6, 4), (6, 7));

    // Leaf node starts at the lower bound, ends after the upper bound - point query
    let string_node = array_node
        .descendant_for_point_range(Point::new(6, 4), Point::new(6, 6))
        .unwrap();
    assert_extent(string_node, "string", string_bytes, (6, 4), (6, 7));

    let null_index = JSON_EXAMPLE.find("null").unwrap();
    let null_bytes = (null_index, null_index + 4);

    // Leaf node starts before the lower bound, ends at the upper bound - byte query
    let null_node = array_node
        .descendant_for_byte_range(null_index + 1, null_index + 4)
        .unwrap();
    assert_extent(null_node, "null", null_bytes, (6, 9), (6, 13));

    // Leaf node starts before the lower bound, ends at the upper bound - point query
    let null_node = array_node
        .descendant_for_point_range(Point::new(6, 11), Point::new(6, 13))
        .unwrap();
    assert_extent(null_node, "null", null_bytes, (6, 9), (6, 13));

    let pair_bytes = (string_index, string_index + 9);

    // The bounds span multiple leaf nodes - return the smallest node that does span it.
    let pair_node = array_node
        .descendant_for_byte_range(string_index + 2, string_index + 4)
        .unwrap();
    assert_extent(pair_node, "pair", pair_bytes, (6, 4), (6, 13));

    assert_eq!(colon_node.parent(), Some(pair_node));

    // no leaf spans the given range - return the smallest node that does span it.
    let pair_node = array_node
        .named_descendant_for_point_range(Point::new(6, 6), Point::new(6, 8))
        .unwrap();
    assert_extent(pair_node, "pair", pair_bytes, (6, 4), (6, 13));

    // Zero-width token: the point query and the byte query must agree
    {
        let html_source = "<script></script>";
        let mut html_parser = Parser::new();
        html_parser.set_language(&get_language("html")).unwrap();

        let html_tree = html_parser.parse(html_source, None).unwrap();
        let html_root = html_tree.root_node();

        let by_point = html_root
            .named_descendant_for_point_range(Point::new(0, 8), Point::new(0, 8))
            .unwrap();
        assert_eq!(by_point.kind(), "raw_text");

        let by_byte = html_root.named_descendant_for_byte_range(8, 8).unwrap();
        assert_eq!(by_byte.kind(), "raw_text");

        assert_eq!(by_point, by_byte);
    }

    // Negative test, start > end
    assert_eq!(array_node.descendant_for_byte_range(1, 0), None);
    let reversed_point_query =
        array_node.descendant_for_point_range(Point::new(6, 8), Point::new(6, 7));
    assert_eq!(reversed_point_query, None);
}

// Applies ten random edits in sequence. After each one, nodes adjusted via
// `Node::edit` must report the same kind, start byte and start position as
// the nodes of the edited clone of the tree.
#[test]
fn test_node_edit() {
    let mut source = JSON_EXAMPLE.as_bytes().to_vec();
    let mut current_tree = parse_json_example();
    let mut rng = Rand::new(0);

    for _ in 0..10 {
        let mut edited_nodes = get_all_nodes(&current_tree);

        // Apply the random edit to a clone, keeping the original tree intact
        let random_edit = get_random_edit(&mut rng, &source);
        let mut next_tree = current_tree.clone();
        let input_edit = perform_edit(&mut next_tree, &mut source, &random_edit).unwrap();

        // Adjust the nodes collected before the edit
        for node in edited_nodes.iter_mut() {
            node.edit(&input_edit);
        }

        let fresh_nodes = get_all_nodes(&next_tree);
        for (i, edited) in edited_nodes.into_iter().enumerate() {
            let fresh = &fresh_nodes[i];
            let edited_summary = (edited.kind(), edited.start_byte(), edited.start_position());
            let fresh_summary = (fresh.kind(), fresh.start_byte(), fresh.start_position());
            assert_eq!(edited_summary, fresh_summary);
        }

        current_tree = next_tree;
    }
}

// A root node obtained with a byte/point offset must report shifted ranges,
// both for nodes reached by child indexing and for nodes reached by a cursor.
#[test]
fn test_root_node_with_offset() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("  if (a) b", None).unwrap();

    let shifted_root = tree.root_node_with_offset(6, Point::new(2, 2));
    assert_eq!(shifted_root.byte_range(), 8..16);
    assert_eq!(shifted_root.start_position(), Point::new(2, 4));
    assert_eq!(shifted_root.end_position(), Point::new(2, 12));

    // Descendant reached through indexed child access
    let first_child = shifted_root.child(0).unwrap();
    let statement = first_child.child(2).unwrap();
    assert_eq!(statement.kind(), "expression_statement");
    assert_eq!(statement.byte_range(), 15..16);
    assert_eq!(statement.start_position(), Point::new(2, 11));
    assert_eq!(statement.end_position(), Point::new(2, 12));

    // Descendant reached through a cursor: down twice, then one sibling over
    let mut cursor = shifted_root.walk();
    cursor.goto_first_child();
    cursor.goto_first_child();
    cursor.goto_next_sibling();
    let condition = cursor.node();
    assert_eq!(condition.kind(), "parenthesized_expression");
    assert_eq!(condition.byte_range(), 11..14);
    assert_eq!(condition.start_position(), Point::new(2, 7));
    assert_eq!(condition.end_position(), Point::new(2, 10));
}

// A comment inside an argument list is an extra; the program root is not.
#[test]
fn test_node_is_extra() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("foo(/* hi */);", None).unwrap();

    let program = tree.root_node();
    // Byte 7 lies inside the `/* hi */` comment
    let comment = program.descendant_for_byte_range(7, 7).unwrap();

    assert_eq!(program.kind(), "program");
    assert_eq!(comment.kind(), "comment");
    assert!(!program.is_extra());
    assert!(comment.is_extra());
}

// Incomplete input: the root reports an error and its first child is an
// `ERROR` node.
#[test]
fn test_node_is_error() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();

    let tree = parser.parse("foo(", None).unwrap();
    let program = tree.root_node();
    assert_eq!(program.kind(), "program");
    assert!(program.has_error());

    let error_node = program.child(0).unwrap();
    assert_eq!(error_node.kind(), "ERROR");
    assert!(error_node.is_error());
}

// Checks the S-expression rendering of two anonymous tokens and one named node.
#[test]
fn test_node_sexp() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("if (a) b", None).unwrap();
    let root_node = tree.root_node();

    // (byte offset to look up, expected kind, expected S-expression)
    let expectations = [
        (0, "if", "(\"if\")"),
        (3, "(", "(\"(\")"),
        (4, "identifier", "(identifier)"),
    ];
    for (offset, expected_kind, expected_sexp) in expectations {
        let node = root_node.descendant_for_byte_range(offset, offset).unwrap();
        assert_eq!(node.kind(), expected_kind);
        assert_eq!(node.to_sexp(), expected_sexp);
    }
}

// Generates a small grammar with fields on visible and hidden rules, then
// checks field lookup through `child_by_field_name` and through a cursor.
#[test]
fn test_node_field_names() {
    let grammar = r#"
        {
            "name": "test_grammar_with_fields",
            "extras": [
                {"type": "PATTERN", "value": "\\s+"}
            ],
            "rules": {
                "rule_a": {
                    "type": "SEQ",
                    "members": [
                        {
                            "type": "FIELD",
                            "name": "field_1",
                            "content": {"type": "STRING", "value": "child-0"}
                        },
                        {
                            "type": "CHOICE",
                            "members": [
                                {"type": "STRING", "value": "child-1"},
                                {"type": "BLANK"},

                                // This isn't used in the test, but prevents `_hidden_rule1`
                                // from being eliminated as a unit reduction.
                                {
                                    "type": "ALIAS",
                                    "value": "x",
                                    "named": true,
                                    "content": {
                                        "type": "SYMBOL",
                                        "name": "_hidden_rule1"
                                    }
                                }
                            ]
                        },
                        {
                            "type": "FIELD",
                            "name": "field_2",
                            "content": {"type": "SYMBOL", "name": "_hidden_rule1"}
                        },
                        {"type": "SYMBOL", "name": "_hidden_rule2"}
                    ]
                },

                // Fields pointing to hidden nodes with a single child resolve to the child.
                "_hidden_rule1": {
                    "type": "CHOICE",
                    "members": [
                        {"type": "STRING", "value": "child-2"},
                        {"type": "STRING", "value": "child-2.5"}
                    ]
                },

                // Fields within hidden nodes can be referenced through the parent node.
                "_hidden_rule2": {
                    "type": "SEQ",
                    "members": [
                        {"type": "STRING", "value": "child-3"},
                        {
                            "type": "FIELD",
                            "name": "field_3",
                            "content": {"type": "STRING", "value": "child-4"}
                        }
                    ]
                }
            }
        }
    "#;
    let (parser_name, parser_code) = generate_parser(grammar).unwrap();

    let language = get_test_language(&parser_name, &parser_code, None);
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let tree = parser
        .parse("child-0 child-1 child-2 child-3 child-4", None)
        .unwrap();
    let root_node = tree.root_node();

    // Each field resolves to the child at the given index of the root
    for (field, child_index) in [("field_1", 0), ("field_2", 2), ("field_3", 4)] {
        assert_eq!(
            root_node.child_by_field_name(field),
            root_node.child(child_index)
        );
    }

    // A field of the parent is not visible from the child, and unknown
    // field names resolve to nothing
    let first_child = root_node.child(0).unwrap();
    assert_eq!(first_child.child_by_field_name("field_1"), None);
    assert_eq!(root_node.child_by_field_name("not_a_real_field"), None);

    // Walk the children left to right, checking kind and field name at each stop
    let expected_walk = [
        ("child-0", Some("field_1")),
        ("child-1", None),
        ("child-2", Some("field_2")),
        ("child-3", None),
        ("child-4", Some("field_3")),
    ];
    let mut cursor = root_node.walk();
    assert_eq!(cursor.field_name(), None);
    cursor.goto_first_child();
    for (step, &(expected_kind, expected_field)) in expected_walk.iter().enumerate() {
        if step > 0 {
            cursor.goto_next_sibling();
        }
        assert_eq!(cursor.node().kind(), expected_kind);
        assert_eq!(cursor.field_name(), expected_field);
    }
}

// Field-related calls must simply return `None` for a grammar that declares
// no fields at all.
#[test]
fn test_node_field_calls_in_language_without_fields() {
    let grammar = r#"
        {
            "name": "test_grammar_with_no_fields",
            "extras": [
                {"type": "PATTERN", "value": "\\s+"}
            ],
            "rules": {
                "a": {
                    "type": "SEQ",
                    "members": [
                        {
                            "type": "STRING",
                            "value": "b"
                        },
                        {
                            "type": "STRING",
                            "value": "c"
                        },
                        {
                            "type": "STRING",
                            "value": "d"
                        }
                    ]
                }
            }
        }
    "#;
    let (parser_name, parser_code) = generate_parser(grammar).unwrap();

    let language = get_test_language(&parser_name, &parser_code, None);
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let tree = parser.parse("b c d", None).unwrap();
    let root = tree.root_node();
    assert_eq!(root.kind(), "a");

    // Lookup by field name on the node
    assert_eq!(root.child_by_field_name("something"), None);

    // Field name reported by a cursor, at the root and at its first child
    let mut cursor = root.walk();
    assert_eq!(cursor.field_name(), None);
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.field_name(), None);
}

// A named rule aliased to an anonymous name must be reported as anonymous
// and must not be counted among the named children.
#[test]
fn test_node_is_named_but_aliased_as_anonymous() {
    let grammar_path = fixtures_dir()
        .join("test_grammars")
        .join("named_rule_aliased_as_anonymous")
        .join("grammar.js");
    let grammar_json = load_grammar_file(&grammar_path, None).unwrap();
    let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();

    let language = get_test_language(&parser_name, &parser_code, None);
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let tree = parser.parse("B C B", None).unwrap();
    let root = tree.root_node();
    assert!(!root.has_error());

    // Three children in total, of which only two are named
    assert_eq!(root.child_count(), 3);
    assert_eq!(root.named_child_count(), 2);

    // The first child carries the alias and is anonymous
    let aliased_child = root.child(0).unwrap();
    assert!(!aliased_child.is_named());
    assert_eq!(aliased_child.kind(), "the-alias");

    // Named-child indexing therefore starts at the `c` node
    let first_named = root.named_child(0).unwrap();
    assert_eq!(first_named.kind(), "c");
}

#[test]
fn test_node_numeric_symbols_respect_simple_aliases() {
    let mut parser = Parser::new();

    // Example 1:
    // Python argument lists can contain "splat" arguments, which are not allowed
    // within other expressions. This includes `parenthesized_list_splat` nodes
    // like `(*b)`. These `parenthesized_list_splat` nodes are aliased as
    // `parenthesized_expression`. Their numeric `symbol`, aka `kind_id` should
    // match that of a normal `parenthesized_expression`.
    parser.set_language(&get_language("python")).unwrap();
    let python_tree = parser.parse("(a((*b)))", None).unwrap();
    let python_root = python_tree.root_node();
    assert_eq!(
        python_root.to_sexp(),
        "(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))",
    );

    let expression_statement = python_root.child(0).unwrap();
    let outer_expr_node = expression_statement.child(0).unwrap();
    assert_eq!(outer_expr_node.kind(), "parenthesized_expression");

    // Drill down: call -> argument list -> the aliased inner expression
    let call_node = outer_expr_node.named_child(0).unwrap();
    let argument_list = call_node.child_by_field_name("arguments").unwrap();
    let inner_expr_node = argument_list.named_child(0).unwrap();
    assert_eq!(inner_expr_node.kind(), "parenthesized_expression");
    assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id());

    // Example 2:
    // Ruby handles the unary (negative) and binary (minus) `-` operators using two
    // different tokens. One or more of these is an external token that's
    // aliased as `-`. Their numeric kind ids should match.
    parser.set_language(&get_language("ruby")).unwrap();
    let ruby_tree = parser.parse("-a - b", None).unwrap();
    let ruby_root = ruby_tree.root_node();
    assert_eq!(
        ruby_root.to_sexp(),
        "(program (binary left: (unary operand: (identifier)) right: (identifier)))",
    );

    let binary_node = ruby_root.child(0).unwrap();
    assert_eq!(binary_node.kind(), "binary");

    let unary_node = binary_node.child_by_field_name("left").unwrap();
    let unary_minus_node = unary_node.child(0).unwrap();
    assert_eq!(unary_minus_node.kind(), "-");

    let binary_minus_node = binary_node.child_by_field_name("operator").unwrap();
    assert_eq!(binary_minus_node.kind(), "-");
    assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id());
}

// `child_with_descendant` must return the declarator itself when asked,
// from the field declaration, for the child containing that declarator.
#[test]
fn test_hidden_zero_width_node_with_visible_child() {
    let source = r"
class Foo {
  std::
private:
  std::string s;
};
";

    let mut parser = Parser::new();
    parser.set_language(&get_language("cpp")).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Drill down: class specifier -> body -> first named child -> declarator
    let class_specifier = tree.root_node().child(0).unwrap();
    let class_body = class_specifier.child_by_field_name("body").unwrap();
    let field_decl = class_body.named_child(0).unwrap();
    let declarator = field_decl.child_by_field_name("declarator").unwrap();

    let containing_child = field_decl.child_with_descendant(declarator).unwrap();
    assert_eq!(containing_child, declarator);
}

// Collects every node of `tree` by walking it with a cursor; a node is
// recorded before any of its children (pre-order).
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
    let mut nodes = Vec::new();
    let mut cursor = tree.walk();
    'walk: loop {
        // Record the node on first arrival, then try to descend
        nodes.push(cursor.node());
        if cursor.goto_first_child() {
            continue;
        }
        // Nothing below: move to the next sibling, climbing towards the
        // root whenever the current level is exhausted
        while !cursor.goto_next_sibling() {
            if !cursor.goto_parent() {
                break 'walk;
            }
        }
    }
    nodes
}

// Parses `JSON_EXAMPLE` with the JSON grammar and returns the resulting tree.
// Panics if the language cannot be set or the parse yields no tree.
fn parse_json_example() -> Tree {
    let json_language = get_language("json");
    let mut json_parser = Parser::new();
    json_parser.set_language(&json_language).unwrap();
    json_parser.parse(JSON_EXAMPLE, None).unwrap()
}



================================================
FILE: crates/cli/src/tests/parser_hang_test.rs
================================================
// For some reason, `Command::spawn` doesn't work in the CI environment on many exotic architectures.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]

use std::{
    env::VarError,
    process::{Command, Stdio},
};

use tree_sitter::Parser;
use tree_sitter_generate::load_grammar_file;

use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};

// The `sanitizing` cfg is required so that these tests don't run under certain sanitizers,
// because they don't work well with subprocesses _(this is an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// if excluding the whole mod from compilation doesn't work well.
//
// XXX: It may also make sense to keep such tests ignored by default
//      to avoid surprises, and enable them on CI by passing an extra option explicitly:
//
//        > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
// Re-runs this same test in a child process (selected through the
// `CARGO_HANG_TEST` env var) and expects the child to still be running
// after a one-second grace period, i.e. to hang rather than crash.
//
// - Parent (env var absent): spawns the test executable with the test name
//   as a filter, sleeps, fails if the child has already exited, and
//   otherwise kills and reaps it.
// - Child (env var equals the test name): runs `hang_test`.
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
    let parent_sleep_millis = 1000;
    let test_name = "test_grammar_that_should_hang_and_not_segfault";
    let test_var = "CARGO_HANG_TEST";

    eprintln!("  {test_name}");

    let tests_exec_path = std::env::args()
        .next()
        .expect("Failed to get tests executable path");

    match std::env::var(test_var) {
        Ok(v) if v == test_name => {
            eprintln!("    child process id {}", std::process::id());
            hang_test();
        }

        // The variable is set, but not by the parent branch below; this arm
        // is reachable, so fail with a description instead of `unreachable!`
        Ok(v) => panic!("Unexpected value for env var {test_var}: {v}"),

        Err(VarError::NotPresent) => {
            eprintln!("    parent process id {}", std::process::id());
            let mut command = Command::new(tests_exec_path);
            command.arg(test_name).env(test_var, test_name);

            // Forward `--nocapture` to the child; otherwise silence its output
            if std::env::args().any(|x| x == "--nocapture") {
                command.arg("--nocapture");
            } else {
                command.stdout(Stdio::null()).stderr(Stdio::null());
            }

            match command.spawn() {
                Ok(mut child) => {
                    std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
                    match child.try_wait() {
                        Ok(Some(status)) if status.success() => {
                            panic!("Child didn't hang and exited successfully")
                        }
                        Ok(Some(status)) => panic!(
                            "Child didn't hang and exited with status code: {:?}",
                            status.code()
                        ),
                        // Still running (or its status could not be queried):
                        // fall through and kill it
                        _ => (),
                    }
                    match child.kill() {
                        // Reap the killed child so that it isn't left behind
                        // as an unreaped process
                        Ok(()) => {
                            if let Err(e) = child.wait() {
                                eprintln!(
                                    "Failed to wait for hang test's process id: {}, error: {e}",
                                    child.id()
                                );
                            }
                        }
                        Err(e) => eprintln!(
                            "Failed to kill hang test's process id: {}, error: {e}",
                            child.id()
                        ),
                    }
                }
                Err(e) => panic!("{e}"),
            }
        }

        Err(e) => panic!("Env var error: {e}"),
    }
}

/// Generates a parser from the `get_col_should_hang_not_crash` fixture grammar,
/// loads it as a language, and parses an input that is expected to hang.
///
/// Called from the child-process branch of
/// `test_grammar_that_should_hang_and_not_segfault`.
fn hang_test() {
    let grammar_dir = fixtures_dir()
        .join("test_grammars")
        .join("get_col_should_hang_not_crash");
    let grammar_path = grammar_dir.join("grammar.js");

    let grammar_json = load_grammar_file(&grammar_path, None).unwrap();
    let (name, code) = generate_parser(grammar_json.as_str()).unwrap();
    let language = get_test_language(&name, &code, Some(grammar_dir.as_path()));

    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    // Input on which the generated parser is expected to spin rather than crash.
    let hanging_input = "\nHello";
    parser.parse(hanging_input, None).unwrap();
}



================================================
FILE: crates/cli/src/tests/pathological_test.rs
================================================
use tree_sitter::Parser;

use super::helpers::{allocations, fixtures::get_language};

/// Parses a pathological C++ input inside `allocations::record` and requires
/// that the parse produces a tree.
#[test]
fn test_pathological_example_1() {
    let pathological_input = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
    let language_name = "cpp";

    // Parser creation, language loading and parsing all happen inside the
    // recorded closure.
    allocations::record(|| {
        let mut cpp_parser = Parser::new();
        cpp_parser
            .set_language(&get_language(language_name))
            .unwrap();
        cpp_parser.parse(pathological_input, None).unwrap();
    });
}



================================================
FILE: crates/cli/src/tests/tags_test.rs
================================================
use std::{
    ffi::{CStr, CString},
    fs, ptr, slice, str,
};

use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};

use super::helpers::{
    allocations,
    fixtures::{get_language, get_language_queries_path},
};

/// Tags query used by the Python tests in this module.
///
/// Patterns, in order:
/// * function definitions whose body starts with a string expression, captured
///   as `@doc` and passed to `#strip!` with a regex matching leading/trailing
///   quotes and whitespace;
/// * function definitions without a doc capture;
/// * the same two variants for class definitions;
/// * calls through a plain identifier and through an attribute, both tagged
///   `@reference.call`.
const PYTHON_TAG_QUERY: &str = r#"
(
  (function_definition
    name: (identifier) @name
    body: (block . (expression_statement (string) @doc))) @definition.function
  (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)

(function_definition
  name: (identifier) @name) @definition.function

(
  (class_definition
    name: (identifier) @name
    body: (block
      . (expression_statement (string) @doc))) @definition.class
  (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)

(class_definition
  name: (identifier) @name) @definition.class

(call
  function: (identifier) @name) @reference.call

(call
  function: (attribute
    attribute: (identifier) @name)) @reference.call
"#;

/// Tags query used by the JavaScript tests in this module.
///
/// Class declarations, method definitions and function declarations are each
/// matched together with any run of immediately preceding comments, captured as
/// `@doc`. Each of those patterns applies `#select-adjacent!` between `@doc` and
/// the definition, and `#strip!` with a regex matching leading/trailing `/`, `*`
/// and whitespace. Calls through a plain identifier are tagged `@reference.call`.
const JS_TAG_QUERY: &str = r#"
(
    (comment)* @doc .
    (class_declaration
        name: (identifier) @name) @definition.class
    (#select-adjacent! @doc @definition.class)
    (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)

(
    (comment)* @doc .
    (method_definition
        name: (property_identifier) @name) @definition.method
    (#select-adjacent! @doc @definition.method)
    (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)

(
    (comment)* @doc .
    (function_declaration
        name: (identifier) @name) @definition.function
    (#select-adjacent! @doc @definition.function)
    (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)

(call_expression
    function: (identifier) @name) @reference.call
"#;

/// Tags query used by the Ruby test in this module.
///
/// Tags method definitions and `call` nodes, captures the identifier of a
/// `setter` as `@ignore`, and tags any remaining identifier as a call reference
/// when the `#is-not? local` predicate holds.
const RUBY_TAG_QUERY: &str = r"
(method
    name: (_) @name) @definition.method

(call
    method: (identifier) @name) @reference.call

(setter (identifier) @ignore)

((identifier) @name @reference.call
 (#is-not? local))
";

/// Tags a small Python snippet and checks the tag names, syntax types, first
/// lines and stripped docstrings of the class and function definitions.
#[test]
fn test_tags_python() {
    let tags_config =
        TagsConfiguration::new(get_language("python"), PYTHON_TAG_QUERY, "").unwrap();
    let mut tag_context = TagsContext::new();

    let source = br#"
    class Customer:
        """
        Data about a customer
        """

        def age(self):
            '''
            Get the customer's age
            '''
            compute_age(self.id)
    }
    "#;

    let (tag_iter, _) = tag_context
        .generate_tags(&tags_config, source, None)
        .unwrap();
    let tags = tag_iter.collect::<Result<Vec<_>, _>>().unwrap();

    // (name text, syntax type) for every tag, in emission order.
    let names_and_kinds = tags
        .iter()
        .map(|tag| {
            (
                substr(source, &tag.name_range),
                tags_config.syntax_type_name(tag.syntax_type_id),
            )
        })
        .collect::<Vec<_>>();
    assert_eq!(
        names_and_kinds,
        &[
            ("Customer", "class"),
            ("age", "function"),
            ("compute_age", "call"),
        ]
    );

    let class_tag = &tags[0];
    let function_tag = &tags[1];
    assert_eq!(substr(source, &class_tag.line_range), "class Customer:");
    assert_eq!(substr(source, &function_tag.line_range), "def age(self):");
    assert_eq!(class_tag.docs.as_ref().unwrap(), "Data about a customer");
    assert_eq!(function_tag.docs.as_ref().unwrap(), "Get the customer's age");
}

/// Tags a JavaScript snippet and checks names, spans, syntax types, and the
/// docs gathered from the comments preceding each definition.
#[test]
fn test_tags_javascript() {
    let tags_config =
        TagsConfiguration::new(get_language("javascript"), JS_TAG_QUERY, "").unwrap();
    let source = br"
    // hi

    // Data about a customer.
    // bla bla bla
    class Customer {
        /*
         * Get the customer's age
         */
        getAge() {
        }
    }

    // ok

    class Agent {

    }
    ";

    let mut tag_context = TagsContext::new();
    let (tag_iter, _) = tag_context
        .generate_tags(&tags_config, source, None)
        .unwrap();
    let tags = tag_iter.collect::<Result<Vec<_>, _>>().unwrap();

    // (name text, name span, syntax type) for every tag, in emission order.
    let summary = tags
        .iter()
        .map(|tag| {
            (
                substr(source, &tag.name_range),
                tag.span.clone(),
                tags_config.syntax_type_name(tag.syntax_type_id),
            )
        })
        .collect::<Vec<_>>();
    assert_eq!(
        summary,
        &[
            ("Customer", Point::new(5, 10)..Point::new(5, 18), "class"),
            ("getAge", Point::new(9, 8)..Point::new(9, 14), "method"),
            ("Agent", Point::new(15, 10)..Point::new(15, 15), "class"),
        ]
    );

    let customer = &tags[0];
    let get_age = &tags[1];
    let agent = &tags[2];
    assert_eq!(
        customer.docs.as_ref().unwrap(),
        "Data about a customer.\nbla bla bla"
    );
    assert_eq!(get_age.docs.as_ref().unwrap(), "Get the customer's age");
    assert_eq!(agent.docs, None);
}

/// Checks the first tag of a line containing multi-byte characters: the name
/// text, its `span`, and its `utf16_column_range`.
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
    let tags_config =
        TagsConfiguration::new(get_language("python"), PYTHON_TAG_QUERY, "").unwrap();
    let mut tag_context = TagsContext::new();

    let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();

    let (mut tag_iter, _) = tag_context
        .generate_tags(&tags_config, source, None)
        .unwrap();
    let first_tag = tag_iter.next().unwrap().unwrap();

    assert_eq!(substr(source, &first_tag.name_range), "hello_α_ω");
    // The span columns and the UTF-16 column range differ because of the
    // multi-byte characters earlier on the line.
    assert_eq!(first_tag.span, Point::new(0, 21)..Point::new(0, 32));
    assert_eq!(first_tag.utf16_column_range, 9..18);
}

/// Tags a Ruby snippet using the fixture `locals.scm` query and checks which
/// identifiers are reported as method definitions or calls, and where.
#[test]
fn test_tags_ruby() {
    let locals_path = get_language_queries_path("ruby").join("locals.scm");
    let language = get_language("ruby");
    let locals_query = fs::read_to_string(locals_path).unwrap();
    let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
    let source = strip_whitespace(
        8,
        "
        b = 1

        def foo=()
            c = 1

            # a is a method because it is not in scope
            # b is a method because `b` doesn't capture variables from its containing scope
            bar a, b, c

            [1, 2, 3].each do |a|
                # a is a parameter
                # b is a method
                # c is a variable, because the block captures variables from its containing scope.
                baz a, b, c
            end
        end",
    );
    let source_bytes = source.as_bytes();

    let mut tag_context = TagsContext::new();
    let (tag_iter, _) = tag_context
        .generate_tags(&tags_config, source_bytes, None)
        .unwrap();
    let tags = tag_iter.collect::<Result<Vec<_>, _>>().unwrap();

    // (name text, syntax type, (start row, start column)) for every tag.
    let summary = tags
        .iter()
        .map(|tag| {
            let start = tag.span.start;
            (
                substr(source_bytes, &tag.name_range),
                tags_config.syntax_type_name(tag.syntax_type_id),
                (start.row, start.column),
            )
        })
        .collect::<Vec<_>>();
    assert_eq!(
        summary,
        &[
            ("foo=", "method", (2, 4)),
            ("bar", "call", (7, 4)),
            ("a", "call", (7, 8)),
            ("b", "call", (7, 11)),
            ("each", "call", (9, 14)),
            ("baz", "call", (13, 8)),
            ("b", "call", (13, 15)),
        ]
    );
}

/// Sets the cancellation flag part-way through tagging a large document and
/// requires that the tag iterator then yields `Error::Cancelled`.
#[test]
fn test_tags_cancellation() {
    use std::sync::atomic::{AtomicUsize, Ordering};

    allocations::record(|| {
        // Large javascript document
        let source = "/* hi */ class A { /* ok */ b() {} }\n".repeat(500);

        let cancellation_flag = AtomicUsize::new(0);
        let tags_config =
            TagsConfiguration::new(get_language("javascript"), JS_TAG_QUERY, "").unwrap();

        let mut tag_context = TagsContext::new();
        let (tag_iter, _) = tag_context
            .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
            .unwrap();

        // Walk the tags, raising the flag when item 150 is reached, and stop at
        // the first error the iterator reports.
        let first_error = tag_iter.enumerate().find_map(|(index, tag)| {
            if index == 150 {
                cancellation_flag.store(1, Ordering::SeqCst);
            }
            tag.err()
        });

        match first_error {
            Some(e) => assert_eq!(e, Error::Cancelled),
            None => panic!("Expected to halt tagging with an error"),
        }
    });
}

/// A tags query using the capture name `@method` must be rejected with
/// `Error::InvalidCapture` carrying that capture name.
#[test]
fn test_invalid_capture() {
    let result = TagsConfiguration::new(get_language("python"), "(identifier) @method", "");
    let error = result.expect_err("expected InvalidCapture error");
    assert_eq!(error, Error::InvalidCapture(String::from("method")));
}

/// Tags Python source containing a syntax error: the error flag returned by
/// `generate_tags` must be set, and the well-formed class must still be tagged.
#[test]
fn test_tags_with_parse_error() {
    let tags_config =
        TagsConfiguration::new(get_language("python"), PYTHON_TAG_QUERY, "").unwrap();
    let mut tag_context = TagsContext::new();

    let source = br"
    class Fine: pass
    class Bad
    ";

    let (tag_iter, had_error) = tag_context
        .generate_tags(&tags_config, source, None)
        .unwrap();
    let collected = tag_iter.collect::<Result<Vec<_>, _>>().unwrap();

    assert!(had_error, "syntax error should have been detected");

    let names_and_kinds = collected
        .iter()
        .map(|tag| {
            (
                substr(source, &tag.name_range),
                tags_config.syntax_type_name(tag.syntax_type_id),
            )
        })
        .collect::<Vec<_>>();
    assert_eq!(names_and_kinds, &[("Fine", "class")]);
}

/// Drives the tagging C API (`c_lib`) end to end inside `allocations::record`:
/// creates a tagger and a tags buffer, registers JavaScript under a scope name,
/// tags a snippet, reads tags/docs/syntax kinds back out of the raw buffers, and
/// finally deletes both objects.
#[test]
fn test_tags_via_c_api() {
    allocations::record(|| {
        let tagger = c::ts_tagger_new();
        let buffer = c::ts_tags_buffer_new();
        let scope_name = "source.js";
        let language = get_language("javascript");

        let source_code = strip_whitespace(
            12,
            "
            var a = 1;

            // one
            // two
            // three
            function b() {
            }

            // four
            // five
            class C extends D {

            }

            b(a);",
        );

        // The C API takes the scope name as a NUL-terminated string.
        let c_scope_name = CString::new(scope_name).unwrap();
        // Register the language with the tags query passed as pointer + length.
        // NOTE(review): the null pointer / zero length pair presumably stands
        // for an absent locals query — confirm against the C API signature.
        let result = unsafe {
            c::ts_tagger_add_language(
                tagger,
                c_scope_name.as_ptr(),
                language,
                JS_TAG_QUERY.as_ptr(),
                ptr::null(),
                JS_TAG_QUERY.len() as u32,
                0,
            )
        };
        assert_eq!(result, c::TSTagsError::Ok);

        // Tag the source into `buffer`.
        // NOTE(review): the trailing null is presumably the cancellation flag —
        // confirm against the C API signature.
        let result = unsafe {
            c::ts_tagger_tag(
                tagger,
                c_scope_name.as_ptr(),
                source_code.as_ptr(),
                source_code.len() as u32,
                buffer,
                ptr::null(),
            )
        };
        assert_eq!(result, c::TSTagsError::Ok);
        // View the buffer's tag array as a slice of its reported length.
        let tags = unsafe {
            slice::from_raw_parts(
                c::ts_tags_buffer_tags(buffer),
                c::ts_tags_buffer_tags_len(buffer) as usize,
            )
        };
        // View the buffer's docs bytes as UTF-8 text; tags index into it by byte range.
        let docs = str::from_utf8(unsafe {
            slice::from_raw_parts(
                c::ts_tags_buffer_docs(buffer).cast::<u8>(),
                c::ts_tags_buffer_docs_len(buffer) as usize,
            )
        })
        .unwrap();

        // Fetch the syntax kind names for the scope (length returned through
        // `len`) and convert each C string to `&str`.
        let syntax_types = unsafe {
            let mut len = 0;
            let ptr = c::ts_tagger_syntax_kinds_for_scope_name(
                tagger,
                c_scope_name.as_ptr(),
                &raw mut len,
            );
            slice::from_raw_parts(ptr, len as usize)
                .iter()
                .map(|i| CStr::from_ptr(*i).to_str().unwrap())
                .collect::<Vec<_>>()
        };

        // Compare (syntax kind, name text, line text, docs text) for every tag.
        assert_eq!(
            tags.iter()
                .map(|tag| (
                    syntax_types[tag.syntax_type_id as usize],
                    &source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
                    &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
                    &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
                ))
                .collect::<Vec<_>>(),
            &[
                ("function", "b", "function b() {", "one\ntwo\nthree"),
                ("class", "C", "class C extends D {", "four\nfive"),
                ("call", "b", "b(a);", "")
            ]
        );

        // Release the C-side objects; the slices borrowed above are not used
        // after this point.
        unsafe {
            c::ts_tags_buffer_delete(buffer);
            c::ts_tagger_delete(tagger);
        }
    });
}

/// Returns the bytes of `source` covered by `range`, viewed as UTF-8 text.
///
/// Panics if `range` is out of bounds or the selected bytes are not valid UTF-8.
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
    let selected = &source[range.start..range.end];
    std::str::from_utf8(selected).unwrap()
}

/// Dedents a multi-line string literal written inline in a test.
///
/// The first line of `s` (normally the empty remainder after the opening quote)
/// is dropped. From every following line, up to `indent` leading space/tab
/// bytes are removed, and the lines are re-joined with `\n`.
///
/// Only whitespace is ever removed: a line indented by fewer than `indent`
/// columns keeps all of its content, and a line that starts with a multi-byte
/// character no longer panics on a byte-offset slice.
fn strip_whitespace(indent: usize, s: &str) -> String {
    s.lines()
        .skip(1)
        .map(|line| {
            // Space and tab are single-byte ASCII, so the resulting count is
            // always a valid char boundary for slicing.
            let strip_len = line
                .bytes()
                .take(indent)
                .take_while(|byte| matches!(byte, b' ' | b'\t'))
                .count();
            &line[strip_len..]
        })
        .collect::<Vec<_>>()
        .join("\n")
}



================================================
FILE: crates/cli/src/tests/test_highlight_test.rs
================================================
use tree_sitter::Parser;
use tree_sitter_highlight::{Highlight, Highlighter};

use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
    query_testing::{parse_position_comments, Assertion, Utf8Point},
    test_highlight::get_highlight_positions,
};

/// Runs the highlight-test machinery on a JavaScript snippet annotated with
/// position comments: checks the assertions parsed from the comments, then the
/// highlight positions computed for the same source.
#[test]
fn test_highlight_test_with_basic_test() {
    let recognized_names = ["function", "variable", "keyword"].map(String::from);
    let language = get_language("javascript");
    let config = get_highlight_config("javascript", Some("injections.scm"), &recognized_names);
    let source = [
        "// hi",
        "var abc = function(d) {",
        "  // ^ function",
        "  //       ^^^ keyword",
        "  return d + e;",
        "  //     ^ variable",
        "  //       ^ !variable",
        "};",
        "var y̆y̆y̆y̆ = function() {}",
        "  // ^ function",
        "  //       ^ keyword",
    ]
    .join("\n");
    let source_bytes = source.as_bytes();

    // Assertions extracted from the `^` comments; `!name` yields a negated one.
    let expected_assertions = [
        Assertion::new(1, 5, 1, false, "function".to_string()),
        Assertion::new(1, 11, 3, false, "keyword".to_string()),
        Assertion::new(4, 9, 1, false, "variable".to_string()),
        Assertion::new(4, 11, 1, true, "variable".to_string()),
        Assertion::new(8, 5, 1, false, "function".to_string()),
        Assertion::new(8, 11, 1, false, "keyword".to_string()),
    ];
    let assertions = parse_position_comments(&mut Parser::new(), &language, source_bytes).unwrap();
    assert_eq!(assertions, &expected_assertions);

    // `Highlight(n)` is the index into the recognized names passed above.
    let expected_positions = [
        (Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
        (Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
        (Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
        (Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
        (Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
        (Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
        (Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
        (Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
        (Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
        (Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
    ];
    let mut highlighter = Highlighter::new();
    let highlight_positions =
        get_highlight_positions(test_loader(), &mut highlighter, &config, source_bytes).unwrap();
    assert_eq!(highlight_positions, &expected_positions);
}



================================================
FILE: crates/cli/src/tests/test_tags_test.rs
================================================
use tree_sitter::Parser;
use tree_sitter_tags::TagsContext;

use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
    query_testing::{parse_position_comments, Assertion, Utf8Point},
    test_tags::get_tag_positions,
};

/// Runs the tags-test machinery on a Python snippet annotated with position
/// comments: checks the assertions parsed from the comments, then the tag
/// positions computed for the same source.
#[test]
fn test_tags_test_with_basic_test() {
    let language = get_language("python");
    let config = get_tags_config("python");
    let source = [
        "# hi",
        "def abc(d):",
        "    # <- definition.function",
        "    e = fgh(d)",
        "    #    ^ reference.call",
        "    return d(e)",
        "    #      ^ reference.call",
        "    #        ^ !variable.parameter",
        "",
    ]
    .join("\n");
    let source_bytes = source.as_bytes();

    // Assertions extracted from the `<-` / `^` comments; `!name` is negated.
    let expected_assertions = [
        Assertion::new(1, 4, 1, false, "definition.function".to_string()),
        Assertion::new(3, 9, 1, false, "reference.call".to_string()),
        Assertion::new(5, 11, 1, false, "reference.call".to_string()),
        Assertion::new(5, 13, 1, true, "variable.parameter".to_string()),
    ];
    let assertions = parse_position_comments(&mut Parser::new(), &language, source_bytes).unwrap();
    assert_eq!(assertions, &expected_assertions);

    // (start, end, tag name) for each tag found in the source.
    let expected_positions = [
        (
            Utf8Point::new(1, 4),
            Utf8Point::new(1, 7),
            String::from("definition.function"),
        ),
        (
            Utf8Point::new(3, 8),
            Utf8Point::new(3, 11),
            String::from("reference.call"),
        ),
        (
            Utf8Point::new(5, 11),
            Utf8Point::new(5, 12),
            String::from("reference.call"),
        ),
    ];
    let mut tags_context = TagsContext::new();
    let tag_positions = get_tag_positions(&mut tags_context, &config, source_bytes).unwrap();
    assert_eq!(tag_positions, &expected_positions);
}



================================================
FILE: crates/cli/src/tests/text_provider_test.rs
================================================
use std::{iter, sync::Arc};

use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};

use crate::tests::helpers::fixtures::get_language;

/// Parses `text` with the C language and returns the tree together with the
/// language that produced it.
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
    let c_language = get_language("c");
    let mut c_parser = Parser::new();
    c_parser.set_language(&c_language).unwrap();
    let tree = c_parser.parse(text, None).unwrap();
    (tree, c_language)
}

/// Parses input supplied chunk-by-chunk through `callback` with the C language.
///
/// Asserts that the first child of the root node is a `comment`, then returns
/// the tree together with the language.
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
    T: AsRef<[u8]>,
    F: FnMut(usize, Point) -> T,
{
    let c_language = get_language("c");
    let mut c_parser = Parser::new();
    c_parser.set_language(&c_language).unwrap();
    let tree = c_parser.parse_with_options(callback, None, None).unwrap();

    // Debugging aid: eprintln!("{}", tree.clone().root_node().to_sexp());
    let first_child = tree.root_node().child(0).unwrap();
    assert_eq!("comment", first_child.kind());

    (tree, c_language)
}

/// Runs a query matching a comment whose text equals `// comment` against
/// `tree`, reading node text through `text`, and asserts that the first capture
/// is that comment node.
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: &Language) {
    let pattern = r#"((comment) @c (#eq? @c "// comment"))"#;
    let query = Query::new(language, pattern).unwrap();

    let mut cursor = QueryCursor::new();
    let mut captures = cursor.captures(&query, tree.root_node(), text);

    // Each item is a match plus the index of the capture within that match.
    let (query_match, capture_position) = captures.next().unwrap();
    let capture_position = *capture_position;
    let capture = query_match.captures[capture_position];

    assert_eq!(capture.index as usize, capture_position);
    assert_eq!("comment", capture.node.kind());
}

/// Parses `parser_text` as C and then runs the comment query against the
/// resulting tree, using `text_provider` as the query's text source.
fn check_parsing<I: AsRef<[u8]>>(
    parser_text: impl AsRef<[u8]>,
    text_provider: impl TextProvider<I>,
) {
    let (parsed_tree, c_language) = parse_text(parser_text);
    tree_query(&parsed_tree, text_provider, &c_language);
}

/// Callback-driven counterpart of `check_parsing`: parses the input produced by
/// `parser_callback` and then runs the comment query against the resulting
/// tree, using `text_provider` as the query's text source.
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
    parser_callback: &mut F,
    text_provider: impl TextProvider<I>,
) where
    T: AsRef<[u8]>,
    F: FnMut(usize, Point) -> T,
{
    let (parsed_tree, c_language) = parse_text_with(parser_callback);
    tree_query(&parsed_tree, text_provider, &c_language);
}

/// A `&[u8]` obtained from a `&str` works as the query text provider, with the
/// parser fed either the `&str` itself or its bytes.
#[test]
fn test_text_provider_for_str_slice() {
    let text: &str = "// comment";

    check_parsing(text, text.as_bytes());
    check_parsing(text.as_bytes(), text.as_bytes());
}

/// Bytes borrowed from a `String` work as the query text provider, with the
/// parser fed an owned clone, `as_bytes()`, or an explicit `AsRef<[u8]>` view.
#[test]
fn test_text_provider_for_string() {
    let text: String = "// comment".to_owned();

    check_parsing(text.clone(), text.as_bytes());
    check_parsing(text.as_bytes(), text.as_bytes());
    check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}

/// Bytes borrowed from a `Box<str>` work as the query text provider; the parser
/// input is reached through `as_bytes()`, an explicit `AsRef<str>` call, and
/// plain `as_ref()`.
#[test]
fn test_text_provider_for_box_of_str_slice() {
    let text = "// comment".to_owned().into_boxed_str();

    check_parsing(text.as_bytes(), text.as_bytes());
    check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
    check_parsing(text.as_ref(), text.as_ref().as_bytes());
    check_parsing(text.as_ref(), text.as_bytes());
}

/// A `Box<[u8]>` works as both parser input and query text provider, whether
/// borrowed via `as_ref()` or via an explicit reborrow (`&*text`).
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
    let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes();

    check_parsing(text.as_ref(), text.as_ref());
    check_parsing(text.as_ref(), &*text);
    check_parsing(&*text, &*text);
}

/// A slice reborrowed from a `Vec<u8>` works as both parser input and query
/// text provider.
#[test]
fn test_text_provider_for_vec_of_bytes() {
    let text = "// comment".to_owned().into_bytes();

    check_parsing(&*text, &*text);
}

/// Bytes borrowed from an `Arc<[u8]>` work as the query text provider, with the
/// parser fed a reborrow, `as_ref()`, or an owned clone of the `Arc`.
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
    let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes());

    check_parsing(&*text, &*text);
    check_parsing(text.as_ref(), text.as_ref());
    check_parsing(text.clone(), text.as_ref());
}

/// Parses UTF-16 encoded text and checks that the root node's `utf16_text`
/// returns the complete input.
#[test]
fn test_text_provider_for_vec_utf16_text() {
    let utf16_source: Vec<u16> = "你好".encode_utf16().collect();

    let c_language = get_language("c");
    let mut utf16_parser = Parser::new();
    utf16_parser.set_language(&c_language).unwrap();
    let tree = utf16_parser.parse_utf16_le(&utf16_source, None).unwrap();

    let root_text = tree.root_node().utf16_text(&utf16_source);
    assert_eq!(utf16_source, root_text);
}

/// A closure yielding `&str` works as the query text provider, both for a tree
/// parsed from a string and for a tree parsed through a read callback.
#[test]
fn test_text_provider_callback_with_str_slice() {
    let text: &str = "// comment";

    check_parsing(text, |_node: Node<'_>| iter::once(text));
    check_parsing_callback(
        // Return the text starting at the requested byte offset, and an empty
        // slice at or past the end. Previously the whole text was returned for
        // every in-range offset, which is wrong for any non-zero offset.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| iter::once(text),
    );
}

/// A closure yielding an owned `String` works as the query text provider for a
/// tree parsed through a read callback.
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // Return the text starting at the requested byte offset, and an empty
        // slice at or past the end. Previously the whole text was returned for
        // every in-range offset, which is wrong for any non-zero offset.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice: String = text.to_owned();
            iter::once(slice)
        },
    );
}

/// A closure yielding an owned `Vec<u8>` works as the query text provider for a
/// tree parsed through a read callback.
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // Return the text starting at the requested byte offset, and an empty
        // slice at or past the end. Previously the whole text was returned for
        // every in-range offset, which is wrong for any non-zero offset.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice = text.to_owned().into_bytes();
            iter::once(slice)
        },
    );
}

/// A closure yielding an owned `Arc<[u8]>` works as the query text provider for
/// a tree parsed through a read callback.
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
    let text: &str = "// comment";

    check_parsing_callback(
        // Return the text starting at the requested byte offset, and an empty
        // slice at or past the end. Previously the whole text was returned for
        // every in-range offset, which is wrong for any non-zero offset.
        &mut |offset: usize, _point| text.as_bytes().get(offset..).unwrap_or_default(),
        |_node: Node<'_>| {
            let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
            iter::once(slice)
        },
    );
}



================================================
FILE: crates/cli/src/tests/tree_test.rs
================================================
use std::str;

use tree_sitter::{InputEdit, Parser, Point, Range, Tree};

use super::helpers::fixtures::get_language;
use crate::{
    fuzz::edits::Edit,
    parse::perform_edit,
    tests::{helpers::fixtures::get_test_fixture_language, invert_edit},
};

/// Applies a series of `InputEdit`s to clones of one parsed JavaScript tree
/// (`"  abc  !==  def"`) and checks, for each edit, the resulting byte ranges
/// and `has_changes()` flags of the binary expression and its children.
///
/// In the unedited tree the expression spans bytes 2..15, with its three
/// children at 2..5 (`abc`), 7..10 (`!==`) and 12..15 (`def`).
#[test]
fn test_tree_edit() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse("  abc  !==  def", None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
    );

    // edit entirely within the tree's padding:
    // resize the padding of the tree and its leftmost descendants.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 1,
            new_end_byte: 2,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 1),
            new_end_position: Point::new(0, 2),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 3);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 3);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // edit starting in the tree's padding but extending into its content:
    // shrink the content to compensate for the expanded padding.
    // NOTE(review): `old_end_byte` is 4 but `old_end_position` is column 5 —
    // confirm whether this mismatch is intentional.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 4,
            new_end_byte: 5,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 5),
            new_end_position: Point::new(0, 5),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 5);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 5);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // insertion at the edge of a tree's padding:
    // expand the tree's padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // replacement starting at the edge of the tree's padding:
    // resize the content and not the padding.
    // NOTE(review): this edit and its assertions are identical to the insertion
    // case directly above (`old_end_byte` equals `start_byte`), so no
    // replacement is actually exercised — confirm the intended edit.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // deletion that spans more than one child node:
    // shrink subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 11,
            new_end_byte: 4,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 11),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..8);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 5..8);
    }

    // insertion at the end of the tree:
    // extend the tree's content.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 15,
            old_end_byte: 15,
            new_end_byte: 16,
            start_position: Point::new(0, 15),
            old_end_position: Point::new(0, 15),
            new_end_position: Point::new(0, 16),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 2..16);
        assert!(!child1.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 7..10);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 12..16);
    }

    // replacement that starts within a token and extends beyond the end of the tree:
    // resize the token and empty out any subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 3,
            old_end_byte: 90,
            new_end_byte: 4,
            start_position: Point::new(0, 3),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();
        assert_eq!(expr.byte_range(), 2..4);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..4);
        assert!(child1.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 4..4);
        assert!(child3.has_changes());
    }

    // replacement that starts in whitespace and extends beyond the end of the tree:
    // shift the token's start position and empty out its content.
    {
        // Last case: the original tree is moved instead of cloned.
        let mut tree = tree;
        tree.edit(&InputEdit {
            start_byte: 6,
            old_end_byte: 90,
            new_end_byte: 8,
            start_position: Point::new(0, 6),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 8),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();
        assert_eq!(expr.byte_range(), 2..8);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child1.has_changes());
        assert_eq!(child2.byte_range(), 8..8);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 8..8);
        assert!(child3.has_changes());
    }
}

/// Parses HTML restricted to four included ranges, deletes bytes 29..53 from
/// the tree, and checks how the tree's included ranges are adjusted.
#[test]
fn test_tree_edit_with_included_ranges() {
    // Every range in this test lies on row 0, so columns equal byte offsets.
    let row0_range = |start: usize, end: usize| Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    };

    let mut parser = Parser::new();
    parser.set_language(&get_language("html")).unwrap();

    let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";

    // Only the HTML portions are included; the `<% ... %>` segments are not.
    let included = [(0, 5), (15, 29), (39, 53), (62, 68)]
        .iter()
        .map(|&(start, end)| row0_range(start, end))
        .collect::<Vec<_>>();
    parser.set_included_ranges(&included).unwrap();

    let mut tree = parser.parse(source, None).unwrap();

    let deletion = InputEdit {
        start_byte: 29,
        old_end_byte: 53,
        new_end_byte: 29,
        start_position: Point::new(0, 29),
        old_end_position: Point::new(0, 53),
        new_end_position: Point::new(0, 29),
    };
    tree.edit(&deletion);

    assert_eq!(
        tree.included_ranges(),
        &[
            row0_range(0, 5),
            row0_range(15, 29),
            row0_range(29, 29),
            row0_range(38, 44),
        ]
    );
}

#[test]
fn test_tree_cursor() {
    // Walks a small struct definition forwards and backwards with a cursor,
    // then checks that a second cursor reset to the first one can climb back
    // up through the same parents.
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();

    let tree = parser
        .parse(
            "
                struct Stuff {
                    a: A,
                    b: Option<B>,
                }
            ",
            None,
        )
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct_item");

    // Forward over the children of the struct item: (kind, is_named).
    assert!(cursor.goto_first_child());
    assert_eq!(
        (cursor.node().kind(), cursor.node().is_named()),
        ("struct", false)
    );
    for (kind, is_named) in [("type_identifier", true), ("field_declaration_list", true)] {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
        assert_eq!(cursor.node().is_named(), is_named);
    }

    // Jump to the closing brace of the field list ...
    assert!(cursor.goto_last_child());
    assert_eq!(cursor.node().kind(), "}");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point::new(4, 16));

    // ... and walk backwards to the opening brace: (kind, is_named, start).
    let backwards = [
        (",", false, Point::new(3, 32)),
        ("field_declaration", true, Point::new(3, 20)),
        (",", false, Point::new(2, 24)),
        ("field_declaration", true, Point::new(2, 20)),
        ("{", false, Point::new(1, 29)),
    ];
    for (kind, is_named, start) in backwards {
        assert!(cursor.goto_previous_sibling());
        let node = cursor.node();
        assert_eq!(node.kind(), kind);
        assert_eq!(node.is_named(), is_named);
        assert_eq!(node.start_position(), start);
    }

    // A fresh cursor reset to `cursor` starts on the same node.
    let mut copy = tree.walk();
    copy.reset_to(&cursor);

    assert_eq!(copy.node().kind(), "{");
    assert!(!copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "field_declaration_list");
    assert!(copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "struct_item");
}

#[test]
fn test_tree_cursor_previous_sibling_with_aliases() {
    // Direction of a single sibling move.
    enum Step {
        Next,
        Previous,
    }

    let mut parser = Parser::new();
    parser
        .set_language(&get_test_fixture_language("aliases_in_root"))
        .unwrap();

    let text = "# comment\n# \nfoo foo";
    let tree = parser.parse(text, None).unwrap();
    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "document");

    cursor.goto_first_child();
    assert_eq!(cursor.node().kind(), "comment");

    // Go forward to the `bar` node, back to the first comment, then forward
    // again; each move must succeed and land on the expected node kind.
    let moves = [
        (Step::Next, "comment"),
        (Step::Next, "bar"),
        (Step::Previous, "comment"),
        (Step::Previous, "comment"),
        (Step::Next, "comment"),
        (Step::Next, "bar"),
    ];
    for (step, expected_kind) in moves {
        let moved = match step {
            Step::Next => cursor.goto_next_sibling(),
            Step::Previous => cursor.goto_previous_sibling(),
        };
        assert!(moved);
        assert_eq!(cursor.node().kind(), expected_kind);
    }
}

#[test]
fn test_tree_cursor_previous_sibling() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();

    let text = "
    // Hi there
    // This is fun!
    // Another one!
";
    let tree = parser.parse(text, None).unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    // Start on the last comment and step backwards through the other two.
    assert!(cursor.goto_last_child());
    let comments_in_reverse = ["// Another one!", "// This is fun!", "// Hi there"];
    for (index, expected_text) in comments_in_reverse.iter().enumerate() {
        if index > 0 {
            assert!(cursor.goto_previous_sibling());
        }
        let node = cursor.node();
        assert_eq!(node.kind(), "line_comment");
        assert_eq!(node.utf8_text(text.as_bytes()).unwrap(), *expected_text);
    }

    // The first comment has nothing before it.
    assert!(!cursor.goto_previous_sibling());
}

#[test]
fn test_tree_cursor_fields() {
    // Checks `TreeCursor::field_name` while stepping across the children of a
    // function declaration that has comments between its fields. The comments
    // carry no field name; the identifier and the parameter list do.
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();

    let tree = parser
        .parse("function /*1*/ bar /*2*/ () {}", None)
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "program");

    // Every cursor move is asserted, as in the other cursor tests, so a
    // failed move is reported at the move itself rather than by a later
    // kind mismatch.
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "function_declaration");
    assert_eq!(cursor.field_name(), None);

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "function");
    assert_eq!(cursor.field_name(), None);

    // Remaining siblings in order: (kind, field name).
    let siblings = [
        ("comment", None),
        ("identifier", Some("name")),
        ("comment", None),
        ("formal_parameters", Some("parameters")),
    ];
    for (kind, field_name) in siblings {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
        assert_eq!(cursor.field_name(), field_name);
    }
}

// Exercises `goto_first_child_for_point`. As asserted below, a successful call
// moves the cursor to a child and returns `Some(index)`, where the index
// matches the child's position among its siblings (e.g. `]` is child 10 of
// the array); an unsuccessful call returns `None` and leaves the cursor
// where it was.
#[test]
fn test_tree_cursor_child_for_point() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    // `[1..]` drops the literal's leading newline, so row 0 is the `[` line
    // and row 6 is the `];` line.
    let source = &"
    [
        one,
        {
            two: tree
        },
        four, five, six
    ];"[1..];
    let tree = parser.parse(source, None).unwrap();

    let mut c = tree.walk();
    assert_eq!(c.node().kind(), "program");

    // Points beyond the end of the source have no child; the cursor stays on the root.
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 7)), None);
    assert_eq!(c.node().kind(), "program");

    // descend to expression statement
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(0));
    assert_eq!(c.node().kind(), "expression_statement");

    // step into ';' and back up
    // (6, 6) is the column right after the `;`, so it no longer selects a child.
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 6)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (";", Point::new(6, 5))
    );
    assert!(c.goto_parent());

    // descend into array
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("array", Point::new(0, 4))
    );

    // step into '[' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(0, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("[", Point::new(0, 4))
    );
    assert!(c.goto_parent());

    // step into identifier 'one' and back up
    // First from the whitespace before the token (column 0), then from a
    // column inside it (column 10); both select the same identifier.
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 0)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 10)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());

    // step into first ',' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 11)), Some(2));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (",", Point::new(1, 11))
    );
    assert!(c.goto_parent());

    // step into identifier 'four' and back up
    // NOTE(review): the second lookup repeats the first with the same point
    // (5, 0); a point inside the token may have been intended - confirm.
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());

    // step into ']' and back up
    // NOTE(review): as above, both lookups use the same point (6, 0).
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());

    // descend into object
    assert_eq!(c.goto_first_child_for_point(Point::new(2, 0)), Some(3));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("object", Point::new(2, 8))
    );
}

#[test]
fn test_tree_node_equality() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let tree = parser.parse("struct A {}", None).unwrap();

    // Two separately obtained handles to the root compare equal ...
    let (root_a, root_b) = (tree.root_node(), tree.root_node());
    assert_eq!(root_a, root_b);

    // ... as do handles to the same child reached through either root,
    // while a child never equals its parent.
    let first_child_a = root_a.child(0).unwrap();
    let first_child_b = root_b.child(0).unwrap();
    assert_eq!(first_child_a, first_child_b);
    assert_ne!(first_child_a, root_b);
}

// Verifies the ranges reported by `Tree::changed_ranges` (via the
// `get_changed_ranges` helper below) for several kinds of edits to a small
// JavaScript object literal. Each scenario works on its own copy of the
// original tree and source, applies an edit, and then applies the inverse
// edit to return to the starting text.
#[test]
fn test_get_changed_ranges() {
    let source_code = b"{a: null};\n".to_vec();

    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse(&source_code, None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (object (pair key: (property_identifier) value: (null)))))"
    );

    // Updating one token
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Replace `null` with `nothing` - that token has changed syntax
        let edit = Edit {
            position: index_of(&source_code, "ull"),
            deleted_length: 3,
            inserted_text: b"othing".to_vec(),
        };
        // The inverse is computed before the edit is applied
        // (presumably `invert_edit` needs the pre-edit text - confirm in the helper).
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![range_of(&source_code, "nothing")]);

        // Replace `nothing` with `null` - that token has changed syntax
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }

    // Changing only leading whitespace
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert leading newline - no changed ranges
        let edit = Edit {
            position: 0,
            deleted_length: 0,
            inserted_text: b"\n".to_vec(),
        };
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![]);

        // Remove leading newline - no changed ranges
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![]);
    }

    // Inserting elements
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert a key-value pair before the `}` - those tokens are changed
        let edit1 = Edit {
            position: index_of(&source_code, "}"),
            deleted_length: 0,
            inserted_text: b", b: false".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]);

        // Insert a second pair in front of the one just added - only the new tokens are changed
        let edit2 = Edit {
            position: index_of(&source_code, ", b"),
            deleted_length: 0,
            inserted_text: b", c: 1".to_vec(),
        };
        let inverse_edit2 = invert_edit(&source_code, &edit2);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2);
        assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]);

        // Remove the middle pair
        // (the removals in this scenario are expected to report no changed ranges)
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2);
        assert_eq!(ranges, vec![]);

        // Remove the second pair
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![]);
    }

    // Wrapping elements in larger expressions
    {
        // Last scenario: the original tree is moved here instead of cloned.
        let mut tree = tree;
        let mut source_code = source_code.clone();

        // Replace `null` with the binary expression `b === null`
        let edit1 = Edit {
            position: index_of(&source_code, "null"),
            deleted_length: 0,
            inserted_text: b"b === ".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);

        // Undo
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }
}

#[test]
fn test_consistency_with_mid_codepoint_edit() {
    // Inserts a byte in the middle of a multi-byte UTF-8 character, reparses,
    // undoes the insertion, reparses again, and expects the final tree to
    // print the same S-expression as the first tree.
    let mut parser = Parser::new();
    parser.set_language(&get_language("php/php")).unwrap();

    let mut source_code =
        b"\n<?php\n\n<<<'\xE5\xAD\x97\xE6\xBC\xA2'\n  T\n\xE5\xAD\x97\xE6\xBC\xA2;".to_vec();
    let mut first_tree = parser.parse(&source_code, None).unwrap();

    // Byte 17 is the last byte (`\xA2`) of the second three-byte character,
    // so a `.` inserted there lands in the middle of that character.
    let insertion = Edit {
        position: 17,
        deleted_length: 0,
        inserted_text: vec![b'.'],
    };
    perform_edit(&mut first_tree, &mut source_code, &insertion).unwrap();
    let mut second_tree = parser.parse(&source_code, Some(&first_tree)).unwrap();

    let removal = invert_edit(&source_code, &insertion);
    perform_edit(&mut second_tree, &mut source_code, &removal).unwrap();
    let third_tree = parser.parse(&source_code, Some(&second_tree)).unwrap();

    assert_eq!(
        third_tree.root_node().to_sexp(),
        first_tree.root_node().to_sexp()
    );
}

#[test]
fn test_tree_cursor_on_aliased_root_with_extra_child() {
    let source = r"
fn main() {
    C/* hi */::<D>::E;
}
";

    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();

    let tree = parser.parse(source, None).unwrap();

    // Descend by child index from the root:
    // function -> block -> expression statement -> scoped identifier -> generic type.
    let mut node = tree.root_node();
    for child_index in [0, 3, 1, 0, 0] {
        node = node.child(child_index).unwrap();
    }
    let generic_type = node;
    assert_eq!(generic_type.kind(), "generic_type");

    // A cursor rooted at the generic type must still reach the comment that
    // sits between the type identifier and the type arguments.
    let mut cursor = generic_type.walk();
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "block_comment");
}

/// Returns the byte offset of the first occurrence of `substring` in `text`.
///
/// Panics if `text` is not valid UTF-8 or does not contain `substring`.
fn index_of(text: &[u8], substring: &str) -> usize {
    let haystack = str::from_utf8(text).unwrap();
    haystack.find(substring).unwrap()
}

/// Builds the `Range` covering the first occurrence of `substring` in `text`.
///
/// Both points are placed on row 0 with the byte offset as the column, so the
/// result is only meaningful when the match lies on the first line of `text`.
fn range_of(text: &[u8], substring: &str) -> Range {
    let start = index_of(text, substring);
    let end = start + substring.len();
    Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    }
}

/// Applies `edit` to both `tree` and `source_code`, reparses the edited text
/// using the edited tree as the old tree, and returns the ranges that differ
/// between the edited old tree and the new one. `tree` is replaced by the
/// newly parsed tree before returning.
fn get_changed_ranges(
    parser: &mut Parser,
    tree: &mut Tree,
    source_code: &mut Vec<u8>,
    edit: &Edit,
) -> Vec<Range> {
    perform_edit(tree, source_code, edit).unwrap();
    let reparsed = parser.parse(source_code, Some(tree)).unwrap();
    let changed: Vec<Range> = tree.changed_ranges(&reparsed).collect();
    *tree = reparsed;
    changed
}



================================================
FILE: crates/cli/src/tests/wasm_language_test.rs
================================================
use std::{fs, sync::LazyLock};

use streaming_iterator::StreamingIterator;
use tree_sitter::{
    wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
};

use crate::tests::helpers::{allocations, fixtures::WASM_DIR};

static ENGINE: LazyLock<Engine> = LazyLock::new(Engine::default);

#[test]
fn test_wasm_stdlib_symbols() {
    let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();

    // The symbol list must already be in sorted order.
    let mut sorted_symbols = symbols.clone();
    sorted_symbols.sort_unstable();
    assert_eq!(symbols, sorted_symbols, "symbols aren't sorted");

    // A few symbols that must always be present.
    for required in ["malloc", "free", "memset", "memcpy"] {
        assert!(symbols.contains(&required));
    }
}

#[test]
fn test_load_wasm_ruby_language() {
    // Loads the Ruby grammar from its wasm build and parses a tiny class,
    // all under allocation recording so leaks fail the test.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let ruby = wasm_store.load_language("ruby", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&ruby).unwrap();

        let syntax_tree = parser.parse("class A; end", None).unwrap();
        let expected_sexp = "(program (class name: (constant)))";
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}

#[test]
fn test_load_wasm_html_language() {
    // Loads the HTML grammar from its wasm build and parses nested elements,
    // all under allocation recording so leaks fail the test.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&html).unwrap();

        let markup = "<div><span></span><p></p></div>";
        let syntax_tree = parser.parse(markup, None).unwrap();
        let expected_sexp = "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))";
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}

#[test]
fn test_load_wasm_rust_language() {
    // Loads the Rust grammar from its wasm build and parses an empty `main`,
    // all under allocation recording so leaks fail the test.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let rust = wasm_store.load_language("rust", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&rust).unwrap();

        let syntax_tree = parser.parse("fn main() {}", None).unwrap();
        let expected_sexp = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}

#[test]
fn test_load_wasm_javascript_language() {
    // Loads the JavaScript grammar from its wasm build and parses two
    // declarations, all under allocation recording so leaks fail the test.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let javascript = wasm_store.load_language("javascript", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&javascript).unwrap();

        let syntax_tree = parser.parse("const a = b\nconst c = d", None).unwrap();
        let expected_sexp = "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))";
        assert_eq!(syntax_tree.root_node().to_sexp(), expected_sexp);
    });
}

#[test]
fn test_load_multiple_wasm_languages() {
    // Loads four wasm grammars into one store and checks that both the parser
    // owning that store and a second parser with a fresh store can switch
    // between the languages, parse with each, and run queries on the results.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();

        let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap();
        let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();

        let language_rust = store.load_language("rust", &wasm_rs).unwrap();
        let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
        let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
        let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
        parser.set_wasm_store(store).unwrap();

        let mut parser2 = Parser::new();
        parser2
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        let mut query_cursor = QueryCursor::new();

        // First, parse with the store that originally loaded the languages.
        // Then parse with a new parser and wasm store, so that the languages
        // are added one-by-one, in between parses.
        for mut parser in [parser, parser2] {
            // Run every language twice per parser.
            for _ in 0..2 {
                let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
                let query_typescript =
                    Query::new(&language_typescript, "(class_declaration) @foo").unwrap();

                parser.set_language(&language_cpp).unwrap();
                let tree = parser.parse("A<B> c = d();", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
                );

                parser.set_language(&language_rust).unwrap();
                let source = "const A: B = c();";
                let tree = parser.parse(source, None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
                );
                assert_eq!(
                    query_cursor
                        .matches(&query_rust, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );

                parser.set_language(&language_ruby).unwrap();
                let tree = parser.parse("class A; end", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class name: (constant)))"
                );

                parser.set_language(&language_typescript).unwrap();
                // Rebind `source` so the query below receives the text this
                // tree was parsed from, not the Rust snippet from earlier.
                let source = "class A {}";
                let tree = parser.parse(source, None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class_declaration name: (type_identifier) body: (class_body)))"
                );
                assert_eq!(
                    query_cursor
                        .matches(&query_typescript, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );
            }
        }
    });
}

#[test]
fn test_load_and_reload_wasm_language() {
    // Tracks `WasmStore::language_count` while language handles are loaded,
    // dropped, and loaded again.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();

        let rust_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let typescript_bytes = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();

        let rust = wasm_store.load_language("rust", &rust_bytes).unwrap();
        let typescript = wasm_store
            .load_language("typescript", &typescript_bytes)
            .unwrap();
        assert_eq!(wasm_store.language_count(), 2);

        // When a language is dropped, stores can release their instances of that language.
        drop(rust);
        assert_eq!(wasm_store.language_count(), 1);

        // Loading the same grammar again brings the count back up.
        let rust = wasm_store.load_language("rust", &rust_bytes).unwrap();
        assert_eq!(wasm_store.language_count(), 2);

        // With every handle gone, the store holds no languages.
        drop(rust);
        drop(typescript);
        assert_eq!(wasm_store.language_count(), 0);
    });
}

#[test]
fn test_reset_wasm_store() {
    // A language loaded through one store is used by a parser that owns a
    // different store; swapping in yet another store must leave the parser
    // able to parse with the same language.
    allocations::record(|| {
        let expected_sexp = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";

        let mut language_store = WasmStore::new(&ENGINE).unwrap();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let rust = language_store.load_language("rust", &wasm_bytes).unwrap();

        let mut parser = Parser::new();
        let first_store = WasmStore::new(&ENGINE).unwrap();
        parser.set_wasm_store(first_store).unwrap();
        parser.set_language(&rust).unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);

        // Replace the parser's store and parse again without re-setting the language.
        let second_store = WasmStore::new(&ENGINE).unwrap();
        parser.set_wasm_store(second_store).unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);
    });
}

#[test]
fn test_load_wasm_errors() {
    // Checks the error kind (and, where stable, the message) reported for
    // three ways loading a wasm language can fail.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();

        // Module with its first byte missing: rejected while parsing.
        let truncated = &wasm_bytes[1..];
        let parse_error = wasm_store.load_language("rust", truncated).unwrap_err();
        assert_eq!(
            parse_error,
            WasmError {
                kind: WasmErrorKind::Parse,
                message: "failed to parse dylink section of wasm module".into(),
            }
        );

        // Valid module, wrong language name: the language function is not found.
        let instantiate_error = wasm_store
            .load_language("not_rust", &wasm_bytes)
            .unwrap_err();
        assert_eq!(
            instantiate_error,
            WasmError {
                kind: WasmErrorKind::Instantiate,
                message: "module did not contain language function: tree_sitter_not_rust".into(),
            }
        );

        // Module with bytes 300..500 zeroed: fails to compile.
        let mut corrupted = wasm_bytes.clone();
        corrupted[300..500].fill(0);
        let compile_error = wasm_store.load_language("rust", &corrupted).unwrap_err();
        assert_eq!(compile_error.kind, WasmErrorKind::Compile,);
    });
}

#[test]
fn test_wasm_oom() {
    // A parse of an enormous tag name is expected to fail (judging by the
    // test name, by running out of wasm memory); afterwards the same parser
    // must still handle a normal document.
    allocations::record(|| {
        let mut wasm_store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm_bytes = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let html = wasm_store.load_language("html", &wasm_bytes).unwrap();
        parser.set_wasm_store(wasm_store).unwrap();
        parser.set_language(&html).unwrap();

        // Tag name of 3 * 2 MiB bytes: parsing returns `None`.
        let huge_tag = "a-b".repeat(2 * 1024 * 1024);
        let huge_document = format!("<{huge_tag}>hello world</{huge_tag}>");
        let failed_parse = parser.parse(&huge_document, None);
        assert!(failed_parse.is_none());

        // A small document parses after the language is set again.
        let small_tag = "a-b".repeat(20);
        let small_document = format!("<{small_tag}>hello world</{small_tag}>");
        parser.set_language(&html).unwrap();
        let tree = parser.parse(&small_document, None).unwrap();
        let expected_sexp = "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))";
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);
    });
}



================================================
FILE: crates/cli/src/tests/helpers/allocations.rs
================================================
use std::{
    collections::HashMap,
    os::raw::c_void,
    sync::{
        atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
        Mutex,
    },
};

/// Installs the recording wrappers defined in this file as tree-sitter's
/// allocator, so every malloc/calloc/realloc/free it performs goes through them.
///
/// Registered through the `ctor` attribute, so it is expected to run once at
/// load time, before any test body executes.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}

/// Address of a live allocation, wrapped so it can be used as a hash-map key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
// Raw pointers are neither `Send` nor `Sync` by default. In this file the
// pointer is only hashed and compared as a key, never dereferenced.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}

/// Bookkeeping for allocations made while recording is switched on.
#[derive(Default)]
struct AllocationRecorder {
    // Whether allocations and frees are currently being tracked.
    enabled: AtomicBool,
    // Number of allocations recorded so far; the value at insertion time is
    // stored as that allocation's index.
    allocation_count: AtomicUsize,
    // Allocations not yet freed, mapped to the index they were given.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}

// One recorder per thread: each thread tracks only the allocations made on it
// (presumably so tests running on different threads do not interfere).
thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}

// C allocator functions that the recording wrappers below delegate to
// (presumably resolved from the platform C library at link time).
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}

/// Runs `f` with allocation recording enabled on the current thread and
/// returns its result.
///
/// The recorder is reset before `f` runs. Afterwards recording is switched
/// off and the function panics if any allocation recorded during `f` was not
/// freed, listing the indices of the leaked allocations.
pub fn record<T>(f: impl FnOnce() -> T) -> T {
    // Start from a clean slate.
    RECORDER.with(|recorder| {
        recorder.enabled.store(true, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder.outstanding_allocations.lock().unwrap().clear();
    });

    let value = f();

    // Stop recording and take whatever is still outstanding.
    let outstanding_allocation_indices: Vec<usize> = RECORDER.with(|recorder| {
        recorder.enabled.store(false, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder
            .outstanding_allocations
            .lock()
            .unwrap()
            .drain()
            .map(|(_, index)| index)
            .collect()
    });
    assert!(
        outstanding_allocation_indices.is_empty(),
        "Leaked allocation indices: {outstanding_allocation_indices:?}"
    );
    value
}

/// Registers `ptr` as an outstanding allocation, tagged with the next
/// allocation index. Does nothing while recording is disabled.
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|recorder| {
        if !recorder.enabled.load(SeqCst) {
            return;
        }
        let index = recorder.allocation_count.fetch_add(1, SeqCst);
        let mut outstanding = recorder.outstanding_allocations.lock().unwrap();
        outstanding.insert(Allocation(ptr), index);
    });
}

/// Removes `ptr` from the current thread's set of outstanding allocations.
/// Does nothing while recording is disabled.
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|state| {
        if !state.enabled.load(SeqCst) {
            return;
        }
        let mut outstanding = state.outstanding_allocations.lock().unwrap();
        outstanding.remove(&Allocation(ptr));
    });
}

/// `malloc` wrapper that forwards to the foreign `malloc` and records the
/// returned pointer.
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let allocated = malloc(size);
    record_alloc(allocated);
    allocated
}

/// `calloc` wrapper that forwards to the foreign `calloc` and records the
/// returned pointer.
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let allocated = calloc(count, size);
    record_alloc(allocated);
    allocated
}

/// `realloc` wrapper that forwards to the foreign `realloc` and keeps the
/// recorder in sync:
///
/// - a null `ptr` is recorded as a fresh allocation of the result;
/// - a result at a different address is recorded as a free of `ptr` followed
///   by an allocation of the result;
/// - a result at the same address leaves the recorder untouched.
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let reallocated = realloc(ptr, size);
    match (ptr.is_null(), core::ptr::eq(ptr, reallocated)) {
        (true, _) => record_alloc(reallocated),
        (false, false) => {
            record_dealloc(ptr);
            record_alloc(reallocated);
        }
        (false, true) => {}
    }
    reallocated
}

/// `free` wrapper: drops `ptr` from the recorder first, then releases it with
/// the foreign `free`.
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    let released = ptr;
    record_dealloc(released);
    free(released);
}



================================================
FILE: crates/cli/src/tests/helpers/dirs.rs
================================================
/// Directory two levels above this crate's `CARGO_MANIFEST_DIR`.
///
/// Panics on first access if the manifest directory has fewer than two
/// ancestors.
pub static ROOT_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // `ancestors()` yields the path itself first, so index 2 is the
    // grandparent directory.
    manifest_dir.ancestors().nth(2).unwrap().to_path_buf()
});

/// `<ROOT_DIR>/test/fixtures`.
pub static FIXTURES_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut fixtures = ROOT_DIR.clone();
    fixtures.push("test");
    fixtures.push("fixtures");
    fixtures
});

/// `<ROOT_DIR>/lib/include`.
pub static HEADER_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut headers = ROOT_DIR.clone();
    headers.push("lib");
    headers.push("include");
    headers
});

/// `<ROOT_DIR>/test/fixtures/grammars`.
pub static GRAMMARS_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut grammars = ROOT_DIR.clone();
    grammars.push("test");
    grammars.push("fixtures");
    grammars.push("grammars");
    grammars
});

/// `<ROOT_DIR>/target/scratch`, created (with any missing parents) on first
/// access. Panics if the directory cannot be created.
pub static SCRATCH_BASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut scratch_base = ROOT_DIR.clone();
    scratch_base.push("target");
    scratch_base.push("scratch");
    fs::create_dir_all(&scratch_base).unwrap();
    scratch_base
});

/// `<ROOT_DIR>/target/release`; only compiled when the `wasm` feature is on.
#[cfg(feature = "wasm")]
pub static WASM_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut release = ROOT_DIR.clone();
    release.push("target");
    release.push("release");
    release
});

/// Per-target scratch directory under `SCRATCH_BASE_DIR`, created on first
/// access.
///
/// The directory name has the form `{arch}-{os}-{vendor}-{env}-{endian}`,
/// where the last three components fall back to `"unknown"` when none of the
/// checked `cfg!` values apply. Panics if the directory cannot be created.
pub static SCRATCH_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    // Picks the label of the first candidate whose `cfg!` check is true.
    fn first_active(candidates: &[(bool, &'static str)]) -> &'static str {
        candidates
            .iter()
            .find(|(active, _)| *active)
            .map_or("unknown", |(_, label)| *label)
    }

    // https://doc.rust-lang.org/reference/conditional-compilation.html
    let vendor = first_active(&[
        (cfg!(target_vendor = "apple"), "apple"),
        (cfg!(target_vendor = "fortanix"), "fortanix"),
        (cfg!(target_vendor = "pc"), "pc"),
    ]);
    let env = first_active(&[
        (cfg!(target_env = "gnu"), "gnu"),
        (cfg!(target_env = "msvc"), "msvc"),
        (cfg!(target_env = "musl"), "musl"),
        (cfg!(target_env = "sgx"), "sgx"),
    ]);
    let endian = first_active(&[
        (cfg!(target_endian = "little"), "little"),
        (cfg!(target_endian = "big"), "big"),
    ]);

    let machine = format!(
        "{}-{}-{vendor}-{env}-{endian}",
        std::env::consts::ARCH,
        std::env::consts::OS
    );
    let scratch = SCRATCH_BASE_DIR.join(machine);
    fs::create_dir_all(&scratch).unwrap();
    scratch
});



================================================
FILE: crates/cli/src/tests/helpers/edits.rs
================================================
use std::{ops::Range, str};

/// Serves single bytes out of a buffer while remembering which offsets were
/// requested, so a test can later inspect what was actually read.
#[derive(Debug)]
pub struct ReadRecorder<'a> {
    content: &'a [u8],
    // Offsets requested so far; kept sorted and free of duplicates by `read`.
    indices_read: Vec<usize>,
}

impl<'a> ReadRecorder<'a> {
    /// Creates a recorder over `content` with no reads recorded yet.
    #[must_use]
    pub const fn new(content: &'a [u8]) -> Self {
        Self {
            content,
            indices_read: Vec::new(),
        }
    }

    /// Returns the one-byte slice at `offset` and records the offset.
    ///
    /// An `offset` at or past the end of the content yields an empty slice
    /// and is not recorded.
    pub fn read(&mut self, offset: usize) -> &'a [u8] {
        if offset < self.content.len() {
            // `binary_search` returns `Err(position)` for a missing value,
            // which is exactly where it must go to keep the list sorted.
            if let Err(i) = self.indices_read.binary_search(&offset) {
                self.indices_read.insert(i, offset);
            }
            &self.content[offset..(offset + 1)]
        } else {
            &[]
        }
    }

    /// Returns the recorded reads as strings, one per maximal run of
    /// contiguous offsets, in ascending offset order.
    ///
    /// # Panics
    ///
    /// Panics if a run of read bytes is not valid UTF-8 on its own.
    pub fn strings_read(&self) -> Vec<&'a str> {
        let mut result = Vec::new();
        let mut last_range = Option::<Range<usize>>::None;
        for &index in &self.indices_read {
            if let Some(range) = &mut last_range {
                if range.end == index {
                    // Still contiguous: grow the current run.
                    range.end += 1;
                    continue;
                }
                // A gap ends the current run.
                result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
            }
            // Start a new run at this offset. This also covers the offset
            // that closed the previous run, which must not be dropped.
            last_range = Some(index..(index + 1));
        }
        if let Some(range) = last_range {
            result.push(str::from_utf8(&self.content[range]).unwrap());
        }
        result
    }
}



================================================
FILE: crates/cli/src/tests/helpers/fixtures.rs
================================================
use std::{
    env, fs,
    path::{Path, PathBuf},
    sync::LazyLock,
};

use anyhow::Context;
use tree_sitter::Language;
use tree_sitter_generate::{load_grammar_file, ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;

use crate::tests::generate_parser;

include!("./dirs.rs");

// Shared loader for the tests; its parser library path is `SCRATCH_DIR`.
// Debug builds are switched on whenever the `TREE_SITTER_GRAMMAR_DEBUG`
// environment variable is set to valid Unicode (any value).
static TEST_LOADER: LazyLock<Loader> = LazyLock::new(|| {
    let debug_requested = env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok();
    let mut shared_loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
    if debug_requested {
        shared_loader.debug_build(true);
    }
    shared_loader
});

pub fn test_loader() -> &'static Loader {
    &TEST_LOADER
}

/// Returns the fixtures directory (`FIXTURES_DIR`) as a path.
pub fn fixtures_dir() -> &'static Path {
    FIXTURES_DIR.as_path()
}

/// Returns the per-target scratch directory (`SCRATCH_DIR`) as a path.
pub fn scratch_dir() -> &'static Path {
    SCRATCH_DIR.as_path()
}

/// Loads the language whose sources live in `GRAMMARS_DIR/<name>/src`, using
/// the shared test loader with `HEADER_DIR` added to the header search paths.
///
/// Panics if loading fails.
pub fn get_language(name: &str) -> Language {
    let grammar_src = GRAMMARS_DIR.join(name).join("src");
    let mut compile_config = CompileConfig::new(&grammar_src, None, None);
    compile_config.header_paths.push(&HEADER_DIR);
    let loaded = TEST_LOADER.load_language_at_path(compile_config);
    loaded.unwrap()
}

/// Generates a parser from `fixtures/test_grammars/<name>/grammar.js` and
/// loads the result through [`get_test_language`], passing the grammar
/// directory so a `scanner.c` next to the grammar is picked up.
///
/// Panics if the grammar cannot be loaded or the parser cannot be generated.
pub fn get_test_fixture_language(name: &str) -> Language {
    let grammar_dir = fixtures_dir().join("test_grammars").join(name);
    let grammar_js = grammar_dir.join("grammar.js");
    let grammar_json = load_grammar_file(&grammar_js, None).unwrap();
    let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
    get_test_language(&parser_name, &parser_code, Some(&grammar_dir))
}

/// Returns `GRAMMARS_DIR/<language_name>/queries`.
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
    let mut queries_dir = GRAMMARS_DIR.join(language_name);
    queries_dir.push("queries");
    queries_dir
}

/// Builds a highlight configuration for `language_name` from the query files
/// in its `queries` directory and configures it with `highlight_names`.
///
/// - `highlights.scm` is required.
/// - The injection query is read from `injection_query_filename` when given,
///   and is empty otherwise.
/// - `locals.scm` is optional; a read failure yields an empty query.
///
/// Panics if a required file cannot be read or the configuration cannot be
/// constructed.
pub fn get_highlight_config(
    language_name: &str,
    injection_query_filename: Option<&str>,
    highlight_names: &[String],
) -> HighlightConfiguration {
    let language = get_language(language_name);
    let queries_dir = get_language_queries_path(language_name);
    let read_query = |file_name: &str| fs::read_to_string(queries_dir.join(file_name));

    let highlights_query = read_query("highlights.scm").unwrap();
    let injections_query = match injection_query_filename {
        Some(file_name) => read_query(file_name).unwrap(),
        None => String::new(),
    };
    let locals_query = read_query("locals.scm").unwrap_or_default();

    let mut highlight_config = HighlightConfiguration::new(
        language,
        language_name,
        &highlights_query,
        &injections_query,
        &locals_query,
    )
    .unwrap();
    highlight_config.configure(highlight_names);
    highlight_config
}

/// Builds a tags configuration for `language_name` from `tags.scm`
/// (required) and `locals.scm` (optional; empty when it cannot be read).
///
/// Panics if `tags.scm` cannot be read or the configuration cannot be
/// constructed.
pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
    let language = get_language(language_name);
    let queries_dir = get_language_queries_path(language_name);
    let read_query = |file_name: &str| fs::read_to_string(queries_dir.join(file_name));

    let tags_query = read_query("tags.scm").unwrap();
    let locals_query = read_query("locals.scm").unwrap_or_default();
    TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}

/// Writes `parser_code` (and, when `path` contains a `scanner.c`, a copy of
/// that scanner) into `scratch_dir()/src/<name>`, writes the `tree_sitter`
/// support headers next to them, and loads the resulting language through the
/// shared test loader under the given `name`.
///
/// Panics on any I/O failure or if the language cannot be loaded.
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
    // Leaves `dest` untouched when it already holds exactly `content`.
    // NOTE(review): presumably this avoids needless rebuilds — confirm
    // against the loader's staleness check.
    fn write_if_changed(dest: &Path, content: &str) {
        let up_to_date = fs::read_to_string(dest).is_ok_and(|existing| existing == content);
        if !up_to_date {
            fs::write(dest, content).unwrap();
        }
    }

    let src_dir = scratch_dir().join("src").join(name);
    fs::create_dir_all(&src_dir).unwrap();

    let parser_path = src_dir.join("parser.c");
    write_if_changed(&parser_path, parser_code);

    // Copy the external scanner into the scratch sources if the grammar
    // directory provides one.
    let scanner_path = path
        .map(|grammar_dir| grammar_dir.join("scanner.c"))
        .filter(|original| original.exists())
        .map(|original| {
            let scanner_code = fs::read_to_string(&original).unwrap();
            let scanner_copy_path = src_dir.join("scanner.c");
            write_if_changed(&scanner_copy_path, &scanner_code);
            scanner_copy_path
        });

    let header_dir = src_dir.join("tree_sitter");
    fs::create_dir_all(&header_dir).unwrap();

    let headers = [
        ("alloc.h", ALLOC_HEADER),
        ("array.h", ARRAY_HEADER),
        ("parser.h", tree_sitter::PARSER_HEADER),
    ];
    for (file_name, content) in headers {
        let header_file = header_dir.join(file_name);
        fs::write(&header_file, content)
            .with_context(|| format!("Failed to write {:?}", header_file.file_name().unwrap()))
            .unwrap();
    }

    let mut paths_to_check = vec![parser_path];
    paths_to_check.extend(scanner_path);

    let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None);
    config.header_paths = vec![&HEADER_DIR];
    config.name = name.to_string();

    TEST_LOADER.load_language_at_path_with_name(config).unwrap()
}



================================================
FILE: crates/cli/src/tests/helpers/query_helpers.rs
================================================
use std::{cmp::Ordering, fmt::Write, ops::Range};

use rand::prelude::Rng;
use streaming_iterator::{IntoStreamingIterator, StreamingIterator};
use tree_sitter::{
    Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};

/// A query pattern node that can be rendered as query source text (via its
/// `Display` impl) and matched naively against a syntax tree.
#[derive(Debug)]
pub struct Pattern {
    // Node kind to match. `Some("_")` is a wildcard; `None` is used for the
    // synthetic parent that groups several sibling patterns.
    kind: Option<&'static str>,
    // Named patterns are rendered in parentheses; unnamed ones are rendered
    // as a bare `_` or a quoted string.
    named: bool,
    // Field name the matched node must have, rendered as a `field: ` prefix.
    field: Option<&'static str>,
    // Capture name, rendered as a ` @name` suffix.
    capture: Option<String>,
    // Patterns that must match among the node's children, in order.
    children: Vec<Pattern>,
}

/// One match produced by the naive matcher in `Pattern::match_node`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Match<'a, 'tree> {
    /// `(capture name, captured node)` pairs in the order they were found.
    pub captures: Vec<(&'a str, Node<'tree>)>,
    /// Node on which the match finished. It takes part in the ordering of
    /// matches, and `Pattern::matches_in_tree` resets it to `None` after
    /// sorting, before deduplicating.
    pub last_node: Option<Node<'tree>>,
}

// Pool of capture names that `Pattern::random_pattern_for_node` draws from.
const CAPTURE_NAMES: &[&str] = &[
    "one", "two", "three", "four", "five", "six", "seven", "eight",
];

impl Pattern {
    /// Builds a random pattern that matches some node of `tree`, optionally
    /// together with some of that node's following siblings.
    ///
    /// Returns the pattern and the point range it was built from: from the
    /// start of the chosen node to the end of the node the cursor rests on
    /// after the sibling scan.
    ///
    /// The order of the `rng` draws determines the result for a given seed,
    /// so statements here must not be reordered.
    ///
    /// NOTE(review): `gen_range(0..end_byte)` is an empty range when the root
    /// node ends at byte 0 — presumably callers never pass an empty tree;
    /// confirm.
    pub fn random_pattern_in_tree(tree: &Tree, rng: &mut impl Rng) -> (Self, Range<Point>) {
        let mut cursor = tree.walk();

        // Descend to the node at a random byte offset and depth.
        let mut max_depth = 0;
        let byte_offset = rng.gen_range(0..cursor.node().end_byte());
        while cursor.goto_first_child_for_byte(byte_offset).is_some() {
            max_depth += 1;
        }
        let depth = rng.gen_range(0..=max_depth);
        for _ in 0..depth {
            cursor.goto_parent();
        }

        // Build a pattern that matches that node.
        // Sometimes include subsequent siblings of the node.
        let pattern_start = cursor.node().start_position();
        let mut roots = vec![Self::random_pattern_for_node(&mut cursor, rng)];
        while roots.len() < 5 && cursor.goto_next_sibling() {
            if rng.gen_bool(0.2) {
                roots.push(Self::random_pattern_for_node(&mut cursor, rng));
            }
        }
        let pattern_end = cursor.node().end_position();

        // Synthetic parent (no kind) that groups the sibling patterns.
        let mut pattern = Self {
            kind: None,
            named: true,
            field: None,
            capture: None,
            children: roots,
        };

        if pattern.children.len() == 1 ||
        // In a parenthesized list of sibling patterns, the first
        // sibling can't be an anonymous `_` wildcard.
        (pattern.children[0].kind == Some("_") && !pattern.children[0].named)
        {
            // NOTE(review): `pop` keeps the LAST sibling pattern, so in the
            // wildcard case the earlier siblings are discarded.
            pattern = pattern.children.pop().unwrap();
        }
        // In a parenthesized list of sibling patterns, the first
        // sibling can't have a field name.
        else {
            pattern.children[0].field = None;
        }

        (pattern, pattern_start..pattern_end)
    }

    /// Builds a random pattern for the node under `cursor`, recursing into
    /// some of its children. The cursor is back on the same node on return.
    ///
    /// The order of the `rng` draws determines the result for a given seed,
    /// so statements here must not be reordered.
    fn random_pattern_for_node(cursor: &mut TreeCursor, rng: &mut impl Rng) -> Self {
        let node = cursor.node();

        // Sometimes specify the node's type, sometimes use a wildcard.
        let (kind, named) = if rng.gen_bool(0.9) {
            (Some(node.kind()), node.is_named())
        } else {
            (Some("_"), node.is_named() && rng.gen_bool(0.8))
        };

        // Sometimes specify the node's field.
        let field = if rng.gen_bool(0.75) {
            cursor.field_name()
        } else {
            None
        };

        // Sometimes capture the node.
        let capture = if rng.gen_bool(0.7) {
            Some(CAPTURE_NAMES[rng.gen_range(0..CAPTURE_NAMES.len())].to_string())
        } else {
            None
        };

        // Walk the children and include child patterns for some of them.
        let mut children = Vec::new();
        if named && cursor.goto_first_child() {
            let max_children = rng.gen_range(0..4);
            // NOTE(review): the loop advances to the next sibling before
            // looking at a node, so the first child is never included.
            while cursor.goto_next_sibling() {
                if rng.gen_bool(0.6) {
                    let child_ast = Self::random_pattern_for_node(cursor, rng);
                    children.push(child_ast);
                    // The limit is checked after pushing, so one child can
                    // still be added when `max_children` is 0.
                    if children.len() >= max_children {
                        break;
                    }
                }
            }
            cursor.goto_parent();
        }

        Self {
            kind,
            named,
            field,
            capture,
            children,
        }
    }

    fn write_to_string(&self, string: &mut String, indent: usize) {
        if let Some(field) = self.field {
            write!(string, "{field}: ").unwrap();
        }

        if self.named {
            string.push('(');
            let mut has_contents = if let Some(kind) = &self.kind {
                write!(string, "{kind}").unwrap();
                true
            } else {
                false
            };
            for child in &self.children {
                let indent = indent + 2;
                if has_contents {
                    string.push('\n');
                    string.push_str(&" ".repeat(indent));
                }
                child.write_to_string(string, indent);
                has_contents = true;
            }
            string.push(')');
        } else if self.kind == Some("_") {
            string.push('_');
        } else {
            write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap();
        }

        if let Some(capture) = &self.capture {
            write!(string, " @{capture}").unwrap();
        }
    }

    /// Returns every match of this pattern anywhere in `tree`.
    ///
    /// The matches are computed naively by attempting the whole pattern at
    /// each node of a depth-first walk. The result is sorted using `Match`'s
    /// ordering, then `last_node` is cleared on every match and adjacent
    /// duplicates are removed.
    pub fn matches_in_tree<'tree>(&self, tree: &'tree Tree) -> Vec<Match<'_, 'tree>> {
        let mut found = Vec::new();

        let mut cursor = tree.walk();
        // `true` while climbing back up after a subtree has been exhausted.
        let mut ascending = false;
        loop {
            if !ascending {
                // First visit of this node: try the pattern, then descend.
                found.extend(self.match_node(&mut cursor));
                ascending = !cursor.goto_first_child();
            } else if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        }

        found.sort_unstable();
        for m in &mut found {
            m.last_node = None;
        }
        found.dedup();
        found
    }

    /// Returns every way this pattern matches the node under `cursor`.
    ///
    /// The result is empty when the node's kind, namedness, or field does not
    /// satisfy the pattern, or when the pattern has child patterns that cannot
    /// all be matched, in order, among the node's children. The cursor is
    /// back on the same node on return.
    pub fn match_node<'tree>(&self, cursor: &mut TreeCursor<'tree>) -> Vec<Match<'_, 'tree>> {
        let node = cursor.node();

        // If a kind is specified, check that it matches the node.
        if let Some(kind) = self.kind {
            if kind == "_" {
                // A named wildcard only matches named nodes; an anonymous
                // wildcard matches any node.
                if self.named && !node.is_named() {
                    return Vec::new();
                }
            } else if kind != node.kind() || self.named != node.is_named() {
                return Vec::new();
            }
        }

        // If a field is specified, check that it matches the node.
        if let Some(field) = self.field {
            if cursor.field_name() != Some(field) {
                return Vec::new();
            }
        }

        // Create a match for the current node.
        let mat = Match {
            captures: self
                .capture
                .as_ref()
                .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]),
            last_node: Some(node),
        };

        // If there are no child patterns to match, then return this single match.
        if self.children.is_empty() {
            return vec![mat];
        }

        // Find every matching combination of child patterns and child nodes.
        let mut finished_matches = Vec::<Match>::new();
        if cursor.goto_first_child() {
            // Each state is (index of the next child pattern to match, the
            // partial match accumulated so far).
            let mut match_states = vec![(0, mat)];
            loop {
                // States produced at this child node are collected separately
                // and only become active from the next sibling onwards.
                let mut new_match_states = Vec::new();
                for (pattern_index, mat) in &match_states {
                    let child_pattern = &self.children[*pattern_index];
                    let child_matches = child_pattern.match_node(cursor);
                    for child_match in child_matches {
                        let mut combined_match = mat.clone();
                        combined_match.last_node = child_match.last_node;
                        combined_match
                            .captures
                            .extend_from_slice(&child_match.captures);
                        if pattern_index + 1 < self.children.len() {
                            new_match_states.push((*pattern_index + 1, combined_match));
                        } else {
                            // All child patterns are matched. Matches with
                            // identical captures are collapsed into one.
                            let mut existing = false;
                            for existing_match in &mut finished_matches {
                                if existing_match.captures == combined_match.captures {
                                    if child_pattern.capture.is_some() {
                                        existing_match.last_node = combined_match.last_node;
                                    }
                                    existing = true;
                                }
                            }
                            if !existing {
                                finished_matches.push(combined_match);
                            }
                        }
                    }
                }
                // Earlier states stay active, so a child pattern may also
                // match a later sibling.
                match_states.extend_from_slice(&new_match_states);
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
            cursor.goto_parent();
        }
        finished_matches
    }
}

/// Renders the pattern as query source text, starting at indentation 0.
impl std::fmt::Display for Pattern {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered = String::new();
        self.write_to_string(&mut rendered, 0);
        f.write_str(&rendered)
    }
}

// Delegates to the total order defined by `Ord`, so the two always agree.
impl PartialOrd for Match<'_, '_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Match<'_, '_> {
    // Tree-sitter returns matches in the order that they terminate
    // during a depth-first walk of the tree. If multiple matches
    // terminate on the same node, those matches are produced in the
    // order that their captures were discovered.
    //
    // Comparison keys, in order:
    //   1. the last nodes, when both matches have one;
    //   2. the captured nodes, pairwise;
    //   3. the number of captures.
    fn cmp(&self, other: &Self) -> Ordering {
        let by_last_node = match (self.last_node, other.last_node) {
            (Some(ours), Some(theirs)) => compare_depth_first(ours, theirs),
            _ => Ordering::Equal,
        };

        by_last_node
            .then_with(|| {
                self.captures
                    .iter()
                    .zip(&other.captures)
                    .map(|(ours, theirs)| compare_depth_first(ours.1, theirs.1))
                    .find(|ordering| ordering.is_ne())
                    .unwrap_or(Ordering::Equal)
            })
            .then_with(|| self.captures.len().cmp(&other.captures.len()))
    }
}

/// Orders two nodes by start byte ascending; on a tie, the node that ends
/// later sorts first.
fn compare_depth_first(a: Node, b: Node) -> Ordering {
    // The end bytes are swapped between the tuples so that a larger end
    // compares as smaller.
    let left_key = (a.start_byte(), b.end_byte());
    let right_key = (b.start_byte(), a.end_byte());
    left_key.cmp(&right_key)
}

pub fn assert_query_matches(
    language: &Language,
    query: &Query,
    source: &str,
    expected: &[(usize, Vec<(&str, &str)>)],
) {
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();
    let mut cursor = QueryCursor::new();
    let matches = cursor.matches(query, tree.root_node(), source.as_bytes());
    pretty_assertions::assert_eq!(expected, collect_matches(matches, query, source));
    pretty_assertions::assert_eq!(false, cursor.did_exceed_match_limit());
}

/// Drains `matches` into a list of
/// `(pattern index, [(capture name, captured text)])` entries.
pub fn collect_matches<'a>(
    mut matches: impl StreamingIterator<Item = QueryMatch<'a, 'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
    let mut collected = Vec::new();
    while let Some(query_match) = matches.next() {
        let captures = format_captures(
            query_match.captures.iter().into_streaming_iter_ref(),
            query,
            source,
        );
        collected.push((query_match.pattern_index, captures));
    }
    collected
}

/// Drains `captures` — pairs of a match and an index into that match's
/// captures — into a list of `(capture name, captured text)` pairs.
pub fn collect_captures<'a>(
    captures: impl StreamingIterator<Item = (QueryMatch<'a, 'a>, usize)>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    // Pick the capture each pair points at.
    let selected = captures.map(|(m, i)| m.captures[*i]);
    format_captures(selected, query, source)
}

/// Turns each capture into `(capture name, captured text)`, looking the name
/// up in `query` and slicing the text out of `source`.
///
/// Panics if a captured node's text is not valid UTF-8.
fn format_captures<'a>(
    mut captures: impl StreamingIterator<Item = QueryCapture<'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    let bytes = source.as_bytes();
    let mut formatted = Vec::new();
    while let Some(capture) = captures.next() {
        let name = query.capture_names()[capture.index as usize];
        let text = capture.node.utf8_text(bytes).unwrap();
        formatted.push((name, text));
    }
    formatted
}



================================================
FILE: crates/cli/src/tests/proc_macro/Cargo.toml
================================================
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
publish = false

[lints]
workspace = true

[lib]
proc-macro = true

[dependencies]
proc-macro2 = "1.0.93"
quote = "1.0.38"
rand = "0.8.5"
syn = { version = "2.0.96", features = ["full"] }



================================================
FILE: crates/cli/src/tests/proc_macro/src/lib.rs
================================================
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
    parse::{Parse, ParseStream},
    parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};

/// Attribute macro that re-runs a zero-argument function when it panics.
///
/// Usage: `#[retry(N)]`, where `N` is an integer literal. The function body
/// is attempted up to `N + 1` times: the wrapper returns as soon as one
/// attempt finishes without panicking, and re-raises the panic of the final
/// attempt otherwise.
///
/// The generated wrapper has the original function's name and attributes,
/// and contains the original function as a nested item.
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
    let count = parse_macro_input!(args as LitInt);
    let input = parse_macro_input!(input as ItemFn);
    let attrs = &input.attrs;
    let name = &input.sig.ident;

    // Attributes are emitted back to back with no separator: a comma between
    // two attributes is not valid Rust, so a separator would break any
    // function carrying more than one attribute.
    TokenStream::from(quote! {
        #(#attrs)*
        fn #name() {
            #input

            for i in 0..=#count {
                let result = std::panic::catch_unwind(|| {
                    #name();
                });

                if result.is_ok() {
                    return;
                }

                if i == #count {
                    std::panic::resume_unwind(result.unwrap_err());
                }
            }
        }
    })
}

/// Attribute macro that turns a function taking a seed into a `#[test]`,
/// optionally retrying it with fresh seeds when it panics.
///
/// Parameters, written as comma-separated `name = value` pairs:
///
/// - `seed` (required): expression giving the seed for the first attempt.
/// - `retry` (optional, default `0`): integer literal; the test is attempted
///   up to `retry + 1` times.
/// - `seed_fn` (optional): identifier of a function called with no arguments
///   to produce the seed for the next attempt. Without it, every attempt
///   reuses the initial seed.
///
/// An unknown parameter or a missing `seed` is reported as a compile error.
/// The generated test returns as soon as one attempt finishes without
/// panicking and re-raises the panic of the final attempt otherwise.
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
    struct Args {
        retry: LitInt,
        seed: Expr,
        seed_fn: Option<Ident>,
    }

    impl Parse for Args {
        fn parse(input: ParseStream) -> syn::Result<Self> {
            let mut retry: Option<LitInt> = None;
            let mut seed: Option<Expr> = None;
            let mut seed_fn: Option<Ident> = None;

            while !input.is_empty() {
                let name = input.parse::<Ident>()?;
                match name.to_string().as_str() {
                    "retry" => {
                        input.parse::<Token![=]>()?;
                        retry.replace(input.parse()?);
                    }
                    "seed" => {
                        input.parse::<Token![=]>()?;
                        seed.replace(input.parse()?);
                    }
                    "seed_fn" => {
                        input.parse::<Token![=]>()?;
                        seed_fn.replace(input.parse()?);
                    }
                    x => {
                        return Err(Error::new(
                            name.span(),
                            format!("Unsupported parameter `{x}`"),
                        ))
                    }
                }

                if !input.is_empty() {
                    input.parse::<Token![,]>()?;
                }
            }

            // `retry` is optional and means "no retries" when omitted.
            let retry = retry.unwrap_or_else(|| LitInt::new("0", Span::mixed_site()));
            // Report a missing `seed` as a regular compile error rather than
            // panicking inside the macro.
            let seed = seed.ok_or_else(|| input.error("`seed` parameter is required"))?;

            Ok(Self {
                retry,
                seed,
                seed_fn,
            })
        }
    }

    let Args {
        retry,
        seed,
        seed_fn,
    } = parse_macro_input!(args as Args);

    // Iterating the `Option` lets the `#( ... )*` repetition below expand
    // zero or one time.
    let seed_fn = seed_fn.iter();

    let func = parse_macro_input!(input as ItemFn);
    let attrs = &func.attrs;
    let name = &func.sig.ident;

    // Attributes are emitted back to back with no separator: a comma between
    // two attributes is not valid Rust, so a separator would break any
    // function carrying more than one attribute.
    TokenStream::from(quote! {
        #[test]
        #(#attrs)*
        fn #name() {
            #func

            let mut seed = #seed;

            for i in 0..=#retry {
                let result = std::panic::catch_unwind(|| {
                    #name(seed);
                });

                if result.is_ok() {
                    return;
                }

                if i == #retry {
                    std::panic::resume_unwind(result.unwrap_err());
                }

                #(
                    seed = #seed_fn();
                )*

                if i < #retry {
                    println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
                }
            }
        }
    })
}



================================================
FILE: crates/config/README.md
================================================
# Tree-sitter Config

Manages Tree-sitter's configuration file.

You can use a configuration file to control the behavior of the `tree-sitter`
command-line program. This crate implements the logic for finding and
parsing the contents of the configuration file.



================================================
FILE: crates/config/Cargo.toml
================================================
[package]
name = "tree-sitter-config"
version.workspace = true
description = "User configuration of tree-sitter's command line programs"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-config"
license.workspace = true
keywords.workspace = true
categories.workspace = true

[lib]
path = "src/tree_sitter_config.rs"

[lints]
workspace = true

[dependencies]
anyhow.workspace = true
etcetera.workspace = true
serde.workspace = true
serde_json.workspace = true



================================================
FILE: crates/config/src/tree_sitter_config.rs
================================================
#![doc = include_str!("../README.md")]

use std::{env, fs, path::PathBuf};

use anyhow::{Context, Result};
use etcetera::BaseStrategy as _;
use serde::{Deserialize, Serialize};
use serde_json::Value;

/// Holds the contents of tree-sitter's configuration file.
///
/// The file typically lives at `~/.config/tree-sitter/config.json`, but see the [`Config::load`][]
/// method for the full details on where it might be located.
///
/// This type holds the generic JSON content of the configuration file.  Individual tree-sitter
/// components will use the [`Config::get`][] method to parse that JSON to extract configuration
/// fields that are specific to that component.
#[derive(Debug)]
pub struct Config {
    /// Path associated with this configuration — presumably the file the JSON was loaded from
    /// (TODO confirm against [`Config::load`][]).
    pub location: PathBuf,
    /// The generic JSON content of the configuration file.
    pub config: Value,
}

impl Config {
    /// Returns `$TREE_SITTER_DIR/config.json` when the `TREE_SITTER_DIR` environment variable
    /// is set, without checking whether that file exists.
    ///
    /// The variable is read with [`env::var_os`] so that directories whose names are not valid
    /// Unicode are honored rather than silently ignored.
    fn env_config_file() -> Option<PathBuf> {
        env::var_os("TREE_SITTER_DIR").map(|dir| PathBuf::from(dir).join("config.json"))
    }

    /// Searches the known locations for an existing configuration file and returns the first
    /// one found, or `None` if there is none.
    ///
    /// The locations are checked in this order:
    ///
    ///   - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set.
    ///     If that path does not exist at all, the search stops and `None` is returned.
    ///   - `tree-sitter/config.json` in the default user configuration directory
    ///   - On macOS only: `tree-sitter/config.json` in the Apple data directory. A file found
    ///     there is moved to the default user configuration directory, and the new path is
    ///     returned.
    ///   - `$HOME/.tree-sitter/config.json`
    ///
    /// # Errors
    ///
    /// Returns an error if the base directories cannot be determined, or if migrating the
    /// legacy macOS file fails.
    pub fn find_config_file() -> Result<Option<PathBuf>> {
        if let Some(path) = Self::env_config_file() {
            if !path.exists() {
                return Ok(None);
            }
            if path.is_file() {
                return Ok(Some(path));
            }
            // The path exists but is not a regular file: fall through to the other locations.
        }

        let xdg_path = Self::xdg_config_file()?;
        if xdg_path.is_file() {
            return Ok(Some(xdg_path));
        }

        if cfg!(target_os = "macos") {
            let legacy_apple_path = etcetera::base_strategy::Apple::new()?
                .data_dir() // `$HOME/Library/Application Support/`
                .join("tree-sitter")
                .join("config.json");
            if legacy_apple_path.is_file() {
                if let Some(parent) = xdg_path.parent() {
                    fs::create_dir_all(parent)?;
                }
                fs::rename(&legacy_apple_path, &xdg_path)?;
                // Diagnostics go to stderr so that they never mix with a command's regular
                // output on stdout.
                eprintln!(
                    "Warning: your config.json file has been automatically migrated from \"{}\" to \"{}\"",
                    legacy_apple_path.display(),
                    xdg_path.display()
                );
                return Ok(Some(xdg_path));
            }
        }

        let legacy_path = etcetera::home_dir()?
            .join(".tree-sitter")
            .join("config.json");
        if legacy_path.is_file() {
            return Ok(Some(legacy_path));
        }

        Ok(None)
    }

    /// Returns the path `tree-sitter/config.json` inside the configuration directory chosen by
    /// [`etcetera::choose_base_strategy`](https://docs.rs/etcetera/*/etcetera/#basestrategy).
    fn xdg_config_file() -> Result<PathBuf> {
        let xdg_path = etcetera::choose_base_strategy()?
            .config_dir()
            .join("tree-sitter")
            .join("config.json");
        Ok(xdg_path)
    }

    /// Locates and loads in the user's configuration file.  We search for the configuration file
    /// in the following locations, in order:
    ///
    ///   - Location specified by the path parameter if provided
    ///   - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set
    ///   - `tree-sitter/config.json` in your default user configuration directory, as determined by
    ///     [`etcetera::choose_base_strategy`](https://docs.rs/etcetera/*/etcetera/#basestrategy)
    ///   - (macOS only) `tree-sitter/config.json` in the Apple data directory; a file found
    ///     there is migrated to the default user configuration directory
    ///   - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
    ///     its configuration
    ///
    /// If no file is found, the result of [`Config::initial`][] is returned instead.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or does not contain valid JSON.
    pub fn load(path: Option<PathBuf>) -> Result<Self> {
        let location = if let Some(path) = path {
            path
        } else if let Some(path) = Self::find_config_file()? {
            path
        } else {
            return Self::initial();
        };

        let content = fs::read_to_string(&location)
            .with_context(|| format!("Failed to read {}", location.to_string_lossy()))?;
        let config = serde_json::from_str(&content)
            .with_context(|| format!("Bad JSON config {}", location.to_string_lossy()))?;
        Ok(Self { location, config })
    }

    /// Creates an empty initial configuration file.  You can then use the [`Config::add`][] method
    /// to add the component-specific configuration types for any components that want to add
    /// content to the default file, and then use [`Config::save`][] to write the configuration to
    /// disk.
    ///
    /// The location is `$TREE_SITTER_DIR/config.json` when `TREE_SITTER_DIR` is set, and
    /// `tree-sitter/config.json` in the default user configuration directory otherwise.
    ///
    /// (Note that this is typically only done by the `tree-sitter init-config` command.)
    pub fn initial() -> Result<Self> {
        let location = match Self::env_config_file() {
            Some(path) => path,
            None => Self::xdg_config_file()?,
        };
        let config = serde_json::json!({});
        Ok(Self { location, config })
    }

    /// Saves this configuration to the file that it was originally loaded from, creating the
    /// parent directories if necessary.
    ///
    /// # Errors
    ///
    /// Returns an error if the content cannot be serialized, or if the directory or the file
    /// cannot be written.
    pub fn save(&self) -> Result<()> {
        let json = serde_json::to_string_pretty(&self.config)?;
        // A location without a parent (e.g. a filesystem root) has no directory to create; the
        // write below then reports the problem as an error instead of panicking here.
        if let Some(parent) = self.location.parent() {
            fs::create_dir_all(parent)
                .with_context(|| format!("Failed to create {}", parent.to_string_lossy()))?;
        }
        fs::write(&self.location, json)
            .with_context(|| format!("Failed to write {}", self.location.to_string_lossy()))?;
        Ok(())
    }

    /// Parses a component-specific configuration from the configuration file.  The type `C` must
    /// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON
    /// object, and must only include the fields relevant to that component.
    ///
    /// # Errors
    ///
    /// Returns an error if the configuration content cannot be deserialized into `C`.
    pub fn get<C>(&self) -> Result<C>
    where
        C: for<'de> Deserialize<'de>,
    {
        // `&Value` is itself a deserializer, so the JSON tree does not need to be cloned.
        let config = C::deserialize(&self.config)?;
        Ok(config)
    }

    /// Adds a component-specific configuration to the configuration file.  The type `C` must be
    /// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and
    /// must only include the fields relevant to that component.
    ///
    /// Keys of the component configuration overwrite keys of the same name that are already
    /// present.
    ///
    /// # Errors
    ///
    /// Returns an error if `config` cannot be serialized, if it does not serialize to a JSON
    /// object, or if the existing configuration content is not a JSON object.
    pub fn add<C>(&mut self, config: C) -> Result<()>
    where
        C: Serialize,
    {
        let mut component = serde_json::to_value(&config)?;
        let additions = component
            .as_object_mut()
            .context("Component configuration must serialize to a JSON object")?;
        let existing = self
            .config
            .as_object_mut()
            .context("Configuration file content must be a JSON object")?;
        existing.append(additions);
        Ok(())
    }
}



================================================
FILE: crates/generate/README.md
================================================
# Tree-sitter Generate

This helper crate implements the logic for the `tree-sitter generate` command,
and can be used by external tools to generate a parser from a grammar file.



================================================
FILE: crates/generate/Cargo.toml
================================================
[package]
name = "tree-sitter-generate"
version.workspace = true
description = "Library for generating C source code from a tree-sitter grammar"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-generate"
license.workspace = true
keywords.workspace = true
categories.workspace = true

[lib]
path = "src/generate.rs"

[lints]
workspace = true

[dependencies]
anyhow.workspace = true
heck.workspace = true
indexmap.workspace = true
indoc.workspace = true
log.workspace = true
regex.workspace = true
regex-syntax.workspace = true
rustc-hash.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
thiserror.workspace = true
topological-sort.workspace = true

tree-sitter.workspace = true

[target.'cfg(windows)'.dependencies]
url.workspace = true



================================================
FILE: crates/generate/src/build_tables.rs
================================================
mod build_lex_table;
mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
mod minimize_parse_table;
mod token_conflicts;

use std::collections::{BTreeSet, HashMap};

pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
use build_parse_table::BuildTableResult;
pub use build_parse_table::ParseTableBuilderError;
use log::info;

use self::{
    build_lex_table::build_lex_table,
    build_parse_table::{build_parse_table, ParseStateInfo},
    coincident_tokens::CoincidentTokenIndex,
    item_set_builder::ParseItemSetBuilder,
    minimize_parse_table::minimize_parse_table,
    token_conflicts::TokenConflictMap,
};
use crate::{
    grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
    nfa::{CharacterSet, NfaCursor},
    node_types::VariableInfo,
    rules::{AliasMap, Symbol, SymbolType, TokenSet},
    tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
};

/// The set of tables returned by [`build_tables`].
pub struct Tables {
    /// The parse table, after all of the post-processing steps in [`build_tables`] have run.
    pub parse_table: ParseTable,
    /// The `main_lex_table` produced by `build_lex_table`.
    pub main_lex_table: LexTable,
    /// The `keyword_lex_table` produced by `build_lex_table`.
    pub keyword_lex_table: LexTable,
    /// The `large_character_sets` produced by `build_lex_table`.
    // NOTE(review): the meaning of the optional symbol in each pair is not visible here —
    // confirm against `build_lex_table`.
    pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
}

/// Builds the parse table and the lex tables for a grammar and bundles them into [`Tables`].
///
/// The function is a fixed pipeline: it builds the parse table, then runs a series of passes
/// that each mutate the parse table in place, and finally builds the lex tables.
///
/// If `report_symbol_name` is `Some`, `report_state_info` is called at the end and prints
/// diagnostic information about the parse states to stderr.
///
/// # Errors
///
/// Propagates any error returned by `build_parse_table`.
pub fn build_tables(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap,
    variable_info: &[VariableInfo],
    inlines: &InlinedProductionMap,
    report_symbol_name: Option<&str>,
) -> BuildTableResult<Tables> {
    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
    // The following-token sets must be computed here: the item set builder is only borrowed
    // for this call, whereas `build_parse_table` below takes it by value.
    let following_tokens =
        get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);
    let (mut parse_table, parse_state_info) = build_parse_table(
        syntax_grammar,
        lexical_grammar,
        item_set_builder,
        variable_info,
    )?;
    let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
    let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
    let keywords = identify_keywords(
        lexical_grammar,
        &parse_table,
        syntax_grammar.word_token,
        &token_conflict_map,
        &coincident_token_index,
    );
    // Adds `Recover` entries to state 0 of the parse table.
    populate_error_state(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
        &coincident_token_index,
        &token_conflict_map,
        &keywords,
    );
    // Fills `parse_table.symbols` with the symbols that appear in at least one state.
    populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar);
    minimize_parse_table(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        &token_conflict_map,
        &keywords,
    );
    let lex_tables = build_lex_table(
        &mut parse_table,
        syntax_grammar,
        lexical_grammar,
        &keywords,
        &coincident_token_index,
        &token_conflict_map,
    );
    // Assigns an `external_lex_state_id` to every parse state.
    populate_external_lex_states(&mut parse_table, syntax_grammar);
    // Clears the `reusable` flag of terminal entries that overlap with another valid token.
    mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);

    if let Some(report_symbol_name) = report_symbol_name {
        report_state_info(
            syntax_grammar,
            lexical_grammar,
            &parse_table,
            &parse_state_info,
            report_symbol_name,
        );
    }

    Ok(Tables {
        parse_table,
        main_lex_table: lex_tables.main_lex_table,
        keyword_lex_table: lex_tables.keyword_lex_table,
        large_character_sets: lex_tables.large_character_sets,
    })
}

/// Computes, for every lexical token, the set of tokens that may appear directly after it.
///
/// The result is indexed by terminal index. For each pair of adjacent steps in every production
/// (including the inlined productions), all terminals in the first set (and reserved first set,
/// if any) of the right-hand step are added to the entry of every terminal in the last set of
/// the left-hand step. Terminal extras are then added to every entry, and the entry of each
/// terminal extra is set to the set of all tokens.
fn get_following_tokens(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    inlines: &InlinedProductionMap,
    builder: &ParseItemSetBuilder,
) -> Vec<TokenSet> {
    let token_count = lexical_grammar.variables.len();
    let mut following = vec![TokenSet::new(); token_count];
    let every_token = (0..token_count)
        .map(Symbol::terminal)
        .collect::<TokenSet>();

    let all_productions = syntax_grammar
        .variables
        .iter()
        .flat_map(|variable| &variable.productions)
        .chain(&inlines.productions);

    for production in all_productions {
        // Visit each pair of adjacent steps.
        for pair in production.steps.windows(2) {
            let (previous_step, next_step) = (&pair[0], &pair[1]);
            let trailing_tokens = builder.last_set(&previous_step.symbol);
            let leading_tokens = builder.first_set(&next_step.symbol);
            let leading_reserved_tokens = builder.reserved_first_set(&next_step.symbol);

            for token in trailing_tokens.iter().filter(|token| token.is_terminal()) {
                let entry = &mut following[token.index];
                entry.insert_all_terminals(leading_tokens);
                if let Some(reserved_tokens) = leading_reserved_tokens {
                    entry.insert_all_terminals(reserved_tokens);
                }
            }
        }
    }

    let terminal_extras = syntax_grammar
        .extra_symbols
        .iter()
        .filter(|extra| extra.is_terminal());
    for extra in terminal_extras {
        // An extra may follow any token ...
        for entry in &mut following {
            entry.insert(*extra);
        }
        // ... and any token may follow an extra.
        following[extra.index] = every_token.clone();
    }

    following
}

/// Fills state 0 of the parse table with `Recover` entries.
///
/// A terminal gets a (non-reusable) `Recover` entry unless it is neither conflict-free, nor a
/// keyword, nor the word token, and it conflicts with one of the conflict-free tokens. External
/// tokens without a corresponding internal token also get an entry, as does the end symbol.
/// Entries that already exist for a terminal or external token are left untouched; the entry
/// for the end symbol is always overwritten.
fn populate_error_state(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    coincident_token_index: &CoincidentTokenIndex,
    token_conflict_map: &TokenConflictMap,
    keywords: &TokenSet,
) {
    let state = &mut parse_table.states[0];
    let n = lexical_grammar.variables.len();

    // First identify the *conflict-free tokens*: tokens that do not overlap with
    // any other token in any way, besides matching exactly the same string.
    let conflict_free_tokens = (0..n)
        .filter(|&i| {
            let this_token = Symbol::terminal(i);
            let has_conflict = (0..n).any(|j| {
                j != i
                    && !coincident_token_index.contains(this_token, Symbol::terminal(j))
                    && token_conflict_map.does_match_shorter_or_longer(i, j)
            });
            if !has_conflict {
                info!(
                    "error recovery - token {} has no conflicts",
                    lexical_grammar.variables[i].name
                );
            }
            !has_conflict
        })
        .map(Symbol::terminal)
        .collect::<TokenSet>();

    let recover_entry = ParseTableEntry {
        reusable: false,
        actions: vec![ParseAction::Recover],
    };

    // Exclude from the error-recovery state any token that conflicts with one of
    // the *conflict-free tokens* identified above.
    for i in 0..n {
        let symbol = Symbol::terminal(i);

        let may_be_excluded = !conflict_free_tokens.contains(&symbol)
            && !keywords.contains(&symbol)
            && syntax_grammar.word_token != Some(symbol);
        let conflicting_token = if may_be_excluded {
            conflict_free_tokens.iter().find(|t| {
                !coincident_token_index.contains(symbol, *t)
                    && token_conflict_map.does_conflict(symbol.index, t.index)
            })
        } else {
            None
        };

        match conflicting_token {
            Some(t) => {
                info!(
                    "error recovery - exclude token {} because of conflict with {}",
                    lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
                );
            }
            None => {
                info!(
                    "error recovery - include token {}",
                    lexical_grammar.variables[i].name
                );
                state
                    .terminal_entries
                    .entry(symbol)
                    .or_insert_with(|| recover_entry.clone());
            }
        }
    }

    // External tokens that have no internal counterpart are always included.
    let purely_external_tokens = syntax_grammar
        .external_tokens
        .iter()
        .enumerate()
        .filter(|(_, token)| token.corresponding_internal_token.is_none());
    for (i, _) in purely_external_tokens {
        state
            .terminal_entries
            .entry(Symbol::external(i))
            .or_insert_with(|| recover_entry.clone());
    }

    state.terminal_entries.insert(Symbol::end(), recover_entry);
}

/// Appends to `parse_table.symbols` every symbol that is used as a key in at least one parse
/// state.
///
/// The symbols are added in this order: the end symbol, the used terminals, the used external
/// tokens, and the used non-terminals. The grammar's word token, if used, is placed at index 1
/// rather than appended.
fn populate_used_symbols(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) {
    let mut terminal_used = vec![false; lexical_grammar.variables.len()];
    let mut non_terminal_used = vec![false; syntax_grammar.variables.len()];
    let mut external_used = vec![false; syntax_grammar.external_tokens.len()];

    for state in &parse_table.states {
        for symbol in state.terminal_entries.keys() {
            if matches!(symbol.kind, SymbolType::Terminal) {
                terminal_used[symbol.index] = true;
            } else if matches!(symbol.kind, SymbolType::External) {
                external_used[symbol.index] = true;
            }
        }
        for symbol in state.nonterminal_entries.keys() {
            non_terminal_used[symbol.index] = true;
        }
    }

    parse_table.symbols.push(Symbol::end());

    let word_token_index = syntax_grammar.word_token.map(|token| token.index);
    for (i, _) in terminal_used.into_iter().enumerate().filter(|(_, used)| *used) {
        // Assign the grammar's word token a low numerical index. This ensures that
        // it can be stored in a subtree with no heap allocations, even for grammars with
        // very large numbers of tokens. This is an optimization, but it's also important to
        // ensure that a subtree's symbol can be successfully reassigned to the word token
        // without having to move the subtree to the heap.
        // See https://github.com/tree-sitter/tree-sitter/issues/258
        if word_token_index == Some(i) {
            parse_table.symbols.insert(1, Symbol::terminal(i));
        } else {
            parse_table.symbols.push(Symbol::terminal(i));
        }
    }

    for (i, _) in external_used.into_iter().enumerate().filter(|(_, used)| *used) {
        parse_table.symbols.push(Symbol::external(i));
    }

    for (i, _) in non_terminal_used.into_iter().enumerate().filter(|(_, used)| *used) {
        parse_table.symbols.push(Symbol::non_terminal(i));
    }
}

/// Assigns an `external_lex_state_id` to every parse state.
///
/// For each state, the set of relevant external tokens is collected: the external tokens that
/// are keys of the state's terminal entries, plus the external tokens whose corresponding
/// internal token is such a key. Identical sets share one entry in
/// `parse_table.external_lex_states`; the state stores the index of its set.
fn populate_external_lex_states(parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar) {
    // Maps the index of an internal token to the index of the external token it corresponds to.
    let external_index_by_internal_index = syntax_grammar
        .external_tokens
        .iter()
        .enumerate()
        .filter_map(|(external_index, external_token)| {
            external_token
                .corresponding_internal_token
                .map(|symbol| (symbol.index, external_index))
        })
        .collect::<HashMap<_, _>>();

    // Ensure that external lex state 0 represents the absence of any
    // external tokens.
    parse_table.external_lex_states.push(TokenSet::new());

    for state in &mut parse_table.states {
        let mut state_external_tokens = TokenSet::new();
        for token in state.terminal_entries.keys() {
            if token.is_external() {
                state_external_tokens.insert(*token);
            } else if token.is_terminal() {
                if let Some(external_index) = external_index_by_internal_index.get(&token.index) {
                    state_external_tokens.insert(Symbol::external(*external_index));
                }
            }
        }

        // Reuse an identical existing set, or register this one as a new lex state.
        let existing_id = parse_table
            .external_lex_states
            .iter()
            .position(|tokens| *tokens == state_external_tokens);
        state.external_lex_state_id = match existing_id {
            Some(id) => id,
            None => {
                parse_table.external_lex_states.push(state_external_tokens);
                parse_table.external_lex_states.len() - 1
            }
        };
    }
}

/// Determines which tokens are treated as keywords of the grammar's word token.
///
/// Returns an empty set if the grammar has no word token. Otherwise the candidates are narrowed
/// down in three steps, each of which is logged:
///
///   1. collect the tokens made up of alphabetical characters or underscores that match the
///      same string as the word token and never a different one,
///   2. drop candidates that match the same string as another candidate,
///   3. drop candidates whose conflict status with some non-candidate token differs from the
///      word token's, unless the word token is valid wherever both tokens are.
fn identify_keywords(
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
    word_token: Option<Symbol>,
    token_conflict_map: &TokenConflictMap,
    coincident_token_index: &CoincidentTokenIndex,
) -> TokenSet {
    let Some(word_token) = word_token else {
        return TokenSet::new();
    };

    let mut cursor = NfaCursor::new(&lexical_grammar.nfa, Vec::new());

    // First find all of the candidate keyword tokens: tokens that start with
    // letters or underscore and can match the same string as a word token.
    let keyword_candidates = lexical_grammar
        .variables
        .iter()
        .enumerate()
        .filter_map(|(i, variable)| {
            cursor.reset(vec![variable.start_state]);
            let is_candidate = all_chars_are_alphabetical(&cursor)
                && token_conflict_map.does_match_same_string(i, word_token.index)
                && !token_conflict_map.does_match_different_string(i, word_token.index);
            if !is_candidate {
                return None;
            }
            info!(
                "Keywords - add candidate {}",
                lexical_grammar.variables[i].name
            );
            Some(Symbol::terminal(i))
        })
        .collect::<TokenSet>();

    // Exclude keyword candidates that shadow another keyword candidate.
    let unshadowed_candidates = keyword_candidates
        .iter()
        .filter(|token| {
            let shadowing_token = keyword_candidates.iter().find(|other_token| {
                *other_token != *token
                    && token_conflict_map.does_match_same_string(other_token.index, token.index)
            });
            match shadowing_token {
                Some(other_token) => {
                    info!(
                        "Keywords - exclude {} because it matches the same string as {}",
                        lexical_grammar.variables[token.index].name,
                        lexical_grammar.variables[other_token.index].name
                    );
                    false
                }
                None => true,
            }
        })
        .collect::<TokenSet>();

    // Exclude keyword candidates for which substituting the keyword capture
    // token would introduce new lexical conflicts with other tokens.
    let keywords = unshadowed_candidates
        .iter()
        .filter(|token| {
            for other_index in 0..lexical_grammar.variables.len() {
                let other_token = Symbol::terminal(other_index);
                if keyword_candidates.contains(&other_token) {
                    continue;
                }

                // If the word token was already valid in every state containing
                // this keyword candidate, then substituting the word token won't
                // introduce any new lexical conflicts.
                let word_token_always_valid = coincident_token_index
                    .states_with(*token, other_token)
                    .iter()
                    .all(|state_id| {
                        parse_table.states[*state_id]
                            .terminal_entries
                            .contains_key(&word_token)
                    });
                if word_token_always_valid {
                    continue;
                }

                if token_conflict_map.has_same_conflict_status(
                    token.index,
                    word_token.index,
                    other_index,
                ) {
                    continue;
                }

                info!(
                    "Keywords - exclude {} because of conflict with {}",
                    lexical_grammar.variables[token.index].name,
                    lexical_grammar.variables[other_index].name
                );
                return false;
            }

            info!(
                "Keywords - include {}",
                lexical_grammar.variables[token.index].name,
            );
            true
        })
        .collect();

    keywords
}

/// Clears the `reusable` flag of every terminal entry whose token overlaps, according to the
/// token conflict map, with a terminal that is valid in the same parse state.
fn mark_fragile_tokens(
    parse_table: &mut ParseTable,
    lexical_grammar: &LexicalGrammar,
    token_conflict_map: &TokenConflictMap,
) {
    let token_count = lexical_grammar.variables.len();
    // Scratch buffer reused across states: `is_valid_token[i]` is true if terminal `i` is a key
    // of the current state's terminal entries.
    let mut is_valid_token = Vec::with_capacity(token_count);

    for state in &mut parse_table.states {
        is_valid_token.clear();
        is_valid_token.resize(token_count, false);
        for token in state.terminal_entries.keys().filter(|token| token.is_terminal()) {
            is_valid_token[token.index] = true;
        }

        for (token, entry) in &mut state.terminal_entries {
            if !token.is_terminal() {
                continue;
            }
            let overlaps_valid_token = is_valid_token
                .iter()
                .enumerate()
                .any(|(other, is_valid)| {
                    *is_valid && token_conflict_map.does_overlap(other, token.index)
                });
            if overlaps_valid_token {
                entry.reusable = false;
            }
        }
    }
}

/// Prints diagnostic information about the parse states to stderr.
///
/// First, a table is printed that lists every syntax variable together with the number of
/// parse states whose item set contains an item of that variable, sorted by descending count.
///
/// Then, the details (state index, state id, preceding symbol sequence, and items) are printed
/// for the states belonging to the variable named `report_symbol_name`, or for all states if
/// `report_symbol_name` is `"*"`. Nothing further is printed if no variable has that name.
fn report_state_info<'a>(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
    parse_state_info: &[ParseStateInfo<'a>],
    report_symbol_name: &'a str,
) {
    let mut all_state_indices = BTreeSet::new();
    let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
        .map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
        .collect::<Vec<_>>();

    for (i, state) in parse_table.states.iter().enumerate() {
        all_state_indices.insert(i);
        let item_set = &parse_state_info[state.id];
        for entry in &item_set.1.entries {
            if !entry.item.is_augmented() {
                symbols_with_state_indices[entry.item.variable_index as usize]
                    .1
                    .insert(i);
            }
        }
    }

    // Sort by descending state count. `Reverse` avoids the lossy cast to a signed integer that
    // negating the count would need.
    symbols_with_state_indices
        .sort_unstable_by_key(|(_, states)| std::cmp::Reverse(states.len()));

    // Column width for the table below; 0 if there are no variables at all, in which case the
    // table is empty anyway.
    let max_symbol_name_length = syntax_grammar
        .variables
        .iter()
        .map(|v| v.name.len())
        .max()
        .unwrap_or(0);
    for (symbol, states) in &symbols_with_state_indices {
        eprintln!(
            "{:width$}\t{}",
            syntax_grammar.variables[symbol.index].name,
            states.len(),
            width = max_symbol_name_length
        );
    }
    eprintln!();

    let state_indices = if report_symbol_name == "*" {
        Some(&all_state_indices)
    } else {
        symbols_with_state_indices
            .iter()
            .find_map(|(symbol, state_indices)| {
                if syntax_grammar.variables[symbol.index].name == report_symbol_name {
                    Some(state_indices)
                } else {
                    None
                }
            })
    };

    if let Some(state_indices) = state_indices {
        // Group states with the same core together, and order by index within a group.
        let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
        state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));

        for state_index in state_indices {
            let id = parse_table.states[state_index].id;
            let (preceding_symbols, item_set) = &parse_state_info[id];
            eprintln!("state index: {state_index}");
            eprintln!("state id: {id}");
            eprint!("symbol sequence:");
            for symbol in preceding_symbols {
                let name = if symbol.is_terminal() {
                    &lexical_grammar.variables[symbol.index].name
                } else if symbol.is_external() {
                    &syntax_grammar.external_tokens[symbol.index].name
                } else {
                    &syntax_grammar.variables[symbol.index].name
                };
                eprint!(" {name}");
            }
            eprintln!(
                "\nitems:\n{}",
                item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar),
            );
        }
    }
}

/// Returns `true` if every non-separator transition of the cursor consists only of alphabetic
/// characters and underscores. Separator transitions are ignored.
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
    let is_word_char = |c: char| c == '_' || c.is_alphabetic();
    cursor
        .transition_chars()
        .all(|(chars, is_sep)| is_sep || chars.chars().all(|c| is_word_char(c)))
}



================================================
FILE: crates/generate/src/dedup.rs
================================================
/// Splits groups of state ids so that states deemed incompatible end up in different groups.
///
/// Every group starting at `start_group_id` is visited once, including groups appended during
/// this call. Within a group, each state that is still part of the group is compared with the
/// later states of the group; a later state for which `should_split` returns `true` is removed.
/// All states removed from one group are appended together as a single new group, and
/// `group_ids_by_state_id` is updated for them.
///
/// `should_split` receives the two states and the current `group_ids_by_state_id`.
///
/// Returns `true` if at least one group was split.
pub fn split_state_id_groups<S>(
    states: &[S],
    state_ids_by_group_id: &mut Vec<Vec<usize>>,
    group_ids_by_state_id: &mut [usize],
    start_group_id: usize,
    mut should_split: impl FnMut(&S, &S, &[usize]) -> bool,
) -> bool {
    let mut any_group_split = false;

    // A `while` loop is required: groups pushed below must be visited too.
    let mut current_group_id = start_group_id;
    while current_group_id < state_ids_by_group_id.len() {
        let members = &state_ids_by_group_id[current_group_id];
        let mut removed_state_ids: Vec<usize> = Vec::new();

        for (position, &anchor_id) in members.iter().enumerate() {
            if removed_state_ids.contains(&anchor_id) {
                continue;
            }
            let anchor_state = &states[anchor_id];

            // Identify all of the other states in the group that are incompatible with
            // this state.
            for &candidate_id in &members[position + 1..] {
                if removed_state_ids.contains(&candidate_id) {
                    continue;
                }
                let candidate_state = &states[candidate_id];
                if should_split(anchor_state, candidate_state, group_ids_by_state_id) {
                    removed_state_ids.push(candidate_id);
                }
            }
        }

        // If any states were removed from the group, add them all as a new group.
        if !removed_state_ids.is_empty() {
            any_group_split = true;
            state_ids_by_group_id[current_group_id].retain(|id| !removed_state_ids.contains(id));

            let new_group_id = state_ids_by_group_id.len();
            for &state_id in &removed_state_ids {
                group_ids_by_state_id[state_id] = new_group_id;
            }
            state_ids_by_group_id.push(removed_state_ids);
        }

        current_group_id += 1;
    }

    any_group_split
}



================================================
FILE: crates/generate/src/dsl.js
================================================
/**
 * Creates an ALIAS rule node that wraps the normalized `rule`.
 *
 * The alias is anonymous (`named: false`) when `value` is a string, and named
 * (`named: true`) when `value` is a symbol: a `GrammarSymbol`, a plain object of type
 * "SYMBOL", or the `ReferenceError` produced for an undefined symbol (whose attached
 * `symbol.name` is used).
 *
 * @param {*} rule - The rule to alias; passed through `normalize`.
 * @param {*} value - The alias: a string or a symbol.
 * @returns {{type: string, content: *, named: boolean, value: string}} The ALIAS node.
 * @throws {Error} If `value` is missing or is neither a string nor a symbol.
 */
function alias(rule, value) {
  // `null` and `undefined` have no `constructor` property, so they must be rejected before
  // the switch below; otherwise a forgotten argument surfaces as an unrelated TypeError.
  if (value == null) {
    throw new Error(`Invalid alias value ${value}`);
  }

  const result = {
    type: "ALIAS",
    content: normalize(rule),
    named: false,
    value: null
  };

  switch (value.constructor) {
    case String:
      result.named = false;
      result.value = value;
      return result;
    case ReferenceError:
      result.named = true;
      result.value = value.symbol.name;
      return result;
    case Object:
    case GrammarSymbol:
      if (typeof value.type === 'string' && value.type === 'SYMBOL') {
        result.named = true;
        result.value = value.name;
        return result;
      }
      // Any other object falls through to the error below.
  }

  throw new Error(`Invalid alias value ${value}`);
}

/**
 * Creates a BLANK rule node.
 *
 * @returns {{type: string}} A new object of type "BLANK".
 */
function blank() {
  const node = { type: "BLANK" };
  return node;
}

/**
 * Creates a FIELD rule node that attaches `name` to the normalized `rule`.
 *
 * @param {*} name - The field name, stored as given.
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, name: *, content: *}} The FIELD node.
 */
function field(name, rule) {
  const content = normalize(rule);
  return { type: "FIELD", name, content };
}

/**
 * Creates a CHOICE rule node whose members are the normalized arguments.
 *
 * @param {...*} elements - The alternative rules; each is passed through `normalize`.
 * @returns {{type: string, members: Array}} The CHOICE node.
 */
function choice(...elements) {
  const members = elements.map((element) => normalize(element));
  return { type: "CHOICE", members };
}

/**
 * Creates a rule that is a choice between `value` and a blank rule.
 *
 * The call's arguments are first checked with `checkArguments`.
 *
 * @param {*} value - The rule to make optional.
 * @returns {{type: string, members: Array}} The CHOICE node built by `choice`.
 */
function optional(value) {
  checkArguments(arguments, arguments.length, optional, 'optional');
  const alternatives = [value, blank()];
  return choice(...alternatives);
}

/**
 * Creates a PREC rule node that stores the precedence `number` with the normalized `rule`.
 *
 * `number` is checked with `checkPrecedence`, and the call's arguments with `checkArguments`.
 *
 * @param {*} number - The precedence value.
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, value: *, content: *}} The PREC node.
 */
function prec(number, rule) {
  checkPrecedence(number);
  checkArguments(arguments, arguments.length - 1, prec, 'prec', ' and a precedence argument');

  const content = normalize(rule);
  return { type: "PREC", value: number, content };
}

/**
 * Creates a PREC_LEFT rule node that stores the precedence `number` with the normalized
 * `rule`.
 *
 * The precedence is optional: when `rule` is `null` or `undefined`, the first argument is
 * taken to be the rule and the precedence defaults to 0.
 *
 * @param {*} number - The precedence value, or the rule when called with a single argument.
 * @param {*} [rule] - The rule; passed through `normalize`.
 * @returns {{type: string, value: *, content: *}} The PREC_LEFT node.
 */
prec.left = function(number, rule) {
  // Single-argument form: shift the rule into place and default the precedence.
  // NOTE(review): in non-strict code, reassigning `number` also changes `arguments[0]`, so
  // `checkArguments` below may observe the defaulted value — confirm this is intended.
  if (rule == null) {
    rule = number;
    number = 0;
  }

  checkPrecedence(number);
  checkArguments(
    arguments,
    arguments.length - 1,
    prec.left,
    'prec.left',
    ' and an optional precedence argument'
  );

  return {
    type: "PREC_LEFT",
    value: number,
    content: normalize(rule)
  };
}

/**
 * Creates a PREC_RIGHT rule node that stores the precedence `number` with the normalized
 * `rule`.
 *
 * The precedence is optional: when `rule` is `null` or `undefined`, the first argument is
 * taken to be the rule and the precedence defaults to 0.
 *
 * @param {*} number - The precedence value, or the rule when called with a single argument.
 * @param {*} [rule] - The rule; passed through `normalize`.
 * @returns {{type: string, value: *, content: *}} The PREC_RIGHT node.
 */
prec.right = function(number, rule) {
  // Single-argument form: shift the rule into place and default the precedence.
  // NOTE(review): in non-strict code, reassigning `number` also changes `arguments[0]`, so
  // `checkArguments` below may observe the defaulted value — confirm this is intended.
  if (rule == null) {
    rule = number;
    number = 0;
  }

  checkPrecedence(number);
  checkArguments(
    arguments,
    arguments.length - 1,
    prec.right,
    'prec.right',
    ' and an optional precedence argument'
  );

  return {
    type: "PREC_RIGHT",
    value: number,
    content: normalize(rule)
  };
}

/**
 * Creates a PREC_DYNAMIC rule node that stores the precedence `number` with the normalized
 * `rule`.
 *
 * `number` is checked with `checkPrecedence`, and the call's arguments with `checkArguments`.
 *
 * @param {*} number - The precedence value.
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, value: *, content: *}} The PREC_DYNAMIC node.
 */
prec.dynamic = function(number, rule) {
  checkPrecedence(number);
  checkArguments(arguments, arguments.length - 1, prec.dynamic, 'prec.dynamic', ' and a precedence argument');

  const content = normalize(rule);
  return { type: "PREC_DYNAMIC", value: number, content };
}

/**
 * Creates a REPEAT rule node that wraps the normalized `rule`.
 *
 * The call's arguments are first checked with `checkArguments`.
 *
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, content: *}} The REPEAT node.
 */
function repeat(rule) {
  checkArguments(arguments, arguments.length, repeat, 'repeat');
  const content = normalize(rule);
  return { type: "REPEAT", content };
}

/**
 * Creates a REPEAT1 rule node that wraps the normalized `rule`.
 *
 * The call's arguments are first checked with `checkArguments`.
 *
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, content: *}} The REPEAT1 node.
 */
function repeat1(rule) {
  checkArguments(arguments, arguments.length, repeat1, 'repeat1');
  const content = normalize(rule);
  return { type: "REPEAT1", content };
}

/**
 * Creates a SEQ rule node whose members are the normalized arguments, in order.
 *
 * @param {...*} elements - The rules of the sequence; each is passed through `normalize`.
 * @returns {{type: string, members: Array}} The SEQ node.
 */
function seq(...elements) {
  const members = elements.map((element) => normalize(element));
  return { type: "SEQ", members };
}

/**
 * A rule node of type "SYMBOL" that refers to another rule by name.
 *
 * Instances are created by `sym`, and are recognized by their constructor in `alias`.
 */
class GrammarSymbol {
  /**
   * @param {*} name - The name of the referenced rule, stored as given.
   */
  constructor(name) {
    this.type = "SYMBOL";
    this.name = name;
  }
}

/**
 * Creates a RESERVED rule node that associates the normalized `rule` with the reserved word
 * set named `wordset`.
 *
 * @param {string} wordset - The name of the reserved word set.
 * @param {*} rule - The rule; passed through `normalize`.
 * @returns {{type: string, content: *, context_name: string}} The RESERVED node.
 * @throws {Error} If `wordset` is not a string.
 */
function reserved(wordset, rule) {
  const isValidName = typeof wordset === 'string';
  if (!isValidName) {
    throw new Error('Invalid reserved word set name: ' + wordset)
  }

  const content = normalize(rule);
  return { type: "RESERVED", content, context_name: wordset };
}

/**
 * Creates a symbol node that refers to the rule called `name`.
 *
 * @param {*} name - The name of the referenced rule.
 * @returns {GrammarSymbol} A new `GrammarSymbol` for `name`.
 */
function sym(name) {
  const symbol = new GrammarSymbol(name);
  return symbol;
}

/**
 * Creates a TOKEN rule node that wraps the normalized `value`.
 *
 * The call's arguments are first checked with `checkArguments`.
 *
 * @param {*} value - The rule; passed through `normalize`.
 * @returns {{type: string, content: *}} The TOKEN node.
 */
function token(value) {
  checkArguments(arguments, arguments.length, token, 'token', '', 'literal');
  const content = normalize(value);
  return { type: "TOKEN", content };
}

/**
 * Creates an IMMEDIATE_TOKEN rule node that wraps the normalized `value`.
 *
 * The call's arguments are first checked with `checkArguments`.
 *
 * @param {*} value - The rule; passed through `normalize`.
 * @returns {{type: string, content: *}} The IMMEDIATE_TOKEN node.
 */
token.immediate = function(value) {
  checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal');
  const content = normalize(value);
  return { type: "IMMEDIATE_TOKEN", content };
}

/**
 * Converts a value used in a grammar definition into a rule node.
 *
 *   - A string becomes a STRING node.
 *   - A `RegExp` becomes a PATTERN node holding its source, plus its flags if it has any.
 *   - A `RustRegex` becomes a PATTERN node holding its `value`.
 *   - Any other value with a string `type` property is assumed to already be a rule node and
 *     is returned as is.
 *
 * @param {*} value - The value to convert.
 * @returns {Object} The rule node.
 * @throws {Error} If `value` is `undefined`.
 * @throws {ReferenceError} If `value` is a `ReferenceError`, such as the one produced for an
 *   undefined symbol; it is rethrown as is.
 * @throws {TypeError} If `value` is `null` or otherwise not a valid rule.
 */
function normalize(value) {
  if (typeof value == "undefined")
    throw new Error("Undefined symbol");

  // `null` has no `constructor` property; report it like any other invalid rule rather than
  // letting the property access below fail with an unrelated TypeError.
  if (value === null)
    throw new TypeError(`Invalid rule: ${value}`);

  switch (value.constructor) {
    case String:
      return {
        type: 'STRING',
        value
      };
    case RegExp:
      return value.flags ? {
        type: 'PATTERN',
        value: value.source,
        flags: value.flags
      } : {
        type: 'PATTERN',
        value: value.source
      };
    case RustRegex:
      return {
        type: 'PATTERN',
        value: value.value
      };
    case ReferenceError:
      throw value
    default:
      if (typeof value.type === 'string') {
        return value;
      } else {
        throw new TypeError(`Invalid rule: ${value}`);
      }
  }
}

/**
 * Creates a proxy whose property reads yield symbol references.
 *
 * Reading a property returns `sym(propertyName)` when `ruleMap` is falsy or has
 * that name as an own property. Otherwise it returns (does not throw) a
 * `ReferenceError` whose `symbol` property holds the symbol.
 *
 * @param {object|null} ruleMap - Known rule names as own keys, or a falsy value to accept any name.
 * @returns {object} The proxy.
 */
function RuleBuilder(ruleMap) {
  const handler = {
    get(_, propertyName) {
      const symbol = sym(propertyName);

      const isKnown = !ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName);
      if (isKnown) {
        return symbol;
      }

      const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
      error.symbol = symbol;
      return error;
    }
  };

  return new Proxy({}, handler);
}

/**
 * Builds a grammar definition, optionally extending a base grammar.
 *
 * Called as `grammar(options)` it starts from an empty base grammar whose only
 * extra is `/\s/`. Called as `grammar(baseGrammar, options)` it starts from
 * `baseGrammar.grammar` and records the base grammar's name as `inherits`.
 *
 * Each of `externals`, `extras`, `word`, `conflicts`, `inline`, `supertypes`
 * and `precedences` in `options` is a function that is called with a rule
 * builder (as both `this` and first argument); where a second argument is
 * passed, it is the corresponding base value. `rules` and `reserved` are
 * objects mapping names to such functions.
 *
 * @param {object} baseGrammar - The options object (one-argument form) or the result of a previous `grammar()` call.
 * @param {object} [options] - The grammar options when extending a base grammar.
 * @returns {{grammar: object}} The assembled grammar, wrapped in an object under the `grammar` key.
 * @throws {Error} If a property has the wrong type, a name is invalid, or there are no rules.
 */
function grammar(baseGrammar, options) {
  let inherits = undefined;

  if (!options) {
    // One-argument form: the single argument is the options object.
    options = baseGrammar;
    baseGrammar = {
      name: null,
      rules: {},
      extras: [normalize(/\s/)],
      conflicts: [],
      externals: [],
      inline: [],
      supertypes: [],
      precedences: [],
      reserved: {},
    };
  } else {
    baseGrammar = baseGrammar.grammar;
    inherits = baseGrammar.name;
  }

  let externals = baseGrammar.externals;
  if (options.externals) {
    if (typeof options.externals !== "function") {
      throw new Error("Grammar's 'externals' property must be a function.");
    }

    // Externals may introduce new names, so this builder accepts any symbol.
    const externalsRuleBuilder = RuleBuilder(null)
    const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);

    if (!Array.isArray(externalRules)) {
      throw new Error("Grammar's 'externals' property must return an array of rules.");
    }

    externals = externalRules.map(normalize);
  }

  // Collect every known rule name so the rule builder can flag undefined symbols.
  // `options.rules` may be absent here; it is validated further down.
  const ruleMap = {};
  for (const key of Object.keys(options.rules || {})) {
    ruleMap[key] = true;
  }
  for (const key of Object.keys(baseGrammar.rules)) {
    ruleMap[key] = true;
  }
  for (const external of externals) {
    if (typeof external.name === 'string') {
      ruleMap[external.name] = true;
    }
  }

  const ruleBuilder = RuleBuilder(ruleMap);

  // Grammar names must be identifier-like: no leading digit, word characters only.
  const validNamePattern = /^[a-zA-Z_]\w*$/;

  const name = options.name;
  if (typeof name !== "string") {
    throw new Error("Grammar's 'name' property must be a string.");
  }

  if (!validNamePattern.test(name)) {
    throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
  }

  if (inherits && typeof inherits !== "string") {
    throw new Error("Base grammar's 'name' property must be a string.");
  }

  // Validate the *base* grammar's name here (the grammar's own name was checked above).
  if (inherits && !validNamePattern.test(inherits)) {
    throw new Error("Base grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
  }

  const rules = Object.assign({}, baseGrammar.rules);
  if (options.rules) {
    if (typeof options.rules !== "object") {
      throw new Error("Grammar's 'rules' property must be an object.");
    }

    for (const ruleName of Object.keys(options.rules)) {
      const ruleFn = options.rules[ruleName];
      if (typeof ruleFn !== "function") {
        throw new Error(`Grammar rules must all be functions. '${ruleName}' rule is not.`);
      }
      // The second argument is the base grammar's version of the rule, if any.
      const rule = ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]);
      if (rule === undefined) {
        throw new Error(`Rule '${ruleName}' returned undefined.`);
      }
      rules[ruleName] = normalize(rule);
    }
  }

  // Copy the base word sets so overriding one does not mutate the base grammar.
  const reserved = Object.assign({}, baseGrammar.reserved);
  if (options.reserved) {
    if (typeof options.reserved !== "object") {
      throw new Error("Grammar's 'reserved' property must be an object.");
    }

    for (const reservedWordSetName of Object.keys(options.reserved)) {
      const reservedWordSetFn = options.reserved[reservedWordSetName]
      if (typeof reservedWordSetFn !== "function") {
        throw new Error(`Grammar reserved word sets must all be functions. '${reservedWordSetName}' is not.`);
      }

      const reservedTokens = reservedWordSetFn.call(ruleBuilder, ruleBuilder, baseGrammar.reserved[reservedWordSetName]);

      if (!Array.isArray(reservedTokens)) {
        throw new Error(`Grammar's reserved word set functions must all return arrays of rules. '${reservedWordSetName}' does not.`);
      }

      reserved[reservedWordSetName] = reservedTokens.map(normalize);
    }
  }

  let extras = baseGrammar.extras.slice();
  if (options.extras) {
    if (typeof options.extras !== "function") {
      throw new Error("Grammar's 'extras' property must be a function.");
    }

    extras = options.extras
      .call(ruleBuilder, ruleBuilder, baseGrammar.extras)

    if (!Array.isArray(extras)) {
      throw new Error("Grammar's 'extras' function must return an array.")
    }

    extras = extras.map(normalize);
  }

  let word = baseGrammar.word;
  if (options.word) {
    word = options.word.call(ruleBuilder, ruleBuilder).name;
    if (typeof word != 'string') {
      throw new Error("Grammar's 'word' property must be a named rule.");
    }

    // The rule builder returns a ReferenceError object (whose `name` is
    // 'ReferenceError') for symbols that are not defined.
    if (word === 'ReferenceError') {
      throw new Error("Grammar's 'word' property must be a valid rule name.");
    }
  }

  let conflicts = baseGrammar.conflicts;
  if (options.conflicts) {
    if (typeof options.conflicts !== "function") {
      throw new Error("Grammar's 'conflicts' property must be a function.");
    }

    // Base conflicts are stored as names; hand them to the callback as symbols.
    const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
    const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);

    if (!Array.isArray(conflictRules)) {
      throw new Error("Grammar's conflicts must be an array of arrays of rules.");
    }

    conflicts = conflictRules.map(conflictSet => {
      if (!Array.isArray(conflictSet)) {
        throw new Error("Grammar's conflicts must be an array of arrays of rules.");
      }

      return conflictSet.map(symbol => normalize(symbol).name);
    });
  }

  let inline = baseGrammar.inline;
  if (options.inline) {
    if (typeof options.inline !== "function") {
      throw new Error("Grammar's 'inline' property must be a function.");
    }

    const baseInlineRules = baseGrammar.inline.map(sym);
    const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);

    if (!Array.isArray(inlineRules)) {
      throw new Error("Grammar's inline must be an array of rules.");
    }

    // Drop duplicates and undefined symbols with a warning instead of failing.
    inline = inlineRules.filter((symbol, index, self) => {
      if (self.findIndex(s => s.name === symbol.name) !== index) {
        console.log(`Warning: duplicate inline rule '${symbol.name}'`);
        return false;
      }
      if (symbol.name === 'ReferenceError') {
        console.log(`Warning: inline rule '${symbol.symbol.name}' is not defined.`);
        return false;
      }
      return true;
    }).map(symbol => symbol.name);
  }

  let supertypes = baseGrammar.supertypes;
  if (options.supertypes) {
    if (typeof options.supertypes !== "function") {
      throw new Error("Grammar's 'supertypes' property must be a function.");
    }

    const baseSupertypeRules = baseGrammar.supertypes.map(sym);
    const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);

    if (!Array.isArray(supertypeRules)) {
      throw new Error("Grammar's supertypes must be an array of rules.");
    }

    supertypes = supertypeRules.map(symbol => {
      if (symbol.name === 'ReferenceError') {
        throw new Error(`Supertype rule \`${symbol.symbol.name}\` is not defined.`);
      }
      return symbol.name;
    });
  }

  let precedences = baseGrammar.precedences;
  if (options.precedences) {
    if (typeof options.precedences !== "function") {
      throw new Error("Grammar's 'precedences' property must be a function");
    }
    precedences = options.precedences.call(ruleBuilder, ruleBuilder, baseGrammar.precedences);
    if (!Array.isArray(precedences)) {
      throw new Error("Grammar's precedences must be an array of arrays of rules.");
    }
    precedences = precedences.map(list => {
      if (!Array.isArray(list)) {
        throw new Error("Grammar's precedences must be an array of arrays of rules.");
      }
      return list.map(normalize);
    });
  }

  if (Object.keys(rules).length === 0) {
    throw new Error("Grammar must have at least one rule.");
  }

  return {
    grammar: {
      name,
      inherits,
      word,
      rules,
      extras,
      conflicts,
      precedences,
      externals,
      inline,
      supertypes,
      reserved,
    },
  };
}

/**
 * Wrapper around a pattern string; `normalize` turns instances into
 * `PATTERN` nodes using `value` as the pattern source.
 */
class RustRegex {
  /**
   * @param {*} value - The pattern; stored unchanged on `this.value`.
   */
  constructor(value) {
    Object.assign(this, { value });
  }
}

/**
 * Throws when a single-argument DSL function was called with several arguments.
 *
 * A call with exactly three arguments whose second is a number and whose third
 * is an array is treated as a callback invocation from `.map()` and accepted.
 *
 * @param {ArrayLike} args - The caller's `arguments` object.
 * @param {number} ruleCount - How many arguments the caller received.
 * @param {Function} caller - The DSL function; frames above it are trimmed from the stack trace.
 * @param {string} callerName - Name shown in the error message.
 * @param {string} [suffix=''] - Text appended after "argument" in the message.
 * @param {string} [argType='rule'] - Noun used for the argument in the message.
 * @throws {Error} If more than one argument was passed outside a `.map()` call.
 */
function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') {
  // `.map()` passes (element, index, array) to its callback.
  const calledViaMap = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]);
  if (calledViaMap || !(ruleCount > 1)) {
    return;
  }

  const lines = [
    `The \`${callerName}\` function only takes one ${argType} argument${suffix}.`,
    `You passed in multiple ${argType}s. Did you mean to call \`seq\`?\n`
  ];
  const error = new Error(lines.join('\n'));
  Error.captureStackTrace(error, caller);
  throw error;
}

/**
 * Ensures a precedence value was supplied.
 *
 * @param {*} value - The precedence value to check.
 * @throws {Error} If `value` is `null` or `undefined`.
 */
function checkPrecedence(value) {
  const isMissing = value === null || value === undefined;
  if (isMissing) {
    throw new Error('Missing precedence value');
  }
}

/**
 * Reads an environment variable in whichever JS runtime is hosting the script.
 *
 * @param {string} name - The variable name.
 * @returns {string|undefined} The variable's value as reported by the runtime.
 * @throws {Error} If neither `process` nor `Deno` exists on `globalThis`.
 */
function getEnv(name) {
  if (globalThis.process) {
    // Node/Bun
    return process.env[name];
  } else if (globalThis.Deno) {
    // Deno
    return Deno.env.get(name);
  } else {
    throw Error("Unsupported JS runtime");
  }
}

// Expose the DSL functions as globals so that the grammar module imported
// below can call them without importing anything itself.
globalThis.alias = alias;
globalThis.blank = blank;
globalThis.choice = choice;
globalThis.optional = optional;
globalThis.prec = prec;
globalThis.repeat = repeat;
globalThis.repeat1 = repeat1;
globalThis.reserved = reserved;
globalThis.seq = seq;
globalThis.sym = sym;
globalThis.token = token;
globalThis.grammar = grammar;
globalThis.field = field;
globalThis.RustRegex = RustRegex;

// Load the grammar module named by TREE_SITTER_GRAMMAR_PATH. The grammar is
// taken from the module's default export when that has a `grammar` property,
// otherwise from a named `grammar` export.
const result = await import(getEnv("TREE_SITTER_GRAMMAR_PATH"));
const object = {
  "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
  ...(result.default?.grammar ?? result.grammar)
};
// Serialized without whitespace, so the JSON occupies a single line of output.
const output = JSON.stringify(object);

// Write the JSON to stdout using the API of the hosting runtime.
if (globalThis.process) { // Node/Bun
  process.stdout.write(output);
} else if (globalThis.Deno) { // Deno
  Deno.stdout.writeSync(new TextEncoder().encode(output));
} else {
  throw Error("Unsupported JS runtime");
}



================================================
FILE: crates/generate/src/generate.rs
================================================
use std::{
    collections::HashMap,
    env, fs,
    io::Write,
    path::{Path, PathBuf},
    process::{Command, Stdio},
    sync::LazyLock,
};

use anyhow::Result;
use node_types::VariableInfo;
use regex::{Regex, RegexBuilder};
use rules::{Alias, Symbol};
use semver::Version;
use serde::{Deserialize, Serialize};
use thiserror::Error;

mod build_tables;
mod dedup;
mod grammars;
mod nfa;
mod node_types;
pub mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;
mod tables;

use build_tables::build_tables;
pub use build_tables::ParseTableBuilderError;
use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar};
pub use node_types::{SuperTypeCycleError, VariableInfoError};
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use render::render_c_code;
pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN};

/// Matches `//` comments that start a line (leading whitespace allowed) and run
/// to the end of that line. Compiled lazily on first use, in multi-line mode.
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    let mut builder = RegexBuilder::new(r"^\s*//.*");
    builder.multi_line(true);
    builder.build().unwrap()
});

/// Everything produced by `generate_node_types_from_grammar`: the node-types
/// JSON plus the prepared grammar pieces that parser generation reuses.
struct JSONStageOutput {
    /// Pretty-printed JSON text for `node-types.json`.
    node_types_json: String,
    /// Syntax grammar returned by `prepare_grammar`.
    syntax_grammar: SyntaxGrammar,
    /// Lexical grammar returned by `prepare_grammar`.
    lexical_grammar: LexicalGrammar,
    /// Inlined productions returned by `prepare_grammar`.
    inlines: InlinedProductionMap,
    /// Symbol-to-alias map returned by `prepare_grammar`.
    simple_aliases: HashMap<Symbol, Alias>,
    /// Result of `node_types::get_variable_info`.
    variable_info: Vec<VariableInfo>,
}

/// Output of `generate_parser_for_grammar_with_opts`.
struct GeneratedParser {
    /// C source returned by `render_c_code` (written to `parser.c`).
    c_code: String,
    /// Pretty-printed JSON text for `node-types.json`.
    node_types_json: String,
}

/// Contents of `templates/alloc.h`, embedded at compile time.
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
/// Contents of `templates/array.h`, embedded at compile time.
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");

/// Result type whose error is [`GenerateError`].
pub type GenerateResult<T> = Result<T, GenerateError>;

/// Top-level error for parser generation. Most variants transparently wrap the
/// error of one stage.
#[derive(Debug, Error, Serialize)]
pub enum GenerateError {
    /// Checking whether the supplied grammar path exists failed.
    #[error("Error with specified path -- {0}")]
    GrammarPath(String),
    /// An I/O failure, carried as its message text.
    #[error("{0}")]
    IO(String),
    /// Loading the grammar file failed.
    #[error(transparent)]
    LoadGrammarFile(#[from] LoadGrammarError),
    /// `parse_grammar` failed.
    #[error(transparent)]
    ParseGrammar(#[from] ParseGrammarError),
    /// `prepare_grammar` failed.
    #[error(transparent)]
    Prepare(#[from] PrepareGrammarError),
    /// Wraps a `VariableInfoError`.
    #[error(transparent)]
    VariableInfo(#[from] VariableInfoError),
    /// `build_tables` failed.
    #[error(transparent)]
    BuildTables(#[from] ParseTableBuilderError),
    /// Reading the grammar version from `tree-sitter.json` failed.
    #[error(transparent)]
    ParseVersion(#[from] ParseVersionError),
    /// Wraps a `SuperTypeCycleError`.
    #[error(transparent)]
    SuperTypeCycle(#[from] SuperTypeCycleError),
}

impl From<std::io::Error> for GenerateError {
    /// Keeps only the I/O error's display text, stored in the `IO` variant.
    fn from(io_error: std::io::Error) -> Self {
        let message = io_error.to_string();
        Self::IO(message)
    }
}

/// Result type whose error is [`LoadGrammarError`].
pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;

/// Errors returned while loading a grammar file.
#[derive(Debug, Error, Serialize)]
pub enum LoadGrammarError {
    /// The grammar path points at a directory.
    #[error("Path to a grammar file with `.js` or `.json` extension is required")]
    InvalidPath,
    /// Evaluating a JavaScript grammar failed.
    #[error("Failed to load grammar.js -- {0}")]
    LoadJSGrammarFile(#[from] JSError),
    /// An I/O failure, carried as its message text.
    #[error("Failed to load grammar.json -- {0}")]
    IO(String),
    /// The file extension is neither `js` nor `json`.
    #[error("Unknown grammar file extension: {0:?}")]
    FileExtension(PathBuf),
}

impl From<std::io::Error> for LoadGrammarError {
    /// Keeps only the I/O error's display text, stored in the `IO` variant.
    fn from(io_error: std::io::Error) -> Self {
        let message = io_error.to_string();
        Self::IO(message)
    }
}

/// Errors from reading the grammar version out of `tree-sitter.json`. Each
/// variant carries a ready-to-display message.
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
    /// The version string could not be parsed as semver.
    #[error("{0}")]
    Version(String),
    /// The file could not be parsed as JSON of the expected shape.
    #[error("{0}")]
    JSON(String),
    /// The file could not be read.
    #[error("{0}")]
    IO(String),
}

pub type JSResult<T> = Result<T, JSError>;

#[derive(Debug, Error, Serialize)]
pub enum JSError {
    #[error("Failed to run `{runtime}` -- {error}")]
    JSRuntimeSpawn { runtime: String, error: String },
    #[error("Got invalid UTF8 from `{runtime}` -- {error}")]
    JSRuntimeUtf8 { runtime: String, error: String },
    #[error("`{runtime}` process exited with status {code}")]
    JSRuntimeExit { runtime: String, code: i32 },
    #[error("{0}")]
    IO(String),
    #[error("Could not parse this package's version as semver -- {0}")]
    Semver(String),
    #[error("Failed to serialze grammar JSON -- {0}")]
    Serialzation(String),
}

impl From<std::io::Error> for JSError {
    /// Keeps only the I/O error's display text, stored in the `IO` variant.
    fn from(io_error: std::io::Error) -> Self {
        let message = io_error.to_string();
        Self::IO(message)
    }
}

impl From<serde_json::Error> for JSError {
    fn from(value: serde_json::Error) -> Self {
        Self::Serialzation(value.to_string())
    }
}

impl From<semver::Error> for JSError {
    fn from(value: semver::Error) -> Self {
        Self::Semver(value.to_string())
    }
}

/// Loads a grammar and writes the generated files into an output directory.
///
/// * `repo_path` - Grammar repository root; the default locations of
///   `grammar.js` and the `src` output directory are derived from it.
/// * `out_path` - Output directory; defaults to `<repo_path>/src`.
/// * `grammar_path` - Path to the grammar file. If it does not exist, it is
///   created as a directory, becomes the repository root, and `grammar.js`
///   inside it is used.
/// * `abi_version` - Requested ABI version; lowered to `ABI_VERSION_MIN` (with
///   a warning) when no `tree-sitter.json` is found.
/// * `report_symbol_name` - Forwarded to `build_tables`.
/// * `js_runtime` - JS runtime used to evaluate a `.js` grammar.
/// * `generate_parser` - When `false`, only `grammar.json` and
///   `node-types.json` are written.
///
/// # Errors
///
/// Returns a [`GenerateError`] if loading, parsing, preparing or rendering the
/// grammar fails, or if a file or directory cannot be written.
pub fn generate_parser_in_directory<T, U, V>(
    repo_path: T,
    out_path: Option<U>,
    grammar_path: Option<V>,
    mut abi_version: usize,
    report_symbol_name: Option<&str>,
    js_runtime: Option<&str>,
    generate_parser: bool,
) -> GenerateResult<()>
where
    T: Into<PathBuf>,
    U: Into<PathBuf>,
    V: Into<PathBuf>,
{
    let mut repo_path: PathBuf = repo_path.into();

    // Resolve the grammar file. A grammar path that does not exist yet is
    // treated as a new grammar directory: it is created and becomes the
    // repository root.
    let grammar_path = if let Some(path) = grammar_path {
        let path_buf: PathBuf = path.into();
        if !path_buf
            .try_exists()
            .map_err(|e| GenerateError::GrammarPath(e.to_string()))?
        {
            fs::create_dir_all(&path_buf)?;
            repo_path = path_buf;
            repo_path.join("grammar.js")
        } else {
            path_buf
        }
    } else {
        repo_path.join("grammar.js")
    };

    // Read the grammar file.
    let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;

    let src_path = out_path.map_or_else(|| repo_path.join("src"), |p| p.into());
    let header_path = src_path.join("tree_sitter");

    // Ensure that the output directory exists
    fs::create_dir_all(&src_path)?;

    // Write `grammar.json` unless the input file itself is named `grammar.json`.
    if grammar_path.file_name().unwrap() != "grammar.json" {
        fs::write(src_path.join("grammar.json"), &grammar_json).map_err(|e| {
            GenerateError::IO(format!(
                "Failed to write grammar.json to {} -- {e}",
                src_path.display()
            ))
        })?;
    }

    let input_grammar = parse_grammar(&grammar_json)?;

    // If our job is only to generate `grammar.json` (and `node-types.json`)
    // and not `parser.c`, stop here.
    if !generate_parser {
        let node_types_json = generate_node_types_from_grammar(&input_grammar)?.node_types_json;
        write_file(&src_path.join("node-types.json"), node_types_json)?;
        return Ok(());
    }

    let semantic_version = read_grammar_version(&repo_path)?;

    // Without a version from `tree-sitter.json`, fall back to the minimum ABI.
    if semantic_version.is_none() && abi_version > ABI_VERSION_MIN {
        println!("Warning: No `tree-sitter.json` file found in your grammar, this file is required to generate with ABI {abi_version}. Using ABI version {ABI_VERSION_MIN} instead.");
        println!("This file can be set up with `tree-sitter init`. For more information, see https://tree-sitter.github.io/tree-sitter/cli/init.");
        abi_version = ABI_VERSION_MIN;
    }

    // Generate the parser and related files.
    let GeneratedParser {
        c_code,
        node_types_json,
    } = generate_parser_for_grammar_with_opts(
        &input_grammar,
        abi_version,
        // NOTE(review): `as u8` truncates version components above 255.
        semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
        report_symbol_name,
    )?;

    write_file(&src_path.join("parser.c"), c_code)?;
    write_file(&src_path.join("node-types.json"), node_types_json)?;
    fs::create_dir_all(&header_path)?;
    write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
    write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
    write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;

    Ok(())
}

pub fn generate_parser_for_grammar(
    grammar_json: &str,
    semantic_version: Option<(u8, u8, u8)>,
) -> GenerateResult<(String, String)> {
    let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
    let input_grammar = parse_grammar(&grammar_json)?;
    let parser = generate_parser_for_grammar_with_opts(
        &input_grammar,
        tree_sitter::LANGUAGE_VERSION,
        semantic_version,
        None,
    )?;
    Ok((input_grammar.name, parser.c_code))
}

/// Prepares the grammar and produces the node-types JSON, returning it together
/// with the prepared grammar pieces so that parser generation can reuse them.
fn generate_node_types_from_grammar(
    input_grammar: &InputGrammar,
) -> GenerateResult<JSONStageOutput> {
    let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
        prepare_grammar(input_grammar)?;

    let variable_info =
        node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;

    let node_types = node_types::generate_node_types_json(
        &syntax_grammar,
        &lexical_grammar,
        &simple_aliases,
        &variable_info,
    )?;
    let node_types_json = serde_json::to_string_pretty(&node_types).unwrap();

    Ok(JSONStageOutput {
        node_types_json,
        syntax_grammar,
        lexical_grammar,
        inlines,
        simple_aliases,
        variable_info,
    })
}

/// Runs the full pipeline for an already-parsed grammar: node types, parse
/// tables, then C code rendering.
fn generate_parser_for_grammar_with_opts(
    input_grammar: &InputGrammar,
    abi_version: usize,
    semantic_version: Option<(u8, u8, u8)>,
    report_symbol_name: Option<&str>,
) -> GenerateResult<GeneratedParser> {
    let stage = generate_node_types_from_grammar(input_grammar)?;

    let supertype_symbol_map = node_types::get_supertype_symbol_map(
        &stage.syntax_grammar,
        &stage.simple_aliases,
        &stage.variable_info,
    );
    let tables = build_tables(
        &stage.syntax_grammar,
        &stage.lexical_grammar,
        &stage.simple_aliases,
        &stage.variable_info,
        &stage.inlines,
        report_symbol_name,
    )?;

    // The grammars are only borrowed above; rendering takes ownership of them.
    let JSONStageOutput {
        node_types_json,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        ..
    } = stage;

    let c_code = render_c_code(
        &input_grammar.name,
        tables,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        abi_version,
        semantic_version,
        supertype_symbol_map,
    );

    Ok(GeneratedParser {
        c_code,
        node_types_json,
    })
}

/// Finds the nearest `tree-sitter.json` and returns the version from its metadata.
///
/// The search starts in `repo_path` and walks up through its parent
/// directories. If no file is found, `None` is returned to maintain backwards
/// compatibility. If a file is found but cannot be read, parsed, or its version
/// is not valid semver, an error is returned.
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
    #[derive(Deserialize)]
    struct Metadata {
        version: String,
    }

    #[derive(Deserialize)]
    struct TreeSitterJson {
        metadata: Metadata,
    }

    let filename = "tree-sitter.json";
    let mut candidate = repo_path.join(filename);

    loop {
        if candidate.exists() {
            let contents = fs::read_to_string(&candidate).map_err(|e| {
                ParseVersionError::IO(format!("Failed to read `{}` -- {e}", candidate.display()))
            })?;
            let config: TreeSitterJson = serde_json::from_str(&contents).map_err(|e| {
                ParseVersionError::JSON(format!(
                    "Failed to parse `{}` -- {e}",
                    candidate.display()
                ))
            })?;
            return match Version::parse(&config.metadata.version) {
                Ok(version) => Ok(Some(version)),
                Err(e) => Err(ParseVersionError::Version(format!(
                    "Failed to parse `{}` version as semver -- {e}",
                    candidate.display()
                ))),
            };
        }

        // Strip the file name, then step up one directory; stop when there is
        // no parent left.
        candidate.pop();
        if !candidate.pop() {
            return Ok(None);
        }
        candidate.push(filename);
    }
}

/// Loads a grammar as JSON text.
///
/// A `.js` file is evaluated with the given JS runtime; a `.json` file is read
/// as-is.
///
/// # Errors
///
/// Returns [`LoadGrammarError::InvalidPath`] for a directory,
/// [`LoadGrammarError::FileExtension`] for any other extension, and otherwise
/// whatever error loading the file produced.
pub fn load_grammar_file(
    grammar_path: &Path,
    js_runtime: Option<&str>,
) -> LoadGrammarFileResult<String> {
    if grammar_path.is_dir() {
        return Err(LoadGrammarError::InvalidPath);
    }

    let extension = grammar_path.extension().and_then(|ext| ext.to_str());
    match extension {
        Some("js") => {
            load_js_grammar_file(grammar_path, js_runtime).map_err(LoadGrammarError::from)
        }
        Some("json") => fs::read_to_string(grammar_path).map_err(LoadGrammarError::from),
        _ => Err(LoadGrammarError::FileExtension(
            grammar_path.to_path_buf(),
        )),
    }
}

/// Evaluates a JavaScript grammar in an external JS runtime and returns the
/// resulting grammar as pretty-printed JSON.
///
/// The runtime (default `node`) is started with the grammar's canonical path in
/// the `TREE_SITTER_GRAMMAR_PATH` environment variable. The CLI version globals
/// and the embedded `dsl.js` script are fed to it on stdin. Everything the
/// process prints before the last line of stdout is forwarded to this process's
/// stdout; the last line is parsed as the grammar JSON.
///
/// # Errors
///
/// Returns a [`JSError`] if the runtime cannot be spawned, written to or read
/// from, exits with a non-zero status, or prints output that is not valid
/// UTF-8 or JSON.
///
/// # Panics
///
/// Panics if the runtime process ends without an exit code.
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
    let grammar_path = fs::canonicalize(grammar_path)?;

    // On Windows the path is passed to the dynamic `import()` as a file URL.
    #[cfg(windows)]
    let grammar_path = url::Url::from_file_path(grammar_path)
        .expect("Failed to convert path to URL")
        .to_string();

    let js_runtime = js_runtime.unwrap_or("node");

    // Each known runtime needs its own flags to read a module from stdin.
    let mut js_command = Command::new(js_runtime);
    match js_runtime {
        "node" => {
            js_command.args(["--input-type=module", "-"]);
        }
        "bun" => {
            js_command.arg("-");
        }
        "deno" => {
            js_command.args(["run", "--allow-all", "-"]);
        }
        _ => {}
    }

    let mut js_process = js_command
        .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .map_err(|e| JSError::JSRuntimeSpawn {
            runtime: js_runtime.to_string(),
            error: e.to_string(),
        })?;

    let mut js_stdin = js_process
        .stdin
        .take()
        .ok_or_else(|| JSError::IO(format!("Failed to open stdin for `{js_runtime}`")))?;

    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
    write!(
        js_stdin,
        "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
         globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
         globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
        cli_version.major, cli_version.minor, cli_version.patch,
    )
    .map_err(|e| {
        JSError::IO(format!(
            "Failed to write tree-sitter version to `{js_runtime}`'s stdin -- {e}"
        ))
    })?;
    // `write_all` rather than `write`: a single `write` call may accept only
    // part of the buffer, which would hand the runtime a truncated script.
    js_stdin
        .write_all(include_bytes!("./dsl.js"))
        .map_err(|e| {
            JSError::IO(format!(
                "Failed to write grammar dsl to `{js_runtime}`'s stdin -- {e}"
            ))
        })?;
    // Close stdin so the runtime sees end-of-input and starts executing.
    drop(js_stdin);

    let output = js_process
        .wait_with_output()
        .map_err(|e| JSError::IO(format!("Failed to read output from `{js_runtime}` -- {e}")))?;
    match output.status.code() {
        None => panic!("`{js_runtime}` process was killed"),
        Some(0) => {
            let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
                runtime: js_runtime.to_string(),
                error: e.to_string(),
            })?;

            let mut grammar_json = &stdout[..];

            if let Some(pos) = stdout.rfind('\n') {
                // If there's a newline, split the last line from the rest of the output
                let node_output = &stdout[..pos];
                grammar_json = &stdout[pos + 1..];

                // Forward whatever the grammar script printed to our own stdout.
                let mut stdout = std::io::stdout().lock();
                stdout.write_all(node_output.as_bytes())?;
                stdout.write_all(b"\n")?;
                stdout.flush()?;
            }

            // Round-trip through `serde_json::Value` to validate and pretty-print.
            Ok(serde_json::to_string_pretty(&serde_json::from_str::<
                serde_json::Value,
            >(grammar_json)?)?)
        }
        Some(code) => Err(JSError::JSRuntimeExit {
            runtime: js_runtime.to_string(),
            code,
        }),
    }
}

/// Writes `body` to `path`.
///
/// # Errors
///
/// Returns [`GenerateError::IO`] with a message naming the file if the write fails.
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
    match fs::write(path, body) {
        Ok(()) => Ok(()),
        Err(e) => Err(GenerateError::IO(format!(
            "Failed to write {:?} -- {e}",
            path.file_name()
        ))),
    }
}



================================================
FILE: crates/generate/src/grammars.rs
================================================
use std::{collections::HashMap, fmt};

use super::{
    nfa::Nfa,
    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
};

/// The kind of a grammar variable.
///
/// `Named` and `Anonymous` are the visible kinds (see `is_visible`). The
/// derived ordering follows declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum VariableType {
    Hidden,
    Auxiliary,
    Anonymous,
    Named,
}

// Input grammar

/// A named rule of the input grammar.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Variable {
    /// The variable's name.
    pub name: String,
    /// The variable's kind.
    pub kind: VariableType,
    /// The rule that defines the variable.
    pub rule: Rule,
}

/// One entry in a precedence ordering.
///
/// Displayed as `'name'` for [`PrecedenceEntry::Name`] and `$.symbol` for
/// [`PrecedenceEntry::Symbol`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrecedenceEntry {
    Name(String),
    Symbol(String),
}

/// The grammar as parsed from its JSON description; the input to
/// `prepare_grammar`.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct InputGrammar {
    /// The grammar's name.
    pub name: String,
    /// The grammar's variables (rules).
    pub variables: Vec<Variable>,
    /// Rules listed as extras.
    pub extra_symbols: Vec<Rule>,
    /// Expected conflicts, each a list of variable names.
    pub expected_conflicts: Vec<Vec<String>>,
    /// Precedence orderings, each a list of entries.
    pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
    /// Rules listed as external tokens.
    pub external_tokens: Vec<Rule>,
    /// Names of variables to inline.
    pub variables_to_inline: Vec<String>,
    /// Names of supertype symbols.
    pub supertype_symbols: Vec<String>,
    /// Name of the word token, if any.
    pub word_token: Option<String>,
    /// Named reserved word sets.
    pub reserved_words: Vec<ReservedWordContext<Rule>>,
}

/// A named set of reserved words, generic over how each word is represented.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ReservedWordContext<T> {
    /// The set's name.
    pub name: String,
    /// The words in the set.
    pub reserved_words: Vec<T>,
}

// Extracted lexical grammar

/// A variable of the lexical grammar.
#[derive(Debug, PartialEq, Eq)]
pub struct LexicalVariable {
    /// The variable's name.
    pub name: String,
    /// The variable's kind.
    pub kind: VariableType,
    pub implicit_precedence: i32,
    /// NFA state id; `LexicalGrammar::variable_index_for_nfa_state` compares
    /// state ids against it to find the owning variable.
    pub start_state: u32,
}

/// The lexical grammar: an NFA plus the variables defined over it.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct LexicalGrammar {
    pub nfa: Nfa,
    pub variables: Vec<LexicalVariable>,
}

// Extracted syntax grammar

/// One step of a [`Production`]: a symbol plus the metadata attached to it.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ProductionStep {
    /// The symbol at this step.
    pub symbol: Symbol,
    pub precedence: Precedence,
    pub associativity: Option<Associativity>,
    /// Alias applied to this step, if any.
    pub alias: Option<Alias>,
    /// Field name attached to this step, if any.
    pub field_name: Option<String>,
    /// Id of the reserved word set for this step.
    pub reserved_word_set_id: ReservedWordSetId,
}

/// Identifier of a reserved word set, wrapping a `usize`. The default value
/// is `0`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ReservedWordSetId(pub usize);

impl fmt::Display for ReservedWordSetId {
    /// Formats the id by delegating to the wrapped integer.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}

/// Sentinel id (`usize::MAX`) meaning "no reserved word set".
pub const NO_RESERVED_WORDS: ReservedWordSetId = ReservedWordSetId(usize::MAX);

/// A sequence of steps together with a dynamic precedence.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Production {
    /// The steps, in order.
    pub steps: Vec<ProductionStep>,
    pub dynamic_precedence: i32,
}

/// Lookup table from a (production, step index) pair to inlined productions.
#[derive(Default)]
pub struct InlinedProductionMap {
    /// Storage for the inlined productions.
    pub productions: Vec<Production>,
    /// Maps a production's address and a step index to indices into
    /// `productions`. The key is a raw pointer, so lookups rely on the address
    /// of the production being the same as when the entry was inserted.
    pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}

/// A variable of the syntax grammar, defined by a list of productions.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SyntaxVariable {
    /// The variable's name.
    pub name: String,
    /// The variable's kind.
    pub kind: VariableType,
    /// The productions that define the variable.
    pub productions: Vec<Production>,
}

/// An external token of the syntax grammar.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExternalToken {
    /// The token's name.
    pub name: String,
    /// The token's kind.
    pub kind: VariableType,
    /// The internal token this external token corresponds to, if any.
    pub corresponding_internal_token: Option<Symbol>,
}

/// The syntax grammar; the fields mirror those of `InputGrammar`, with names
/// replaced by `Symbol`s.
#[derive(Debug, Default)]
pub struct SyntaxGrammar {
    /// The grammar's variables.
    pub variables: Vec<SyntaxVariable>,
    /// Symbols listed as extras.
    pub extra_symbols: Vec<Symbol>,
    /// Expected conflicts, each a list of symbols.
    pub expected_conflicts: Vec<Vec<Symbol>>,
    /// External tokens.
    pub external_tokens: Vec<ExternalToken>,
    /// Supertype symbols.
    pub supertype_symbols: Vec<Symbol>,
    /// Symbols of variables to inline.
    pub variables_to_inline: Vec<Symbol>,
    /// The word token, if any.
    pub word_token: Option<Symbol>,
    /// Precedence orderings, each a list of entries.
    pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
    /// Reserved word sets; presumably indexed by `ReservedWordSetId` (TODO confirm).
    pub reserved_word_sets: Vec<TokenSet>,
}

#[cfg(test)]
impl ProductionStep {
    /// Creates a step for `symbol` with no precedence, associativity, alias or
    /// field name, and the default reserved word set id.
    #[must_use]
    pub fn new(symbol: Symbol) -> Self {
        Self {
            reserved_word_set_id: ReservedWordSetId::default(),
            field_name: None,
            alias: None,
            associativity: None,
            precedence: Precedence::None,
            symbol,
        }
    }

    /// Returns the step with its precedence and associativity replaced.
    pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
        Self {
            precedence,
            associativity,
            ..self
        }
    }

    /// Returns the step with its alias set to `value`.
    pub fn with_alias(self, value: &str, is_named: bool) -> Self {
        let alias = Alias {
            value: value.to_owned(),
            is_named,
        };
        Self {
            alias: Some(alias),
            ..self
        }
    }

    /// Returns the step with its field name set to `name`.
    pub fn with_field_name(self, name: &str) -> Self {
        Self {
            field_name: Some(name.to_owned()),
            ..self
        }
    }
}

impl Production {
    /// Returns the symbol of the first step, or `None` if there are no steps.
    pub fn first_symbol(&self) -> Option<Symbol> {
        let first_step = self.steps.first()?;
        Some(first_step.symbol)
    }
}

#[cfg(test)]
impl Variable {
    /// Creates a variable of kind `Named`.
    pub fn named(name: &str, rule: Rule) -> Self {
        let kind = VariableType::Named;
        Self {
            name: name.into(),
            kind,
            rule,
        }
    }

    /// Creates a variable of kind `Auxiliary`.
    pub fn auxiliary(name: &str, rule: Rule) -> Self {
        let kind = VariableType::Auxiliary;
        Self {
            name: name.into(),
            kind,
            rule,
        }
    }

    /// Creates a variable of kind `Hidden`.
    pub fn hidden(name: &str, rule: Rule) -> Self {
        let kind = VariableType::Hidden;
        Self {
            name: name.into(),
            kind,
            rule,
        }
    }

    /// Creates a variable of kind `Anonymous`.
    pub fn anonymous(name: &str, rule: Rule) -> Self {
        let kind = VariableType::Anonymous;
        Self {
            name: name.into(),
            kind,
            rule,
        }
    }
}

impl VariableType {
    /// Returns `true` for the `Named` and `Anonymous` kinds.
    pub fn is_visible(self) -> bool {
        matches!(self, Self::Named | Self::Anonymous)
    }
}

impl LexicalGrammar {
    /// Maps NFA state ids to variable indices, skipping an index that equals
    /// the one produced immediately before it.
    pub fn variable_indices_for_nfa_states<'a>(
        &'a self,
        state_ids: &'a [u32],
    ) -> impl Iterator<Item = usize> + 'a {
        let mut last_index = None;
        state_ids.iter().filter_map(move |&state_id| {
            let index = self.variable_index_for_nfa_state(state_id);
            if last_index == Some(index) {
                return None;
            }
            last_index = Some(index);
            Some(index)
        })
    }

    /// Returns the index of the first variable whose `start_state` is greater
    /// than or equal to `state_id`.
    ///
    /// # Panics
    ///
    /// Panics if no variable satisfies that condition.
    pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize {
        self.variables
            .iter()
            .enumerate()
            .find(|(_, variable)| variable.start_state >= state_id)
            .map(|(index, _)| index)
            .unwrap()
    }
}

impl SyntaxVariable {
    /// Returns `true` if the variable's kind is `Auxiliary`.
    pub fn is_auxiliary(&self) -> bool {
        matches!(self.kind, VariableType::Auxiliary)
    }

    /// Returns `true` if the variable's kind is `Hidden` or `Auxiliary`.
    pub fn is_hidden(&self) -> bool {
        matches!(self.kind, VariableType::Hidden | VariableType::Auxiliary)
    }
}

impl InlinedProductionMap {
    /// Looks up the inlined productions recorded for `step_index` of
    /// `production`.
    ///
    /// The lookup is keyed by the production's address, so `production` must be
    /// the same object that was used when the entry was recorded. Returns
    /// `None` when there is no entry for the pair.
    pub fn inlined_productions<'a>(
        &'a self,
        production: &Production,
        step_index: u32,
    ) -> Option<impl Iterator<Item = &'a Production> + 'a> {
        let key = (production as *const Production, step_index);
        let indices = self.production_map.get(&key)?;
        Some(indices.iter().map(move |&index| &self.productions[index]))
    }
}

/// Renders a precedence entry as text: a `Name` is wrapped in single quotes, and a `Symbol`
/// is prefixed with `$.`.
impl fmt::Display for PrecedenceEntry {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // e.g. `'foo'`
            Self::Name(n) => write!(f, "'{n}'"),
            // e.g. `$.foo`
            Self::Symbol(s) => write!(f, "$.{s}"),
        }
    }
}



================================================
FILE: crates/generate/src/nfa.rs
================================================
use std::{
    char,
    cmp::{max, Ordering},
    fmt,
    iter::ExactSizeIterator,
    mem::{self, swap},
    ops::{Range, RangeInclusive},
};

/// A set of characters represented as a vector of ranges.
///
/// Each range is a half-open `start..end` span of `u32` code points. The query and merge
/// routines on this type (binary search, two-pointer sweeps) rely on the ranges being sorted
/// by position and not overlapping one another.
#[derive(Clone, Default, PartialEq, Eq, Hash)]
pub struct CharacterSet {
    ranges: Vec<Range<u32>>,
}

/// A state in an NFA representing a regular grammar.
#[derive(Debug, PartialEq, Eq)]
pub enum NfaState {
    /// A transition on the characters in `chars` leading to the state `state_id`.
    /// `is_sep` and `precedence` are carried along into the grouped `NfaTransition`s
    /// (as `is_separator` and `precedence`).
    Advance {
        chars: CharacterSet,
        state_id: u32,
        is_sep: bool,
        precedence: i32,
    },
    /// A branch to two other states, given by id. `NfaCursor::add_states` follows both
    /// branches immediately and never stores a `Split` state in the cursor.
    Split(u32, u32),
    /// A state reported by `NfaCursor::completions` as the pair
    /// `(variable_index, precedence)`.
    Accept {
        variable_index: usize,
        precedence: i32,
    },
}

/// An NFA stored as a flat list of states. State ids used elsewhere in this module are
/// indices into `states`.
#[derive(PartialEq, Eq, Default)]
pub struct Nfa {
    pub states: Vec<NfaState>,
}

/// A position within an `Nfa`, expressed as the set of state ids currently occupied.
#[derive(Debug)]
pub struct NfaCursor<'a> {
    // Ids of the occupied states. `add_states` keeps this sorted and free of duplicates and
    // never stores `Split` states here; `force_reset` overwrites it without any checks.
    pub(crate) state_ids: Vec<u32>,
    // The automaton that the ids in `state_ids` index into.
    nfa: &'a Nfa,
}

/// One grouped transition produced by `NfaCursor::transitions`: a set of characters together
/// with the states reached on those characters.
#[derive(Debug, PartialEq, Eq)]
pub struct NfaTransition {
    /// The characters that trigger this transition.
    pub characters: CharacterSet,
    /// `true` only if every `Advance` state merged into this transition had `is_sep` set.
    pub is_separator: bool,
    /// The highest precedence among the `Advance` states merged into this transition.
    pub precedence: i32,
    /// The ids of the destination states, in ascending order.
    pub states: Vec<u32>,
}

/// One past the largest `char` value, used as the exclusive upper bound of the final range
/// when a character set is negated.
const END: u32 = char::MAX as u32 + 1;

impl CharacterSet {
    /// Create a character set that contains no characters.
    pub const fn empty() -> Self {
        Self { ranges: Vec::new() }
    }

    /// Build a set holding the *inclusive* span between two characters. The bounds may be
    /// supplied in either order.
    #[allow(clippy::single_range_in_vec_init)]
    #[cfg(test)]
    fn from_range(first: char, last: char) -> Self {
        let (low, high) = if first > last {
            (last, first)
        } else {
            (first, last)
        };
        let span = (low as u32)..(high as u32 + 1);
        Self { ranges: vec![span] }
    }

    /// Build a set whose only member is `c`.
    #[allow(clippy::single_range_in_vec_init)]
    pub fn from_char(c: char) -> Self {
        let code = c as u32;
        let ranges = vec![code..code + 1];
        Self { ranges }
    }

    /// Produce the complement of this set: every code point below `END` that is *not*
    /// a member of `self`.
    pub fn negate(mut self) -> Self {
        let mut complement = Vec::with_capacity(self.ranges.len() + 1);
        // End of the most recently visited range, i.e. where the next gap would begin.
        let mut gap_start = 0;
        for range in &self.ranges {
            if gap_start < range.start {
                complement.push(gap_start..range.start);
            }
            gap_start = range.end;
        }
        // Everything after the final range, up to the end of the code point space.
        if gap_start < END {
            complement.push(gap_start..END);
        }
        self.ranges = complement;
        self
    }

    /// Return this set with the single character `c` added.
    pub fn add_char(mut self, c: char) -> Self {
        let code = c as u32;
        self.add_int_range(0, code, code + 1);
        self
    }

    /// Return this set with the *inclusive* range of characters from `start` to `end` added.
    ///
    /// The bounds may be given in either order. A reversed pair is swapped (matching what
    /// `from_range` does) so that an inverted range, which would break the sorted-range
    /// representation, can never be stored.
    pub fn add_range(mut self, start: char, end: char) -> Self {
        let (first, last) = if start <= end {
            (start, end)
        } else {
            (end, start)
        };
        self.add_int_range(0, first as u32, last as u32 + 1);
        self
    }

    /// Return the union of this set and `other`.
    pub fn add(mut self, other: &Self) -> Self {
        // Each insertion reports where it landed; the next insertion resumes scanning from
        // that position instead of from the beginning.
        let mut resume_at = 0;
        other.ranges.iter().for_each(|range| {
            resume_at = self.add_int_range(resume_at, range.start, range.end);
        });
        self
    }

    /// Overwrite the contents of this set with a copy of `other`'s ranges.
    pub fn assign(&mut self, other: &Self) {
        self.ranges.clone_from(&other.ranges);
    }

    /// Insert the half-open code point range `start..end` into the set, scanning the existing
    /// ranges from index `i` onwards.
    ///
    /// Returns the index of the range that now holds `start..end` (either a newly inserted
    /// range or an existing one that was widened), so that a caller adding several ranges
    /// in order can resume the scan from there.
    fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize {
        while i < self.ranges.len() {
            let range = &mut self.ranges[i];
            // The new range lies strictly before this one, with a gap: insert it here.
            if range.start > end {
                self.ranges.insert(i, start..end);
                return i;
            }
            // The new range overlaps or touches this one: widen this one to cover both.
            if range.end >= start {
                range.end = range.end.max(end);
                range.start = range.start.min(start);

                // Join this range with the next range if needed.
                while i + 1 < self.ranges.len() && self.ranges[i + 1].start <= self.ranges[i].end {
                    self.ranges[i].end = self.ranges[i].end.max(self.ranges[i + 1].end);
                    self.ranges.remove(i + 1);
                }

                return i;
            }
            i += 1;
        }
        // No range from index `i` onwards reaches `start`: append at the end.
        self.ranges.push(start..end);
        i
    }

    /// Report whether this set and `other` have at least one character in common.
    pub fn does_intersect(&self, other: &Self) -> bool {
        let (mut left_i, mut right_i) = (0, 0);
        while left_i < self.ranges.len() && right_i < other.ranges.len() {
            let left = &self.ranges[left_i];
            let right = &other.ranges[right_i];
            if left.end <= right.start {
                // `left` ends before `right` begins: move on to the next left range.
                left_i += 1;
            } else if left.start >= right.end {
                // `right` ends before `left` begins: move on to the next right range.
                right_i += 1;
            } else {
                return true;
            }
        }
        false
    }

    /// Get the set of characters that are present in both this set
    /// and the other set. Remove those common characters from both
    /// of the operands.
    ///
    /// Both range lists are walked in step. Whenever the current left and right ranges
    /// overlap, the overlapping part is appended to the result and cut out of both operands
    /// in place (by shrinking, splitting, or removing the affected ranges). The diagrams
    /// below show the relative position of the two ranges handled by each case.
    pub fn remove_intersection(&mut self, other: &mut Self) -> Self {
        let mut intersection = Vec::new();
        let mut left_i = 0;
        let mut right_i = 0;
        while left_i < self.ranges.len() && right_i < other.ranges.len() {
            let left = &mut self.ranges[left_i];
            let right = &mut other.ranges[right_i];

            match left.start.cmp(&right.start) {
                Ordering::Less => {
                    // [ L ]
                    //     [ R ]
                    if left.end <= right.start {
                        left_i += 1;
                        continue;
                    }

                    match left.end.cmp(&right.end) {
                        // [ L ]
                        //   [ R ]
                        Ordering::Less => {
                            intersection.push(right.start..left.end);
                            // After the swap, `left` ends where `right` used to start and
                            // `right` starts where `left` used to end.
                            swap(&mut left.end, &mut right.start);
                            left_i += 1;
                        }

                        // [  L  ]
                        //   [ R ]
                        Ordering::Equal => {
                            intersection.push(right.clone());
                            left.end = right.start;
                            other.ranges.remove(right_i);
                        }

                        // [   L   ]
                        //   [ R ]
                        Ordering::Greater => {
                            intersection.push(right.clone());
                            // Split `left` around `right`: the part before `right` becomes a
                            // new range, and `left` itself keeps the part after it.
                            let new_range = left.start..right.start;
                            left.start = right.end;
                            self.ranges.insert(left_i, new_range);
                            other.ranges.remove(right_i);
                            left_i += 1;
                        }
                    }
                }
                // [ L ]
                // [  R  ]
                Ordering::Equal if left.end < right.end => {
                    intersection.push(left.start..left.end);
                    right.start = left.end;
                    self.ranges.remove(left_i);
                }
                // [ L ]
                // [ R ]
                Ordering::Equal if left.end == right.end => {
                    intersection.push(left.clone());
                    self.ranges.remove(left_i);
                    other.ranges.remove(right_i);
                }
                // [  L  ]
                // [ R ]
                Ordering::Equal if left.end > right.end => {
                    intersection.push(right.clone());
                    left.start = right.end;
                    other.ranges.remove(right_i);
                }
                // The three guarded arms above already cover every possible ordering of the
                // two end points; this arm only satisfies the exhaustiveness check.
                Ordering::Equal => {}
                Ordering::Greater => {
                    //     [ L ]
                    // [ R ]
                    if left.start >= right.end {
                        right_i += 1;
                        continue;
                    }

                    match left.end.cmp(&right.end) {
                        //   [ L ]
                        // [   R   ]
                        Ordering::Less => {
                            intersection.push(left.clone());
                            // Split `right` around `left`: the part before `left` becomes a
                            // new range, and `right` itself keeps the part after it.
                            let new_range = right.start..left.start;
                            right.start = left.end;
                            other.ranges.insert(right_i, new_range);
                            self.ranges.remove(left_i);
                            right_i += 1;
                        }

                        //   [ L ]
                        // [  R  ]
                        Ordering::Equal => {
                            intersection.push(left.clone());
                            right.end = left.start;
                            self.ranges.remove(left_i);
                        }

                        //   [   L   ]
                        // [   R   ]
                        Ordering::Greater => {
                            intersection.push(left.start..right.end);
                            // After the swap, `left` starts where `right` used to end and
                            // `right` ends where `left` used to start.
                            swap(&mut left.start, &mut right.end);
                            right_i += 1;
                        }
                    }
                }
            }
        }
        Self {
            ranges: intersection,
        }
    }

    /// Returns the characters of `self` that do not occur in `other`.
    pub fn difference(mut self, mut other: Self) -> Self {
        // The shared characters come back as a separate set; only the trimmed `self` is kept.
        let _shared = self.remove_intersection(&mut other);
        self
    }

    /// Returns the characters that occur in `self` or in `other`, but not in both.
    #[cfg(test)]
    fn symmetric_difference(mut self, mut other: Self) -> Self {
        // Strip the shared characters from both operands, then merge what is left.
        let _shared = self.remove_intersection(&mut other);
        let right_only = other;
        self.add(&right_only)
    }

    /// Iterate over every code point in the set as a `u32`, range by range in stored order.
    pub fn char_codes(&self) -> impl Iterator<Item = u32> + '_ {
        self.ranges.iter().cloned().flatten()
    }

    /// Iterate over every member of the set as a `char`, skipping any code point that
    /// `char::from_u32` rejects.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        self.ranges
            .iter()
            .flat_map(Clone::clone)
            .filter_map(char::from_u32)
    }

    /// Returns the number of ranges stored in this set (not the number of characters).
    pub fn range_count(&self) -> usize {
        self.ranges.len()
    }

    /// Iterate over the stored ranges as inclusive `char` ranges.
    ///
    /// Each bound is moved inward to the nearest code point that `char::from_u32` accepts.
    /// A stored range containing no such code point is omitted.
    pub fn ranges(&self) -> impl Iterator<Item = RangeInclusive<char>> + '_ {
        self.ranges.iter().filter_map(|range| {
            let first = range.clone().find_map(char::from_u32);
            let last = range.clone().rev().find_map(char::from_u32);
            first.zip(last).map(|(first, last)| first..=last)
        })
    }

    /// Returns `true` when the set stores no ranges.
    pub fn is_empty(&self) -> bool {
        self.ranges.is_empty()
    }

    /// Get a reduced list of character ranges, assuming that a given
    /// set of characters can be safely ignored.
    pub fn simplify_ignoring(&self, ruled_out_characters: &Self) -> Self {
        let mut prev_range: Option<Range<u32>> = None;
        Self {
            ranges: self
                .ranges
                .iter()
                .map(|range| Some(range.clone()))
                .chain([None])
                .filter_map(move |range| {
                    if let Some(range) = &range {
                        if ruled_out_characters.contains_codepoint_range(range.clone()) {
                            return None;
                        }

                        if let Some(prev_range) = &mut prev_range {
                            if ruled_out_characters
                                .contains_codepoint_range(prev_range.end..range.start)
                            {
                                prev_range.end = range.end;
                                return None;
                            }
                        }
                    }

                    let result = prev_range.clone();
                    prev_range = range;
                    result
                })
                .collect(),
        }
    }

    /// Report whether a single stored range covers all of `seek_range`.
    pub fn contains_codepoint_range(&self, seek_range: Range<u32>) -> bool {
        // Binary-search for the stored range containing `seek_range.start`. If none does,
        // the search yields an insertion position, and the containment test below rejects
        // whatever range (if any) sits there.
        let position = self
            .ranges
            .binary_search_by(|probe| {
                if probe.end <= seek_range.start {
                    Ordering::Less
                } else if probe.start > seek_range.start {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            })
            .unwrap_or_else(|insertion_point| insertion_point);

        match self.ranges.get(position) {
            Some(range) => range.start <= seek_range.start && range.end >= seek_range.end,
            None => false,
        }
    }

    /// Report whether `c` is a member of this set.
    pub fn contains(&self, c: char) -> bool {
        let code = c as u32;
        self.contains_codepoint_range(code..code + 1)
    }
}

impl Ord for CharacterSet {
    /// Orders character sets first by the total number of code points they contain, and
    /// then range by range: a shorter range sorts before a longer one, and two ranges of
    /// equal length are ordered by where they start.
    fn cmp(&self, other: &Self) -> Ordering {
        let count_cmp = self
            .ranges
            .iter()
            .map(ExactSizeIterator::len)
            .sum::<usize>()
            .cmp(&other.ranges.iter().map(ExactSizeIterator::len).sum());
        if count_cmp != Ordering::Equal {
            return count_cmp;
        }

        for (left_range, right_range) in self.ranges.iter().zip(other.ranges.iter()) {
            let cmp = left_range.len().cmp(&right_range.len());
            if cmp != Ordering::Equal {
                return cmp;
            }

            // Both ranges are runs of consecutive code points of the same length, so an
            // element-by-element comparison is decided entirely by their first elements.
            // Comparing the starts directly avoids walking the whole range, which can span
            // over a million code points. Empty ranges have no elements and compare equal.
            if !left_range.is_empty() {
                let cmp = left_range.start.cmp(&right_range.start);
                if cmp != Ordering::Equal {
                    return cmp;
                }
            }
        }
        Ordering::Equal
    }
}

/// Delegates to the total order defined by the `Ord` implementation, so the result is
/// always `Some`.
impl PartialOrd for CharacterSet {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl fmt::Debug for CharacterSet {
    /// Prints the set as `CharacterSet [a..=b, c..=d]`. A set that contains `char::MAX` is
    /// printed in negated form instead: a leading `^ ` followed by the ranges of its
    /// complement.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "CharacterSet [")?;
        let set = if self.contains(char::MAX) {
            write!(f, "^ ")?;
            self.clone().negate()
        } else {
            self.clone()
        };

        let mut ranges = set.ranges();
        if let Some(range) = ranges.next() {
            write!(f, "{range:?}")?;
        }
        for range in ranges {
            write!(f, ", ")?;
            write!(f, "{range:?}")?;
        }
        write!(f, "]")
    }
}

impl Nfa {
    /// Creates an NFA with no states.
    #[must_use]
    pub const fn new() -> Self {
        Self { states: Vec::new() }
    }

    /// Returns the id (index into `states`) of the last state in the list.
    ///
    /// The NFA must contain at least one state: with an empty `states` vector the
    /// subtraction below underflows.
    pub fn last_state_id(&self) -> u32 {
        self.states.len() as u32 - 1
    }
}

impl fmt::Debug for Nfa {
    /// Prints one `index: state` line per state, wrapped in `Nfa { states: { ... } }`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "Nfa {{ states: {{")?;
        self.states
            .iter()
            .enumerate()
            .try_for_each(|(i, state)| writeln!(f, "  {i}: {state:?},"))?;
        write!(f, "}} }}")
    }
}

impl<'a> NfaCursor<'a> {
    /// Creates a cursor over `nfa` and seeds it with `states` via `add_states`.
    pub fn new(nfa: &'a Nfa, mut states: Vec<u32>) -> Self {
        let mut cursor = Self {
            state_ids: Vec::new(),
            nfa,
        };
        cursor.add_states(&mut states);
        cursor
    }

    /// Discards the current states and re-seeds the cursor with `states`, processing them
    /// through `add_states`.
    pub fn reset(&mut self, mut states: Vec<u32>) {
        self.state_ids.clear();
        self.add_states(&mut states);
    }

    /// Replaces the current states with `states` exactly as given. Unlike `reset`, this does
    /// not go through `add_states`, so the ids are neither expanded, sorted, nor deduplicated.
    pub fn force_reset(&mut self, states: Vec<u32>) {
        self.state_ids = states;
    }

    /// Yields, for each `Advance` state under the cursor, its character set together with
    /// its `is_sep` flag.
    pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
        self.raw_transitions()
            .map(|(chars, is_sep, _precedence, _state_id)| (chars, is_sep))
    }

    /// Returns the transitions available from the cursor's current states, grouped by
    /// `group_transitions` so that their character sets do not overlap.
    pub fn transitions(&self) -> Vec<NfaTransition> {
        Self::group_transitions(self.raw_transitions())
    }

    /// Yields `(chars, is_sep, precedence, state_id)` for every `Advance` state under the
    /// cursor, in the order of `state_ids`. States of any other kind are skipped.
    fn raw_transitions(&self) -> impl Iterator<Item = (&CharacterSet, bool, i32, u32)> {
        self.state_ids
            .iter()
            .filter_map(move |&id| match &self.nfa.states[id as usize] {
                NfaState::Advance {
                    chars,
                    state_id,
                    is_sep,
                    precedence,
                } => Some((chars, *is_sep, *precedence, *state_id)),
                _ => None,
            })
    }

    /// Turns a list of raw `(chars, is_sep, precedence, state)` transitions, whose character
    /// sets may overlap, into a list of transitions with pairwise disjoint character sets.
    ///
    /// Characters shared by several raw transitions end up in one transition that lists all
    /// of the corresponding destination states. The result is sorted by character set.
    fn group_transitions<'b>(
        iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
    ) -> Vec<NfaTransition> {
        let mut result = Vec::<NfaTransition>::new();
        for (chars, is_sep, prec, state) in iter {
            // `chars` is whittled down as its overlaps with existing transitions are
            // carved out; whatever remains at the end becomes a transition of its own.
            let mut chars = chars.clone();
            let mut i = 0;
            while i < result.len() && !chars.is_empty() {
                let intersection = result[i].characters.remove_intersection(&mut chars);
                if !intersection.is_empty() {
                    // The shared characters lead to the existing destinations plus `state`.
                    // `states` is kept sorted and free of duplicates.
                    let mut intersection_states = result[i].states.clone();
                    if let Err(j) = intersection_states.binary_search(&state) {
                        intersection_states.insert(j, state);
                    }
                    let intersection_transition = NfaTransition {
                        characters: intersection,
                        is_separator: result[i].is_separator && is_sep,
                        precedence: max(result[i].precedence, prec),
                        states: intersection_states,
                    };
                    if result[i].characters.is_empty() {
                        // The existing transition was entirely shared: replace it.
                        result[i] = intersection_transition;
                    } else {
                        // Keep the remainder of the existing transition and insert the
                        // shared part before it; the extra increment skips that remainder.
                        result.insert(i, intersection_transition);
                        i += 1;
                    }
                }
                i += 1;
            }
            if !chars.is_empty() {
                result.push(NfaTransition {
                    characters: chars,
                    precedence: prec,
                    states: vec![state],
                    is_separator: is_sep,
                });
            }
        }

        // Merge transitions that agree on destination states, separator flag and precedence
        // by unioning their character sets into the earlier of the two.
        let mut i = 0;
        while i < result.len() {
            for j in 0..i {
                if result[j].states == result[i].states
                    && result[j].is_separator == result[i].is_separator
                    && result[j].precedence == result[i].precedence
                {
                    let characters = mem::take(&mut result[j].characters);
                    result[j].characters = characters.add(&result[i].characters);
                    result.remove(i);
                    // Step back so that the increment below re-examines the element that
                    // moved into position `i`.
                    i -= 1;
                    break;
                }
            }
            i += 1;
        }

        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
        result
    }

    /// Yields `(variable_index, precedence)` for every `Accept` state under the cursor, in
    /// the order of `state_ids`. States of any other kind are skipped.
    pub fn completions(&self) -> impl Iterator<Item = (usize, i32)> + '_ {
        self.state_ids
            .iter()
            .filter_map(move |&id| match &self.nfa.states[id as usize] {
                NfaState::Accept {
                    variable_index,
                    precedence,
                } => Some((*variable_index, *precedence)),
                _ => None,
            })
    }

    /// Adds the given states to the cursor, following `Split` states through to their
    /// branches.
    ///
    /// `new_state_ids` doubles as the work list: the branches of each `Split` state are
    /// appended to it (unless already present) and processed in turn. Every non-`Split`
    /// state is inserted into `state_ids` at its sorted position, unless it is already there.
    pub fn add_states(&mut self, new_state_ids: &mut Vec<u32>) {
        let mut next = 0;
        while let Some(&state_id) = new_state_ids.get(next) {
            next += 1;
            match &self.nfa.states[state_id as usize] {
                NfaState::Split(left, right) => {
                    let (left, right) = (*left, *right);
                    // Both membership checks happen before either push.
                    let left_is_new = !new_state_ids.contains(&left);
                    let right_is_new = !new_state_ids.contains(&right);
                    if left_is_new {
                        new_state_ids.push(left);
                    }
                    if right_is_new {
                        new_state_ids.push(right);
                    }
                }
                _ => {
                    if let Err(slot) = self.state_ids.binary_search(&state_id) {
                        self.state_ids.insert(slot, state_id);
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_adding_ranges() {
        // The set `c..=<first_end>` plus `q..=s`, which most of the checks below expect.
        let two_ranges = |first_end: char| {
            CharacterSet::empty()
                .add_range('c', first_end)
                .add_range('q', 's')
        };
        let mut set = two_ranges('m');

        // A character inside an existing range changes nothing.
        set = set.add_char('d');
        assert_eq!(set, two_ranges('m'));

        // Neither does the last character of an existing range.
        set = set.add_char('m');
        assert_eq!(set, two_ranges('m'));

        // A character directly after the end of a range extends that range.
        set = set.add_char('n');
        assert_eq!(set, two_ranges('n'));

        // Filling the gap between two ranges fuses them into one.
        set = set.add_range('o', 'p');
        assert_eq!(set, CharacterSet::empty().add_range('c', 's'));

        // A range that spans several existing ranges absorbs all of them.
        set = CharacterSet::empty()
            .add_range('c', 'f')
            .add_range('i', 'l')
            .add_range('n', 'r');
        set = set.add_range('d', 'o');
        assert_eq!(set, CharacterSet::empty().add_range('c', 'r'));
    }

    #[test]
    fn test_adding_sets() {
        let base = CharacterSet::empty()
            .add_range('c', 'f')
            .add_range('i', 'l');
        let addition = CharacterSet::empty().add_range('b', 'g').add_char('h');

        // The union covers `b..=l`, which both sides of the comparison build the same way.
        let expected = CharacterSet::empty()
            .add_range('b', 'g')
            .add_range('h', 'l');
        assert_eq!(base.add(&addition), expected);
    }

    #[test]
    fn test_group_transitions() {
        // Each row pairs a list of raw `(chars, is_sep, precedence, state)` transitions with
        // the grouped transitions that `NfaCursor::group_transitions` must produce for them,
        // in the sorted order it returns.
        let table = [
            // overlapping character classes
            (
                vec![
                    (CharacterSet::empty().add_range('a', 'f'), false, 0, 1),
                    (CharacterSet::empty().add_range('d', 'i'), false, 1, 2),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('a', 'c'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('d', 'f'),
                        is_separator: false,
                        precedence: 1,
                        states: vec![1, 2],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_range('g', 'i'),
                        is_separator: false,
                        precedence: 1,
                        states: vec![2],
                    },
                ],
            ),
            // large character class followed by many individual characters
            (
                vec![
                    (CharacterSet::empty().add_range('a', 'z'), false, 0, 1),
                    (CharacterSet::empty().add_char('d'), false, 0, 2),
                    (CharacterSet::empty().add_char('i'), false, 0, 3),
                    (CharacterSet::empty().add_char('f'), false, 0, 4),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('d'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 2],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('f'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 4],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('i'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1, 3],
                    },
                    NfaTransition {
                        characters: CharacterSet::empty()
                            .add_range('a', 'c')
                            .add_char('e')
                            .add_range('g', 'h')
                            .add_range('j', 'z'),
                        is_separator: false,
                        precedence: 0,
                        states: vec![1],
                    },
                ],
            ),
            // negated character class followed by an individual character
            (
                vec![
                    (CharacterSet::empty().add_char('0'), false, 0, 1),
                    (CharacterSet::empty().add_char('b'), false, 0, 2),
                    (
                        CharacterSet::empty().add_range('a', 'f').negate(),
                        false,
                        0,
                        3,
                    ),
                    (CharacterSet::empty().add_char('c'), false, 0, 4),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('0'),
                        precedence: 0,
                        states: vec![1, 3],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('b'),
                        precedence: 0,
                        states: vec![2],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('c'),
                        precedence: 0,
                        states: vec![4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty()
                            .add_range('a', 'f')
                            .add_char('0')
                            .negate(),
                        precedence: 0,
                        states: vec![3],
                        is_separator: false,
                    },
                ],
            ),
            // multiple negated character classes
            (
                vec![
                    (CharacterSet::from_char('a'), false, 0, 1),
                    (CharacterSet::from_range('a', 'c').negate(), false, 0, 2),
                    (CharacterSet::from_char('g'), false, 0, 6),
                    (CharacterSet::from_range('d', 'f').negate(), false, 0, 3),
                    (CharacterSet::from_range('g', 'i').negate(), false, 0, 4),
                    (CharacterSet::from_char('g'), false, 0, 5),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::from_char('a'),
                        precedence: 0,
                        states: vec![1, 3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::from_char('g'),
                        precedence: 0,
                        states: vec![2, 3, 5, 6],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::from_range('b', 'c'),
                        precedence: 0,
                        states: vec![3, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::from_range('h', 'i'),
                        precedence: 0,
                        states: vec![2, 3],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::from_range('d', 'f'),
                        precedence: 0,
                        states: vec![2, 4],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::from_range('a', 'i').negate(),
                        precedence: 0,
                        states: vec![2, 3, 4],
                        is_separator: false,
                    },
                ],
            ),
            // disjoint characters with same state
            (
                vec![
                    (CharacterSet::from_char('a'), false, 0, 1),
                    (CharacterSet::from_char('b'), false, 0, 2),
                    (CharacterSet::from_char('c'), false, 0, 1),
                    (CharacterSet::from_char('d'), false, 0, 1),
                    (CharacterSet::from_char('e'), false, 0, 2),
                ],
                vec![
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('b').add_char('e'),
                        precedence: 0,
                        states: vec![2],
                        is_separator: false,
                    },
                    NfaTransition {
                        characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
                        precedence: 0,
                        states: vec![1],
                        is_separator: false,
                    },
                ],
            ),
        ];

        // The row index is included in the failure message to identify the failing case.
        for (i, row) in table.iter().enumerate() {
            assert_eq!(
                NfaCursor::group_transitions(
                    row.0
                        .iter()
                        .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state))
                ),
                row.1,
                "row {i}",
            );
        }
    }

    #[test]
    fn test_character_set_intersection_difference_ops() {
        struct Row {
            left: CharacterSet,
            right: CharacterSet,
            left_only: CharacterSet,
            right_only: CharacterSet,
            intersection: CharacterSet,
        }

        let rows = [
            // [ L ]
            //     [ R ]
            Row {
                left: CharacterSet::from_range('a', 'f'),
                right: CharacterSet::from_range('g', 'm'),
                left_only: CharacterSet::from_range('a', 'f'),
                right_only: CharacterSet::from_range('g', 'm'),
                intersection: CharacterSet::empty(),
            },
            // [ L ]
            //   [ R ]
            Row {
                left: CharacterSet::from_range('a', 'f'),
                right: CharacterSet::from_range('c', 'i'),
                left_only: CharacterSet::from_range('a', 'b'),
                right_only: CharacterSet::from_range('g', 'i'),
                intersection: CharacterSet::from_range('c', 'f'),
            },
            // [  L  ]
            //   [ R ]
            Row {
                left: CharacterSet::from_range('a', 'f'),
                right: CharacterSet::from_range('d', 'f'),
                left_only: CharacterSet::from_range('a', 'c'),
                right_only: CharacterSet::empty(),
                intersection: CharacterSet::from_range('d', 'f'),
            },
            // [   L   ]
            //   [ R ]
            Row {
                left: CharacterSet::from_range('a', 'm'),
                right: CharacterSet::from_range('d', 'f'),
                left_only: CharacterSet::empty()
                    .add_range('a', 'c')
                    .add_range('g', 'm'),
                right_only: CharacterSet::empty(),
                intersection: CharacterSet::from_range('d', 'f'),
            },
            // [    L    ]
            //         [R]
            Row {
                left: CharacterSet::from_range(',', '/'),
                right: CharacterSet::from_char('/'),
                left_only: CharacterSet::from_range(',', '.'),
                right_only: CharacterSet::empty(),
                intersection: CharacterSet::from_char('/'),
            },
            // [    L    ]
            //         [R]
            Row {
                left: CharacterSet::from_range(',', '/'),
                right: CharacterSet::from_char('/'),
                left_only: CharacterSet::from_range(',', '.'),
                right_only: CharacterSet::empty(),
                intersection: CharacterSet::from_char('/'),
            },
            // [ L1 ] [ L2 ]
            //    [  R  ]
            Row {
                left: CharacterSet::empty()
                    .add_range('a', 'e')
                    .add_range('h', 'l'),
                right: CharacterSet::from_range('c', 'i'),
                left_only: CharacterSet::empty()
                    .add_range('a', 'b')
                    .add_range('j', 'l'),
                right_only: CharacterSet::from_range('f', 'g'),
                intersection: CharacterSet::empty()
                    .add_range('c', 'e')
                    .add_range('h', 'i'),
            },
        ];

        for (i, row) in rows.iter().enumerate() {
            let mut left = row.left.clone();
            let mut right = row.right.clone();
            assert_eq!(
                left.remove_intersection(&mut right),
                row.intersection,
                "row {i}a: {:?} && {:?}",
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
                "row {i}a: {:?} - {:?}",
                row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
                "row {i}a: {:?} - {:?}",
                row.right, row.left
            );

            let mut left = row.left.clone();
            let mut right = row.right.clone();
            assert_eq!(
                right.remove_intersection(&mut left),
                row.intersection,
                "row {i}b: {:?} && {:?}",
                row.left,
                row.right
            );
            assert_eq!(
                left, row.left_only,
                "row {i}b: {:?} - {:?}",
                row.left, row.right
            );
            assert_eq!(
                right, row.right_only,
                "row {i}b: {:?} - {:?}",
                row.right, row.left
            );

            assert_eq!(
                row.left.clone().difference(row.right.clone()),
                row.left_only,
                "row {i}b: {:?} -- {:?}",
                row.left,
                row.right
            );

            let symm_difference = row.left_only.clone().add(&row.right_only);
            assert_eq!(
                row.left.clone().symmetric_difference(row.right.clone()),
                symm_difference,
                "row {i}b: {:?} ~~ {:?}",
                row.left,
                row.right
            );
        }
    }

    /// `does_intersect` must report whether two sets share at least one
    /// character, and must give the same answer in both directions.
    #[test]
    fn test_character_set_does_intersect() {
        // Each case is `(lhs, rhs, expected)`.
        let cases = [
            // Two empty sets.
            (CharacterSet::empty(), CharacterSet::empty(), false),
            // The same single character on both sides.
            (
                CharacterSet::empty().add_char('a'),
                CharacterSet::empty().add_char('a'),
                true,
            ),
            // 'b' sits between the two members of the other set.
            (
                CharacterSet::empty().add_char('b'),
                CharacterSet::empty().add_char('a').add_char('c'),
                false,
            ),
            // A character inside a range.
            (
                CharacterSet::from_char('b'),
                CharacterSet::from_range('a', 'c'),
                true,
            ),
            // A character against the negation of a range containing it.
            (
                CharacterSet::from_char('b'),
                CharacterSet::from_range('a', 'c').negate(),
                false,
            ),
            // Two identical negated sets.
            (
                CharacterSet::from_char('a').negate(),
                CharacterSet::from_char('a').negate(),
                true,
            ),
            // A character against the negation of a different character.
            (
                CharacterSet::from_char('c'),
                CharacterSet::from_char('a').negate(),
                true,
            ),
            // A character at the upper end of a range.
            (
                CharacterSet::from_range('c', 'f'),
                CharacterSet::from_char('f'),
                true,
            ),
        ];

        for (lhs, rhs, expected) in &cases {
            assert_eq!(lhs.does_intersect(rhs), *expected, "{lhs:?} vs {rhs:?}");
            assert_eq!(rhs.does_intersect(lhs), *expected, "{rhs:?} vs {lhs:?}");
        }
    }

    /// Table-driven check of `simplify_ignoring`: for each row, the set built
    /// from `chars` is simplified while ignoring `ruled_out_chars`, and the
    /// result must equal the set built from `expected_ranges`.
    #[test]
    #[allow(clippy::single_range_in_vec_init)]
    fn test_character_set_simplify_ignoring() {
        struct Row {
            chars: Vec<char>,
            ruled_out_chars: Vec<char>,
            // Both ends of each range are passed to `add_range`.
            expected_ranges: Vec<Range<char>>,
        }

        // Builds a set by adding the given characters one at a time.
        fn chars_to_set(chars: &[char]) -> CharacterSet {
            let mut set = CharacterSet::empty();
            for c in chars {
                set = set.add_char(*c);
            }
            set
        }

        let table = [
            Row {
                chars: vec!['a'],
                ruled_out_chars: vec![],
                expected_ranges: vec!['a'..'a'],
            },
            Row {
                chars: vec!['a', 'b', 'c', 'e', 'z'],
                ruled_out_chars: vec![],
                expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'],
            },
            Row {
                chars: vec!['a', 'b', 'c', 'e', 'h', 'z'],
                ruled_out_chars: vec!['d', 'f', 'g'],
                expected_ranges: vec!['a'..'h', 'z'..'z'],
            },
            Row {
                chars: vec!['a', 'b', 'c', 'g', 'h', 'i'],
                ruled_out_chars: vec!['d', 'j'],
                expected_ranges: vec!['a'..'c', 'g'..'i'],
            },
            Row {
                chars: vec!['c', 'd', 'e', 'g', 'h'],
                ruled_out_chars: vec!['a', 'b', 'c', 'd', 'e', 'f'],
                expected_ranges: vec!['g'..'h'],
            },
            Row {
                chars: vec!['I', 'N'],
                ruled_out_chars: vec!['A', 'I', 'N', 'Z'],
                expected_ranges: vec![],
            },
        ];

        for row in &table {
            let chars = &row.chars;
            let ruled_out_chars = chars_to_set(&row.ruled_out_chars);
            let actual = chars_to_set(chars).simplify_ignoring(&ruled_out_chars);

            let mut expected = CharacterSet::empty();
            for range in &row.expected_ranges {
                expected = expected.add_range(range.start, range.end);
            }

            assert_eq!(
                actual, expected,
                "chars: {chars:?}, ruled out chars: {ruled_out_chars:?}"
            );
        }
    }
}



================================================
FILE: crates/generate/src/parse_grammar.rs
================================================
use std::collections::HashSet;

use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use thiserror::Error;

use super::{
    grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
    rules::{Precedence, Rule},
};
use crate::grammars::ReservedWordContext;

/// JSON form of a single grammar rule, as deserialized by serde.
///
/// The enum is internally tagged: the JSON object's `"type"` field selects the
/// variant, which is why the variants use the upper-case JSON type names
/// (hence the naming-lint `allow`s). `parse_rule` converts each variant into
/// the corresponding `Rule`.
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum RuleJSON {
    ALIAS {
        content: Box<RuleJSON>,
        named: bool,
        value: String,
    },
    BLANK,
    STRING {
        value: String,
    },
    PATTERN {
        value: String,
        // Optional flag characters; `parse_rule` keeps only `i`.
        flags: Option<String>,
    },
    SYMBOL {
        name: String,
    },
    CHOICE {
        members: Vec<RuleJSON>,
    },
    FIELD {
        name: String,
        content: Box<RuleJSON>,
    },
    SEQ {
        members: Vec<RuleJSON>,
    },
    REPEAT {
        content: Box<RuleJSON>,
    },
    REPEAT1 {
        content: Box<RuleJSON>,
    },
    // Dynamic precedence only accepts an integer value, unlike the other
    // `PREC*` variants, which also accept a name.
    PREC_DYNAMIC {
        value: i32,
        content: Box<RuleJSON>,
    },
    PREC_LEFT {
        value: PrecedenceValueJSON,
        content: Box<RuleJSON>,
    },
    PREC_RIGHT {
        value: PrecedenceValueJSON,
        content: Box<RuleJSON>,
    },
    PREC {
        value: PrecedenceValueJSON,
        content: Box<RuleJSON>,
    },
    TOKEN {
        content: Box<RuleJSON>,
    },
    IMMEDIATE_TOKEN {
        content: Box<RuleJSON>,
    },
    RESERVED {
        context_name: String,
        content: Box<RuleJSON>,
    },
}

/// JSON form of a precedence value: either an integer or a name.
///
/// The enum is untagged, so serde tries the variants in declaration order:
/// a JSON integer becomes `Integer`, a JSON string becomes `Name`.
#[derive(Deserialize)]
#[serde(untagged)]
enum PrecedenceValueJSON {
    Integer(i32),
    Name(String),
}

/// Top-level shape of a grammar JSON document.
///
/// Only `name` and `rules` are required; every other field carries
/// `#[serde(default)]` and falls back to its empty default when missing.
#[derive(Deserialize)]
pub struct GrammarJSON {
    pub name: String,
    // Kept as raw JSON values here; `parse_grammar` deserializes each value
    // into a `RuleJSON` individually.
    rules: Map<String, Value>,
    #[serde(default)]
    precedences: Vec<Vec<RuleJSON>>,
    #[serde(default)]
    conflicts: Vec<Vec<String>>,
    #[serde(default)]
    externals: Vec<RuleJSON>,
    #[serde(default)]
    extras: Vec<RuleJSON>,
    #[serde(default)]
    inline: Vec<String>,
    #[serde(default)]
    supertypes: Vec<String>,
    #[serde(default)]
    word: Option<String>,
    // Maps a reserved-word context name to a raw JSON value that
    // `parse_grammar` requires to be an array of rules.
    #[serde(default)]
    reserved: Map<String, Value>,
}

/// Result alias for grammar parsing, with `ParseGrammarError` as the error type.
pub type ParseGrammarResult<T> = Result<T, ParseGrammarError>;

/// Errors that can occur while turning grammar JSON into an `InputGrammar`.
#[derive(Debug, Error, Serialize)]
pub enum ParseGrammarError {
    // Holds the rendered message of a `serde_json::Error` (see the `From` impl).
    #[error("{0}")]
    Serialization(String),
    // Raised when an entry of `extras` parses to an empty string rule.
    #[error("Rules in the `extras` array must not contain empty strings")]
    InvalidExtra,
    // Raised when a `precedences` entry is neither a STRING nor a SYMBOL rule.
    #[error("Invalid rule in precedences array. Only strings and symbols are allowed")]
    Unexpected,
    // Raised when a value in the `reserved` map is not a JSON array.
    #[error("Reserved word sets must be arrays")]
    InvalidReservedWordSet,
    // Raised when a SYMBOL rule appears inside the content of a TOKEN rule;
    // the payload is the symbol's name.
    #[error("Grammar Error: Unexpected rule `{0}` in `token()` call")]
    UnexpectedRule(String),
}

impl From<serde_json::Error> for ParseGrammarError {
    fn from(value: serde_json::Error) -> Self {
        Self::Serialization(value.to_string())
    }
}

/// Check whether `rule` contains a reference to the variable named `target`.
///
/// `is_external` says that `rule` is an entry of the grammar's `externals`
/// list. In that case a named symbol sitting at the top of the rule (possibly
/// wrapped in metadata or a reserved-word context) is *not* counted as a
/// reference, because it merely declares the external token.
///
/// For example, if there is an external rule **and** a normal rule both called
/// `foo`, `foo` should not be considered used unless it also appears nested
/// inside another rule. Once the search descends into a choice, sequence or
/// repeat, the flag is dropped and every matching named symbol counts.
fn rule_is_referenced(rule: &Rule, target: &str, is_external: bool) -> bool {
    match rule {
        // Leaves that can never name a variable.
        Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
        Rule::NamedSymbol(name) => !is_external && name == target,
        // Wrappers are transparent: the external flag is passed through.
        Rule::Metadata { rule: inner, .. } | Rule::Reserved { rule: inner, .. } => {
            rule_is_referenced(inner, target, is_external)
        }
        // Anything nested inside a composite rule is a genuine use.
        Rule::Repeat(inner) => rule_is_referenced(inner, target, false),
        Rule::Choice(members) | Rule::Seq(members) => members
            .iter()
            .any(|member| rule_is_referenced(member, target, false)),
    }
}

/// Decide whether the variable `target_name` is used by the grammar.
///
/// A variable counts as used when it is the root (first) rule, when it is
/// referenced from `extras` or from inside an `externals` entry, or when some
/// other rule that references it is itself used.
///
/// `in_progress` holds the names currently being resolved further up the
/// recursion so that cyclic references do not recurse forever; the set is
/// left exactly as it was found when the function returns.
///
/// Panics if `grammar_rules` is empty.
fn variable_is_used(
    grammar_rules: &[(String, Rule)],
    extras: &[Rule],
    externals: &[Rule],
    target_name: &str,
    in_progress: &mut HashSet<String>,
) -> bool {
    // The first rule is the grammar's root and is always used.
    let (root_name, _) = grammar_rules.first().unwrap();
    if target_name == root_name {
        return true;
    }

    let used_by_extras = extras
        .iter()
        .any(|extra| rule_is_referenced(extra, target_name, false));
    if used_by_extras {
        return true;
    }

    let used_by_externals = externals
        .iter()
        .any(|external| rule_is_referenced(external, target_name, true));
    if used_by_externals {
        return true;
    }

    // Otherwise the variable is used only if some *other* rule mentions it
    // and that rule is itself used.
    in_progress.insert(target_name.to_string());
    let mut is_used = false;
    for (name, rule) in grammar_rules {
        if name == target_name {
            continue;
        }
        // Skip rules that do not mention the target, and rules already being
        // resolved higher up the call stack (cycle guard).
        if in_progress.contains(name) || !rule_is_referenced(rule, target_name, false) {
            continue;
        }
        if variable_is_used(grammar_rules, extras, externals, name, in_progress) {
            is_used = true;
            break;
        }
    }
    in_progress.remove(target_name);

    is_used
}

pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
    let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;

    let mut extra_symbols =
        grammar_json
            .extras
            .into_iter()
            .try_fold(Vec::<Rule>::new(), |mut acc, item| {
                let rule = parse_rule(item, false)?;
                if let Rule::String(ref value) = rule {
                    if value.is_empty() {
                        Err(ParseGrammarError::InvalidExtra)?;
                    }
                }
                acc.push(rule);
                ParseGrammarResult::Ok(acc)
            })?;

    let mut external_tokens = grammar_json
        .externals
        .into_iter()
        .map(|e| parse_rule(e, false))
        .collect::<ParseGrammarResult<Vec<_>>>()?;

    let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
    for list in grammar_json.precedences {
        let mut ordering = Vec::with_capacity(list.len());
        for entry in list {
            ordering.push(match entry {
                RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
                RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
                _ => Err(ParseGrammarError::Unexpected)?,
            });
        }
        precedence_orderings.push(ordering);
    }

    let mut variables = Vec::with_capacity(grammar_json.rules.len());

    let rules = grammar_json
        .rules
        .into_iter()
        .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?, false)?)))
        .collect::<ParseGrammarResult<Vec<_>>>()?;

    let mut in_progress = HashSet::new();

    for (name, rule) in &rules {
        if grammar_json.word.as_ref().is_none_or(|w| w != name)
            && !variable_is_used(
                &rules,
                &extra_symbols,
                &external_tokens,
                name,
                &mut in_progress,
            )
        {
            grammar_json.conflicts.retain(|r| !r.contains(name));
            grammar_json.supertypes.retain(|r| r != name);
            grammar_json.inline.retain(|r| r != name);
            extra_symbols.retain(|r| !rule_is_referenced(r, name, true));
            external_tokens.retain(|r| !rule_is_referenced(r, name, true));
            precedence_orderings.retain(|r| {
                !r.iter().any(|e| {
                    let PrecedenceEntry::Symbol(s) = e else {
                        return false;
                    };
                    s == name
                })
            });
            continue;
        }
        variables.push(Variable {
            name: name.clone(),
            kind: VariableType::Named,
            rule: rule.clone(),
        });
    }

    let reserved_words = grammar_json
        .reserved
        .into_iter()
        .map(|(name, rule_values)| {
            let Value::Array(rule_values) = rule_values else {
                Err(ParseGrammarError::InvalidReservedWordSet)?
            };

            let mut reserved_words = Vec::with_capacity(rule_values.len());
            for value in rule_values {
                reserved_words.push(parse_rule(serde_json::from_value(value)?, false)?);
            }
            Ok(ReservedWordContext {
                name,
                reserved_words,
            })
        })
        .collect::<ParseGrammarResult<Vec<_>>>()?;

    Ok(InputGrammar {
        name: grammar_json.name,
        word_token: grammar_json.word,
        expected_conflicts: grammar_json.conflicts,
        supertype_symbols: grammar_json.supertypes,
        variables_to_inline: grammar_json.inline,
        precedence_orderings,
        variables,
        extra_symbols,
        external_tokens,
        reserved_words,
    })
}

/// Convert one deserialized `RuleJSON` node, and recursively its children,
/// into the generator's `Rule` representation.
///
/// `is_token` is `true` while converting the content of a `TOKEN` rule. In
/// that mode a `SYMBOL` is rejected with `ParseGrammarError::UnexpectedRule`.
/// Every other variant passes the flag on to its children unchanged; note
/// that `IMMEDIATE_TOKEN` does not switch it on by itself.
fn parse_rule(json: RuleJSON, is_token: bool) -> ParseGrammarResult<Rule> {
    // Converts every member of a CHOICE/SEQ, stopping at the first error.
    fn parse_members(members: Vec<RuleJSON>, is_token: bool) -> ParseGrammarResult<Vec<Rule>> {
        members
            .into_iter()
            .map(|member| parse_rule(member, is_token))
            .collect()
    }

    let rule = match json {
        RuleJSON::BLANK => Rule::Blank,
        RuleJSON::STRING { value } => Rule::String(value),
        RuleJSON::PATTERN { value, flags } => {
            // Only the `i` flag is carried over to the rule.
            let mut kept_flags = String::new();
            for c in flags.iter().flat_map(|f| f.chars()) {
                match c {
                    'i' => kept_flags.push(c),
                    // silently ignore unicode flags
                    'u' | 'v' => {}
                    _ => eprintln!("Warning: unsupported flag {c}"),
                }
            }
            Rule::Pattern(value, kept_flags)
        }
        RuleJSON::SYMBOL { name } if is_token => {
            return Err(ParseGrammarError::UnexpectedRule(name));
        }
        RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
        RuleJSON::ALIAS {
            content,
            named,
            value,
        } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::alias(inner, value, named)
        }
        RuleJSON::FIELD { name, content } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::field(name, inner)
        }
        RuleJSON::CHOICE { members } => Rule::choice(parse_members(members, is_token)?),
        RuleJSON::SEQ { members } => Rule::seq(parse_members(members, is_token)?),
        RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content, is_token)?),
        RuleJSON::REPEAT { content } => {
            // REPEAT is expressed as "REPEAT1 or nothing".
            let repeated = Rule::repeat(parse_rule(*content, is_token)?);
            Rule::choice(vec![repeated, Rule::Blank])
        }
        RuleJSON::PREC { value, content } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::prec(value.into(), inner)
        }
        RuleJSON::PREC_LEFT { value, content } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::prec_left(value.into(), inner)
        }
        RuleJSON::PREC_RIGHT { value, content } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::prec_right(value.into(), inner)
        }
        RuleJSON::PREC_DYNAMIC { value, content } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::prec_dynamic(value, inner)
        }
        RuleJSON::RESERVED {
            context_name,
            content,
        } => {
            let inner = parse_rule(*content, is_token)?;
            Rule::Reserved {
                rule: Box::new(inner),
                context_name,
            }
        }
        // Everything below a TOKEN is converted in token mode.
        RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content, true)?),
        RuleJSON::IMMEDIATE_TOKEN { content } => {
            Rule::immediate_token(parse_rule(*content, is_token)?)
        }
    };

    Ok(rule)
}

impl From<PrecedenceValueJSON> for Precedence {
    /// Maps the JSON precedence value onto the matching `Precedence` variant:
    /// integers stay integers and names stay names.
    fn from(val: PrecedenceValueJSON) -> Self {
        match val {
            PrecedenceValueJSON::Name(name) => Self::Name(name),
            PrecedenceValueJSON::Integer(level) => Self::Integer(level),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal two-rule grammar parses into two named variables, in the
    /// order in which the rules were declared.
    #[test]
    fn test_parse_grammar() {
        let json = r#"{
            "name": "my_lang",
            "rules": {
                "file": {
                    "type": "REPEAT1",
                    "content": {
                        "type": "SYMBOL",
                        "name": "statement"
                    }
                },
                "statement": {
                    "type": "STRING",
                    "value": "foo"
                }
            }
        }"#;

        let grammar = parse_grammar(json).unwrap();

        let expected_variables = vec![
            Variable {
                name: String::from("file"),
                kind: VariableType::Named,
                rule: Rule::repeat(Rule::NamedSymbol(String::from("statement"))),
            },
            Variable {
                name: String::from("statement"),
                kind: VariableType::Named,
                rule: Rule::String(String::from("foo")),
            },
        ];

        assert_eq!(grammar.name, "my_lang");
        assert_eq!(grammar.variables, expected_variables);
    }
}



================================================
FILE: crates/generate/src/prepare_grammar.rs
================================================
mod expand_repeats;
mod expand_tokens;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
mod process_inlines;

use std::{
    cmp::Ordering,
    collections::{hash_map, HashMap, HashSet},
    mem,
};

use anyhow::Result;
pub use expand_tokens::ExpandTokensError;
pub use extract_tokens::ExtractTokensError;
pub use flatten_grammar::FlattenGrammarError;
pub use intern_symbols::InternSymbolsError;
pub use process_inlines::ProcessInlinesError;
use serde::Serialize;
use thiserror::Error;

pub use self::expand_tokens::expand_tokens;
use self::{
    expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
    extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
    intern_symbols::intern_symbols, process_inlines::process_inlines,
};
use super::{
    grammars::{
        ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
        SyntaxGrammar, Variable,
    },
    rules::{AliasMap, Precedence, Rule, Symbol},
};
use crate::grammars::ReservedWordContext;

/// Grammar representation shared by the intermediate preparation stages.
///
/// `T` is the element type of `extra_symbols` and of the reserved-word sets;
/// `U` is the element type of `external_tokens`. See the `InternedGrammar`
/// and `ExtractedSyntaxGrammar` aliases for the instantiations in use.
pub struct IntermediateGrammar<T, U> {
    variables: Vec<Variable>,
    extra_symbols: Vec<T>,
    expected_conflicts: Vec<Vec<Symbol>>,
    precedence_orderings: Vec<Vec<PrecedenceEntry>>,
    external_tokens: Vec<U>,
    variables_to_inline: Vec<Symbol>,
    supertype_symbols: Vec<Symbol>,
    word_token: Option<Symbol>,
    reserved_word_sets: Vec<ReservedWordContext<T>>,
}

/// Intermediate grammar whose extras are `Rule`s and whose external tokens are
/// `Variable`s; this is what `intern_symbols` returns and `extract_tokens` consumes.
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;

/// Intermediate grammar whose extras are `Symbol`s and whose external tokens
/// are `ExternalToken`s.
// NOTE(review): presumably the syntax half of `extract_tokens`' output — confirm in extract_tokens.rs.
pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;

/// Lexical grammar data: a list of variables plus a list of separator rules.
// NOTE(review): presumably the lexical half of `extract_tokens`' output — confirm in extract_tokens.rs.
#[derive(Debug, PartialEq, Eq)]
pub struct ExtractedLexicalGrammar {
    pub variables: Vec<Variable>,
    pub separators: Vec<Rule>,
}

// Written by hand rather than derived: `#[derive(Default)]` would add
// `T: Default` and `U: Default` bounds, which empty collections do not need.
impl<T, U> Default for IntermediateGrammar<T, U> {
    /// Returns a grammar with every list empty and no word token.
    fn default() -> Self {
        Self {
            variables: Vec::new(),
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            precedence_orderings: Vec::new(),
            external_tokens: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,
            reserved_word_sets: Vec::new(),
        }
    }
}

/// Result alias for grammar preparation, with `PrepareGrammarError` as the error type.
pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;

/// Any error produced while preparing a grammar, one variant per fallible stage.
///
/// Each variant wraps that stage's own error type. `#[error(transparent)]`
/// forwards `Display` to the wrapped error, and `#[from]` generates the
/// `From` impls that let `prepare_grammar` use `?` on every stage.
#[derive(Debug, Error, Serialize)]
#[error(transparent)]
pub enum PrepareGrammarError {
    ValidatePrecedences(#[from] ValidatePrecedenceError),
    InternSymbols(#[from] InternSymbolsError),
    ExtractTokens(#[from] ExtractTokensError),
    FlattenGrammar(#[from] FlattenGrammarError),
    ExpandTokens(#[from] ExpandTokensError),
    ProcessInlines(#[from] ProcessInlinesError),
}

/// Result alias for precedence validation, with `ValidatePrecedenceError` as the error type.
pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;

/// The two ways `validate_precedences` can fail. `Display` is forwarded to the
/// wrapped error by `#[error(transparent)]`.
#[derive(Debug, Error, Serialize)]
#[error(transparent)]
pub enum ValidatePrecedenceError {
    // A rule uses a precedence name that no `precedences` list declares.
    Undeclared(#[from] UndeclaredPrecedenceError),
    // Two `precedences` lists order the same pair of entries differently.
    Ordering(#[from] ConflictingPrecedenceOrderingError),
}

/// A rule uses a named precedence that is not declared in any `precedences` list.
#[derive(Debug, Error, Serialize)]
pub struct UndeclaredPrecedenceError {
    // The undeclared precedence name.
    pub precedence: String,
    // Name of the grammar variable whose rule uses it.
    pub rule: String,
}

impl std::fmt::Display for UndeclaredPrecedenceError {
    /// Writes the message naming the undeclared precedence and the rule that
    /// uses it, both wrapped in single quotes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "Undeclared precedence '{}' in rule '{}'",
            self.precedence, self.rule
        ))
    }
}

/// Two `precedences` lists disagree about the relative order of two entries.
///
/// Both fields hold the `to_string()` rendering of the conflicting precedence
/// entries, as filled in by `validate_precedences`.
#[derive(Debug, Error, Serialize)]
pub struct ConflictingPrecedenceOrderingError {
    pub precedence_1: String,
    pub precedence_2: String,
}

impl std::fmt::Display for ConflictingPrecedenceOrderingError {
    /// Writes the message naming the two conflicting precedences. The fields
    /// are inserted verbatim; no quoting is added here.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "Conflicting orderings for precedences {} and {}",
            self.precedence_1, self.precedence_2
        ))
    }
}

/// Turn an input grammar into the separate pieces needed for parse table
/// construction: the syntax grammar, the lexical grammar, the inlined
/// production map and the default alias map.
///
/// The stages run in a fixed order: precedence validation, symbol interning,
/// token extraction, repeat expansion, flattening, token expansion, default
/// alias extraction and inline processing.
///
/// # Errors
///
/// Returns the first error raised by any stage, converted into a
/// `PrepareGrammarError`.
pub fn prepare_grammar(
    input_grammar: &InputGrammar,
) -> PrepareGrammarResult<(
    SyntaxGrammar,
    LexicalGrammar,
    InlinedProductionMap,
    AliasMap,
)> {
    validate_precedences(input_grammar)?;

    let interned = intern_symbols(input_grammar)?;
    let (extracted_syntax, extracted_lexical) = extract_tokens(interned)?;

    // Syntax side: expand repeats, then flatten.
    let mut syntax_grammar = flatten_grammar(expand_repeats(extracted_syntax))?;
    // Lexical side.
    let lexical_grammar = expand_tokens(extracted_lexical)?;

    // Default alias extraction needs mutable access to the syntax grammar, so
    // it runs before the inlines are computed from the final grammar.
    let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
    let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?;

    Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}

/// Check that all of the named precedences used in the grammar are declared
/// within the `precedences` lists, and also that there are no conflicting
/// precedence orderings declared in those lists.
///
/// The ordering check runs first, so when a grammar has both problems the
/// conflicting-ordering error is the one reported.
///
/// # Errors
///
/// * `ValidatePrecedenceError::Ordering` if two entries appear in one order in
///   some list and in the opposite order in another (or the same) list.
/// * `ValidatePrecedenceError::Undeclared` if a rule uses a precedence name
///   that does not appear in any list.
fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
    // Check that no rule contains a named precedence that is not present in
    // any of the `precedences` lists.
    fn validate(
        rule_name: &str,
        rule: &Rule,
        names: &HashSet<&String>,
    ) -> ValidatePrecedenceResult<()> {
        match rule {
            // `Reserved` only wraps another rule, so precedences nested inside
            // it must be checked just like those inside a repeat.
            Rule::Repeat(rule) | Rule::Reserved { rule, .. } => validate(rule_name, rule, names),
            Rule::Seq(elements) | Rule::Choice(elements) => elements
                .iter()
                .try_for_each(|e| validate(rule_name, e, names)),
            Rule::Metadata { rule, params } => {
                if let Precedence::Name(n) = &params.precedence {
                    if !names.contains(n) {
                        Err(UndeclaredPrecedenceError {
                            precedence: n.to_string(),
                            rule: rule_name.to_string(),
                        })?;
                    }
                }
                validate(rule_name, rule, names)
            }
            _ => Ok(()),
        }
    }

    // For any two precedence names `a` and `b`, if `a` comes before `b`
    // in some list, then it cannot come *after* `b` in any list.
    //
    // Each unordered pair is stored under a canonical key `(smaller, larger)`;
    // the value records which of the two came first in the list
    // (`Greater`: the smaller entry came first, `Less`: the larger one did).
    let mut pairs = HashMap::new();
    for list in &grammar.precedence_orderings {
        for (i, entry1) in list.iter().enumerate() {
            for entry2 in list.iter().skip(i + 1) {
                if entry2 == entry1 {
                    continue;
                }
                // Canonicalize on per-pair copies. Swapping the outer loop's
                // `entry1` itself would make the remaining inner iterations
                // compare against the wrong element and silently skip pairs.
                let (mut smaller, mut larger) = (entry1, entry2);
                let mut ordering = Ordering::Greater;
                if smaller > larger {
                    ordering = Ordering::Less;
                    mem::swap(&mut smaller, &mut larger);
                }
                match pairs.entry((smaller, larger)) {
                    hash_map::Entry::Vacant(e) => {
                        e.insert(ordering);
                    }
                    hash_map::Entry::Occupied(e) => {
                        if e.get() != &ordering {
                            Err(ConflictingPrecedenceOrderingError {
                                precedence_1: smaller.to_string(),
                                precedence_2: larger.to_string(),
                            })?;
                        }
                    }
                }
            }
        }
    }

    // Only `Name` entries declare precedence names; `Symbol` entries do not.
    let precedence_names = grammar
        .precedence_orderings
        .iter()
        .flat_map(|l| l.iter())
        .filter_map(|p| {
            if let PrecedenceEntry::Name(n) = p {
                Some(n)
            } else {
                None
            }
        })
        .collect::<HashSet<&String>>();
    for variable in &grammar.variables {
        validate(&variable.name, &variable.rule, &precedence_names)?;
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammars::VariableType;

    #[test]
    fn test_validate_precedences_with_undeclared_precedence() {
        let grammar = InputGrammar {
            precedence_orderings: vec![
                vec![
                    PrecedenceEntry::Name("a".to_string()),
                    PrecedenceEntry::Name("b".to_string()),
                ],
                vec![
                    PrecedenceEntry::Name("b".to_string()),
                    PrecedenceEntry::Name("c".to_string()),
                    PrecedenceEntry::Name("d".to_string()),
                ],
            ],
            variables: vec![
                Variable {
                    name: "v1".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::Seq(vec![
                        Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
                    ]),
                },
                Variable {
                    name: "v2".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::repeat(Rule::Choice(vec![
                        Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")),
                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
                    ])),
                },
            ],
            ..Default::default()
        };

        let result = validate_precedences(&grammar);
        assert_eq!(
            result.unwrap_err().to_string(),
            "Undeclared precedence 'omg' in rule 'v2'",
        );
    }

    // An undeclared precedence must be found even when the rule using it is
    // wrapped in a reserved-word context.
    #[test]
    fn test_validate_precedences_with_undeclared_precedence_in_reserved_rule() {
        let grammar = InputGrammar {
            precedence_orderings: vec![vec![PrecedenceEntry::Name("a".to_string())]],
            variables: vec![Variable {
                name: "v1".to_string(),
                kind: VariableType::Named,
                rule: Rule::Reserved {
                    rule: Box::new(Rule::prec(
                        Precedence::Name("omg".to_string()),
                        Rule::string("x"),
                    )),
                    context_name: "ctx".to_string(),
                },
            }],
            ..Default::default()
        };

        let result = validate_precedences(&grammar);
        assert_eq!(
            result.unwrap_err().to_string(),
            "Undeclared precedence 'omg' in rule 'v1'",
        );
    }

    #[test]
    fn test_validate_precedences_with_conflicting_order() {
        let grammar = InputGrammar {
            precedence_orderings: vec![
                vec![
                    PrecedenceEntry::Name("a".to_string()),
                    PrecedenceEntry::Name("b".to_string()),
                ],
                vec![
                    PrecedenceEntry::Name("b".to_string()),
                    PrecedenceEntry::Name("c".to_string()),
                    PrecedenceEntry::Name("a".to_string()),
                ],
            ],
            variables: vec![
                Variable {
                    name: "v1".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::Seq(vec![
                        Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")),
                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")),
                    ]),
                },
                Variable {
                    name: "v2".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::repeat(Rule::Choice(vec![
                        Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")),
                        Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")),
                    ])),
                },
            ],
            ..Default::default()
        };

        let result = validate_precedences(&grammar);
        assert_eq!(
            result.unwrap_err().to_string(),
            "Conflicting orderings for precedences 'a' and 'b'",
        );
    }

    // Regression test: in `[c, a, b]` the first element sorts after the second,
    // which triggers the canonicalizing swap. The pair (c, b) from that list
    // must still be recorded so that it conflicts with the second list.
    #[test]
    fn test_validate_precedences_with_conflicting_order_after_swap() {
        let grammar = InputGrammar {
            precedence_orderings: vec![
                vec![
                    PrecedenceEntry::Name("c".to_string()),
                    PrecedenceEntry::Name("a".to_string()),
                    PrecedenceEntry::Name("b".to_string()),
                ],
                vec![
                    PrecedenceEntry::Name("b".to_string()),
                    PrecedenceEntry::Name("c".to_string()),
                ],
            ],
            ..Default::default()
        };

        let result = validate_precedences(&grammar);
        assert_eq!(
            result.unwrap_err().to_string(),
            "Conflicting orderings for precedences 'b' and 'c'",
        );
    }
}



================================================
FILE: crates/generate/src/rules.rs
================================================
use std::{collections::HashMap, fmt};

use serde::Serialize;
use smallbitvec::SmallBitVec;

use super::grammars::VariableType;

/// The category a [`Symbol`] belongs to.
///
/// The derived `PartialOrd`/`Ord` follow the declaration order of the
/// variants below, so reordering them changes how symbols compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub enum SymbolType {
    /// Kind produced by `Symbol::external`; tracked in a `TokenSet`'s
    /// external bit vector.
    External,
    /// Kind produced by `Symbol::end`; this is what `Symbol::is_eof` checks.
    End,
    /// Kind produced by `Symbol::end_of_nonterminal_extra`.
    EndOfNonTerminalExtra,
    /// Kind produced by `Symbol::terminal`; tracked in a `TokenSet`'s
    /// terminal bit vector.
    Terminal,
    /// Kind produced by `Symbol::non_terminal`; a `TokenSet` panics if asked
    /// to store or look up a symbol of this kind.
    NonTerminal,
}

/// Associativity recorded in [`MetadataParams::associativity`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub enum Associativity {
    /// Set by `Rule::prec_left`.
    Left,
    /// Set by `Rule::prec_right`.
    Right,
}

/// An alias attached to a rule through `Rule::alias`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct Alias {
    /// The alias text.
    pub value: String,
    /// Selects the result of `Alias::kind`: `VariableType::Named` when
    /// `true`, `VariableType::Anonymous` otherwise.
    pub is_named: bool,
}

/// A precedence value stored in [`MetadataParams::precedence`].
///
/// Its `Display` implementation prints `none`, the bare integer, or the name
/// wrapped in single quotes.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
pub enum Precedence {
    /// No precedence; this is the `Default` value.
    #[default]
    None,
    /// A numeric precedence.
    Integer(i32),
    /// A precedence referred to by name.
    Name(String),
}

/// Map from a [`Symbol`] to the [`Alias`] associated with it.
pub type AliasMap = HashMap<Symbol, Alias>;

/// The attributes carried by a [`Rule::Metadata`] wrapper.
///
/// Each field is populated by one of the `Rule` constructor helpers; every
/// field starts at its `Default` value.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
pub struct MetadataParams {
    /// Set by `Rule::prec`, `Rule::prec_left` and `Rule::prec_right`.
    pub precedence: Precedence,
    /// Set by `Rule::prec_dynamic`.
    pub dynamic_precedence: i32,
    /// Set by `Rule::prec_left` / `Rule::prec_right`; `None` otherwise.
    pub associativity: Option<Associativity>,
    /// Set by `Rule::token` and `Rule::immediate_token`.
    pub is_token: bool,
    /// Set (together with `is_token`) by `Rule::immediate_token`.
    pub is_main_token: bool,
    /// Set by `Rule::alias`.
    pub alias: Option<Alias>,
    /// Set by `Rule::field`.
    pub field_name: Option<String>,
}

/// A grammar symbol, identified by its kind plus an index within that kind.
///
/// The derived ordering compares `kind` first and `index` second.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct Symbol {
    /// Which category of symbol this is.
    pub kind: SymbolType,
    /// Index within the category; the `end` and `end_of_nonterminal_extra`
    /// constructors always use `0`.
    pub index: usize,
}

/// A grammar rule expression tree.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
pub enum Rule {
    /// The blank rule. Note that `Rule::is_empty` reports `false` for it.
    Blank,
    /// A string value.
    String(String),
    /// A pattern given as `(value, flags)` (see the test-only `Rule::pattern`).
    Pattern(String, String),
    /// A reference to another rule by name.
    NamedSymbol(String),
    /// A reference to a [`Symbol`].
    Symbol(Symbol),
    /// A list of alternatives. `Rule::choice` flattens nested choices and
    /// drops duplicate members before building this variant.
    Choice(Vec<Rule>),
    /// An inner rule together with the metadata attached to it.
    Metadata {
        params: MetadataParams,
        rule: Box<Rule>,
    },
    /// A repeated inner rule (built by `Rule::repeat`).
    Repeat(Box<Rule>),
    /// A sequence of rules (built by `Rule::seq`).
    Seq(Vec<Rule>),
    /// An inner rule tagged with a context name.
    // NOTE(review): presumably selects a reserved-word set by name; the
    // consumer of `context_name` is not visible here, confirm.
    Reserved {
        rule: Box<Rule>,
        context_name: String,
    },
}

// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
//
// `PartialEq` and `Hash` are derived, so they compare the bit vectors as
// stored; `remove` pops trailing `false` bits after clearing a bit.
#[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct TokenSet {
    /// Bit `i` is set when `Symbol::terminal(i)` is in the set.
    terminal_bits: SmallBitVec,
    /// Bit `i` is set when `Symbol::external(i)` is in the set.
    external_bits: SmallBitVec,
    /// Whether `Symbol::end()` is in the set.
    eof: bool,
    /// Whether `Symbol::end_of_nonterminal_extra()` is in the set.
    end_of_nonterminal_extra: bool,
}

impl fmt::Debug for TokenSet {
    /// Formats the set as a debug list of the symbols it contains, in the
    /// order produced by `TokenSet::iter`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.iter());
        list.finish()
    }
}

impl PartialOrd for TokenSet {
    /// Always `Some`: the ordering is total and delegates to `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for TokenSet {
    /// Orders sets lexicographically: terminal bits first, then external
    /// bits, then the `eof` flag, then the `end_of_nonterminal_extra` flag.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        let by_terminals = self.terminal_bits.iter().cmp(other.terminal_bits.iter());
        if by_terminals != Ordering::Equal {
            return by_terminals;
        }

        let by_externals = self.external_bits.iter().cmp(other.external_bits.iter());
        if by_externals != Ordering::Equal {
            return by_externals;
        }

        // Tuple comparison is lexicographic, so `eof` is the more significant
        // of the two flags.
        (self.eof, self.end_of_nonterminal_extra)
            .cmp(&(other.eof, other.end_of_nonterminal_extra))
    }
}

impl Rule {
    /// Wraps `content` in metadata carrying the field name `name`.
    pub fn field(name: String, content: Self) -> Self {
        add_metadata(content, move |metadata| metadata.field_name = Some(name))
    }

    /// Wraps `content` in metadata carrying an alias built from `value` and
    /// `is_named`.
    pub fn alias(content: Self, value: String, is_named: bool) -> Self {
        let alias = Alias { value, is_named };
        add_metadata(content, move |metadata| metadata.alias = Some(alias))
    }

    /// Marks `content` as a token.
    pub fn token(content: Self) -> Self {
        add_metadata(content, |metadata| metadata.is_token = true)
    }

    /// Marks `content` as a token and as a main token.
    pub fn immediate_token(content: Self) -> Self {
        add_metadata(content, |metadata| {
            metadata.is_token = true;
            metadata.is_main_token = true;
        })
    }

    /// Assigns the precedence `value` to `content` without touching its
    /// associativity.
    pub fn prec(value: Precedence, content: Self) -> Self {
        add_metadata(content, |metadata| metadata.precedence = value)
    }

    /// Assigns the precedence `value` and left associativity to `content`.
    pub fn prec_left(value: Precedence, content: Self) -> Self {
        add_metadata(content, |metadata| {
            metadata.associativity = Some(Associativity::Left);
            metadata.precedence = value;
        })
    }

    /// Assigns the precedence `value` and right associativity to `content`.
    pub fn prec_right(value: Precedence, content: Self) -> Self {
        add_metadata(content, |metadata| {
            metadata.associativity = Some(Associativity::Right);
            metadata.precedence = value;
        })
    }

    /// Assigns the dynamic precedence `value` to `content`.
    pub fn prec_dynamic(value: i32, content: Self) -> Self {
        add_metadata(content, |metadata| metadata.dynamic_precedence = value)
    }

    /// Builds a `Repeat` around `rule`.
    pub fn repeat(rule: Self) -> Self {
        let inner = Box::new(rule);
        Self::Repeat(inner)
    }

    /// Builds a `Choice` from `rules`, splicing nested choices into the
    /// result and skipping members that are already present.
    pub fn choice(rules: Vec<Self>) -> Self {
        let mut flattened = Vec::with_capacity(rules.len());
        rules
            .into_iter()
            .for_each(|rule| choice_helper(&mut flattened, rule));
        Self::Choice(flattened)
    }

    /// Builds a `Seq` from `rules` as given.
    pub const fn seq(rules: Vec<Self>) -> Self {
        Self::Seq(rules)
    }

    /// Reports whether this rule is considered empty.
    ///
    /// An empty string is empty; wrappers (`Metadata`, `Repeat`, `Reserved`)
    /// defer to their inner rule; a `Choice` is empty if any member is, and a
    /// `Seq` only if every member is. `Blank`, patterns and symbol references
    /// are never reported as empty.
    pub fn is_empty(&self) -> bool {
        match self {
            Self::String(text) => text.is_empty(),
            Self::Seq(members) => members.iter().all(|member| member.is_empty()),
            Self::Choice(members) => members.iter().any(|member| member.is_empty()),
            Self::Metadata { rule: inner, .. }
            | Self::Reserved { rule: inner, .. }
            | Self::Repeat(inner) => inner.is_empty(),
            Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
        }
    }
}

impl Alias {
    /// Maps the `is_named` flag onto the corresponding `VariableType`.
    #[must_use]
    pub const fn kind(&self) -> VariableType {
        match self.is_named {
            true => VariableType::Named,
            false => VariableType::Anonymous,
        }
    }
}

impl Precedence {
    /// Returns `true` only for the `None` variant.
    #[must_use]
    pub const fn is_none(&self) -> bool {
        match self {
            Self::None => true,
            Self::Integer(_) | Self::Name(_) => false,
        }
    }
}

#[cfg(test)]
impl Rule {
    /// Test helper: a `Symbol` rule referring to terminal `index`.
    #[must_use]
    pub const fn terminal(index: usize) -> Self {
        let symbol = Symbol::terminal(index);
        Self::Symbol(symbol)
    }

    /// Test helper: a `Symbol` rule referring to non-terminal `index`.
    #[must_use]
    pub const fn non_terminal(index: usize) -> Self {
        let symbol = Symbol::non_terminal(index);
        Self::Symbol(symbol)
    }

    /// Test helper: a `Symbol` rule referring to external `index`.
    #[must_use]
    pub const fn external(index: usize) -> Self {
        let symbol = Symbol::external(index);
        Self::Symbol(symbol)
    }

    /// Test helper: a `NamedSymbol` rule for `name`.
    #[must_use]
    pub fn named(name: &'static str) -> Self {
        Self::NamedSymbol(String::from(name))
    }

    /// Test helper: a `String` rule holding `value`.
    #[must_use]
    pub fn string(value: &'static str) -> Self {
        Self::String(String::from(value))
    }

    /// Test helper: a `Pattern` rule holding `value` and `flags`.
    #[must_use]
    pub fn pattern(value: &'static str, flags: &'static str) -> Self {
        Self::Pattern(String::from(value), String::from(flags))
    }
}

impl Symbol {
    /// `true` when this symbol's kind is `SymbolType::Terminal`.
    #[must_use]
    pub fn is_terminal(&self) -> bool {
        matches!(self.kind, SymbolType::Terminal)
    }

    /// `true` when this symbol's kind is `SymbolType::NonTerminal`.
    #[must_use]
    pub fn is_non_terminal(&self) -> bool {
        matches!(self.kind, SymbolType::NonTerminal)
    }

    /// `true` when this symbol's kind is `SymbolType::External`.
    #[must_use]
    pub fn is_external(&self) -> bool {
        matches!(self.kind, SymbolType::External)
    }

    /// `true` when this symbol's kind is `SymbolType::End`.
    #[must_use]
    pub fn is_eof(&self) -> bool {
        matches!(self.kind, SymbolType::End)
    }

    /// Creates the non-terminal symbol with the given index.
    #[must_use]
    pub const fn non_terminal(index: usize) -> Self {
        let kind = SymbolType::NonTerminal;
        Self { kind, index }
    }

    /// Creates the terminal symbol with the given index.
    #[must_use]
    pub const fn terminal(index: usize) -> Self {
        let kind = SymbolType::Terminal;
        Self { kind, index }
    }

    /// Creates the external symbol with the given index.
    #[must_use]
    pub const fn external(index: usize) -> Self {
        let kind = SymbolType::External;
        Self { kind, index }
    }

    /// Creates the `End` symbol; its index is always `0`.
    #[must_use]
    pub const fn end() -> Self {
        let kind = SymbolType::End;
        Self { kind, index: 0 }
    }

    /// Creates the `EndOfNonTerminalExtra` symbol; its index is always `0`.
    #[must_use]
    pub const fn end_of_nonterminal_extra() -> Self {
        let kind = SymbolType::EndOfNonTerminalExtra;
        Self { kind, index: 0 }
    }
}

impl From<Symbol> for Rule {
    fn from(symbol: Symbol) -> Self {
        Self::Symbol(symbol)
    }
}

impl TokenSet {
    /// Creates a set that contains no tokens.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            eof: false,
            end_of_nonterminal_extra: false,
            terminal_bits: SmallBitVec::new(),
            external_bits: SmallBitVec::new(),
        }
    }

    /// Iterates over every symbol in the set: terminals in index order, then
    /// externals in index order, then the `End` symbol and finally the
    /// `EndOfNonTerminalExtra` symbol when those flags are set.
    pub fn iter(&self) -> impl Iterator<Item = Symbol> + '_ {
        let externals = self
            .external_bits
            .iter()
            .enumerate()
            .filter(|&(_, is_set)| is_set)
            .map(|(index, _)| Symbol::external(index));

        self.terminals()
            .chain(externals)
            .chain(self.eof.then(Symbol::end))
            .chain(
                self.end_of_nonterminal_extra
                    .then(Symbol::end_of_nonterminal_extra),
            )
    }

    /// Iterates over only the terminal symbols in the set, in index order.
    pub fn terminals(&self) -> impl Iterator<Item = Symbol> + '_ {
        self.terminal_bits
            .iter()
            .enumerate()
            .filter(|&(_, is_set)| is_set)
            .map(|(index, _)| Symbol::terminal(index))
    }

    /// Reports whether `symbol` is in the set.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` is a non-terminal.
    pub fn contains(&self, symbol: &Symbol) -> bool {
        match symbol.kind {
            SymbolType::End => self.eof,
            SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra,
            // An index beyond the stored bits simply means "not present".
            SymbolType::Terminal => self.terminal_bits.get(symbol.index) == Some(true),
            SymbolType::External => self.external_bits.get(symbol.index) == Some(true),
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
        }
    }

    /// Reports whether the terminal with the given index is in the set.
    pub fn contains_terminal(&self, index: usize) -> bool {
        self.terminal_bits.get(index) == Some(true)
    }

    /// Adds `other` to the set.
    ///
    /// # Panics
    ///
    /// Panics if `other` is a non-terminal.
    pub fn insert(&mut self, other: Symbol) {
        let bits = match other.kind {
            SymbolType::End => {
                self.eof = true;
                return;
            }
            SymbolType::EndOfNonTerminalExtra => {
                self.end_of_nonterminal_extra = true;
                return;
            }
            SymbolType::Terminal => &mut self.terminal_bits,
            SymbolType::External => &mut self.external_bits,
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
        };

        // Grow the vector just far enough to hold the new bit.
        if other.index >= bits.len() {
            bits.resize(other.index + 1, false);
        }
        bits.set(other.index, true);
    }

    /// Removes `other` from the set, returning whether it was present.
    ///
    /// # Panics
    ///
    /// Panics if `other` is a non-terminal.
    pub fn remove(&mut self, other: &Symbol) -> bool {
        let bits = match other.kind {
            // Clearing a flag and reporting its previous value is exactly
            // "was it present?".
            SymbolType::End => return std::mem::replace(&mut self.eof, false),
            SymbolType::EndOfNonTerminalExtra => {
                return std::mem::replace(&mut self.end_of_nonterminal_extra, false);
            }
            SymbolType::Terminal => &mut self.terminal_bits,
            SymbolType::External => &mut self.external_bits,
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
        };

        if other.index >= bits.len() || !bits[other.index] {
            return false;
        }

        bits.set(other.index, false);
        // Drop trailing unset bits so the stored vector stays trimmed.
        while bits.last() == Some(false) {
            bits.pop();
        }
        true
    }

    /// Reports whether the set contains no symbols at all.
    pub fn is_empty(&self) -> bool {
        !(self.eof
            || self.end_of_nonterminal_extra
            || self.terminal_bits.iter().any(|bit| bit)
            || self.external_bits.iter().any(|bit| bit))
    }

    /// Counts the symbols in the set.
    pub fn len(&self) -> usize {
        let flag_count = usize::from(self.eof) + usize::from(self.end_of_nonterminal_extra);
        let terminal_count = self.terminal_bits.iter().filter(|bit| *bit).count();
        let external_count = self.external_bits.iter().filter(|bit| *bit).count();
        flag_count + terminal_count + external_count
    }

    /// Adds every terminal of `other` to this set, returning `true` if at
    /// least one of them was not already present.
    pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
        let incoming_len = other.terminal_bits.len();
        if self.terminal_bits.len() < incoming_len {
            self.terminal_bits.resize(incoming_len, false);
        }

        let mut changed = false;
        for (index, is_set) in other.terminal_bits.iter().enumerate() {
            if !is_set {
                continue;
            }
            if !self.terminal_bits[index] {
                changed = true;
            }
            self.terminal_bits.set(index, true);
        }
        changed
    }

    /// Adds every external of `other` to this set, returning `true` if at
    /// least one of them was not already present.
    fn insert_all_externals(&mut self, other: &Self) -> bool {
        let incoming_len = other.external_bits.len();
        if self.external_bits.len() < incoming_len {
            self.external_bits.resize(incoming_len, false);
        }

        let mut changed = false;
        for (index, is_set) in other.external_bits.iter().enumerate() {
            if !is_set {
                continue;
            }
            if !self.external_bits[index] {
                changed = true;
            }
            self.external_bits.set(index, true);
        }
        changed
    }

    /// Adds every symbol of `other` to this set, returning `true` if this
    /// set gained at least one symbol.
    pub fn insert_all(&mut self, other: &Self) -> bool {
        let mut changed = false;

        if other.eof {
            changed |= !self.eof;
            self.eof = true;
        }
        if other.end_of_nonterminal_extra {
            changed |= !self.end_of_nonterminal_extra;
            self.end_of_nonterminal_extra = true;
        }

        // Both merges must run, so avoid short-circuiting operators here.
        let terminals_changed = self.insert_all_terminals(other);
        let externals_changed = self.insert_all_externals(other);
        changed | terminals_changed | externals_changed
    }
}

impl FromIterator<Symbol> for TokenSet {
    /// Collects symbols into a set by inserting them one at a time, so it
    /// panics on a non-terminal just like `TokenSet::insert`.
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}

/// Applies `f` to the metadata of `input` and returns the resulting
/// `Rule::Metadata`.
///
/// When `input` is already a metadata wrapper that is not marked as a token,
/// its existing parameters are updated in place. Otherwise `input` is wrapped
/// in a fresh metadata node whose parameters start from their defaults.
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
    let (rule, mut params) = match input {
        Rule::Metadata { rule, params } if !params.is_token => (rule, params),
        other => (Box::new(other), MetadataParams::default()),
    };
    f(&mut params);
    Rule::Metadata { rule, params }
}

/// Appends `rule` to `result`, recursively splicing in the members of nested
/// `Choice` rules and skipping any rule that `result` already contains.
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
    if let Rule::Choice(nested) = rule {
        nested
            .into_iter()
            .for_each(|member| choice_helper(result, member));
    } else if !result.contains(&rule) {
        result.push(rule);
    }
}

impl fmt::Display for Precedence {
    /// Writes `none`, the bare integer, or the name in single quotes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::None => f.write_str("none"),
            Self::Integer(value) => write!(f, "{value}"),
            Self::Name(name) => write!(f, "'{name}'"),
        }
    }
}



================================================
FILE: crates/generate/src/tables.rs
================================================
use std::collections::BTreeMap;

use super::{
    nfa::CharacterSet,
    rules::{Alias, Symbol, TokenSet},
};
/// Identifier of a production; see `ParseAction::Reduce::production_id`.
// NOTE(review): presumably an index into `ParseTable::production_infos`; confirm.
pub type ProductionInfoId = usize;
/// Identifier of a parse state, as stored in shift and goto actions.
pub type ParseStateId = usize;
/// Identifier of a lex state, as stored in `AdvanceAction::state`.
pub type LexStateId = usize;

use std::hash::BuildHasherDefault;

use indexmap::IndexMap;
use rustc_hash::FxHasher;

/// One action stored in a [`ParseTableEntry`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ParseAction {
    Accept,
    /// Refers to another parse state; `ParseState::referenced_states` reports
    /// `state`, and `ParseState::update_referenced_states` may rewrite it
    /// while keeping `is_repetition` unchanged.
    Shift {
        state: ParseStateId,
        is_repetition: bool,
    },
    ShiftExtra,
    Recover,
    /// Carries the symbol, child count, dynamic precedence and production id
    /// associated with the reduction.
    Reduce {
        symbol: Symbol,
        child_count: usize,
        dynamic_precedence: i32,
        production_id: ProductionInfoId,
    },
}

/// The action stored for a symbol in `ParseState::nonterminal_entries`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GotoAction {
    /// Refers to the parse state with the given id.
    Goto(ParseStateId),
    /// Refers to no parse state; `ParseState::referenced_states` skips it.
    ShiftExtra,
}

/// The list of actions stored for one symbol in
/// `ParseState::terminal_entries`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ParseTableEntry {
    /// The actions for this entry; `ParseTableEntry::new` starts with none.
    pub actions: Vec<ParseAction>,
    /// Starts as `true` in `ParseTableEntry::new`.
    // NOTE(review): the meaning of "reusable" is defined by code outside
    // this view; confirm before relying on it.
    pub reusable: bool,
}

/// One state of the parse table.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ParseState {
    pub id: ParseStateId,
    /// Table entries keyed by symbol; shift actions in here are what
    /// `referenced_states` and `update_referenced_states` inspect.
    pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
    /// Goto actions keyed by symbol.
    pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
    /// Tokens that `build_lex_table` considers in addition to the keys of
    /// `terminal_entries` when computing this state's lex state.
    pub reserved_words: TokenSet,
    /// Index of this state's lex state; assigned by `build_lex_table`.
    pub lex_state_id: usize,
    // NOTE(review): presumably an index into `ParseTable::external_lex_states`; confirm.
    pub external_lex_state_id: usize,
    pub core_id: usize,
}

/// A location recorded for a field name in `ProductionInfo::field_map`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct FieldLocation {
    // NOTE(review): presumably the child position within the production; confirm.
    pub index: usize,
    pub inherited: bool,
}

/// Alias and field information attached to a production.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ProductionInfo {
    /// Optional aliases, one slot per position.
    pub alias_sequence: Vec<Option<Alias>>,
    /// Field locations keyed by field name; a `BTreeMap`, so iteration is
    /// ordered by name.
    pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}

/// The complete parse table.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ParseTable {
    /// All parse states; `build_lex_table` indexes this vector by state id
    /// when assigning `lex_state_id`.
    pub states: Vec<ParseState>,
    pub symbols: Vec<Symbol>,
    pub production_infos: Vec<ProductionInfo>,
    pub max_aliased_production_length: usize,
    pub external_lex_states: Vec<TokenSet>,
}

/// A lex-table transition target.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct AdvanceAction {
    /// The lex state to move to.
    pub state: LexStateId,
    /// The lex table builder sets this to `false` for separator transitions
    /// and to `true` otherwise (including the EOF transition).
    pub in_main_token: bool,
}

/// One state of a lex table.
///
/// The derived `Ord` is what `sort_states` uses to order lex states, so the
/// field order here affects the final state numbering.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct LexState {
    /// The symbol accepted in this state, if any.
    pub accept_action: Option<Symbol>,
    /// The transition taken at end of input, if any.
    pub eof_action: Option<AdvanceAction>,
    /// Transitions, each guarded by the character set that triggers it.
    pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}

/// A lex table: a list of states addressed by `LexStateId`.
#[derive(Debug, PartialEq, Eq, Default)]
pub struct LexTable {
    pub states: Vec<LexState>,
}

impl ParseTableEntry {
    /// Creates an entry with no actions and `reusable` set to `true`.
    #[must_use]
    pub const fn new() -> Self {
        let actions = Vec::new();
        Self {
            actions,
            reusable: true,
        }
    }
}

impl ParseState {
    /// Reports whether this state has a terminal entry keyed by
    /// `Symbol::end_of_nonterminal_extra()`.
    pub fn is_end_of_non_terminal_extra(&self) -> bool {
        let marker = Symbol::end_of_nonterminal_extra();
        self.terminal_entries.contains_key(&marker)
    }

    /// Iterates over every parse state id this state refers to: first the
    /// targets of all shift actions (in entry order), then the targets of all
    /// `Goto` actions.
    pub fn referenced_states(&self) -> impl Iterator<Item = ParseStateId> + '_ {
        let shift_targets = self
            .terminal_entries
            .values()
            .flat_map(|entry| entry.actions.iter())
            .filter_map(|action| match action {
                ParseAction::Shift { state, .. } => Some(*state),
                _ => None,
            });

        let goto_targets = self
            .nonterminal_entries
            .values()
            .filter_map(|action| match action {
                GotoAction::Goto(state) => Some(*state),
                GotoAction::ShiftExtra => None,
            });

        shift_targets.chain(goto_targets)
    }

    /// Rewrites the parse state ids referenced by this state.
    ///
    /// `f` is called once per shift action and once per `Goto` action with
    /// the currently referenced state id and this (still unmodified) state;
    /// whenever it returns a different id, the reference is replaced. All
    /// calls to `f` happen before any replacement is applied.
    pub fn update_referenced_states<F>(&mut self, mut f: F)
    where
        F: FnMut(usize, &Self) -> usize,
    {
        // Pending replacements as (symbol, action index, new state id).
        // Goto entries have no action list, so they use index 0.
        let mut pending = Vec::new();

        for (symbol, entry) in &self.terminal_entries {
            for (action_index, action) in entry.actions.iter().enumerate() {
                if let ParseAction::Shift { state, .. } = action {
                    let replacement = f(*state, self);
                    if replacement != *state {
                        pending.push((*symbol, action_index, replacement));
                    }
                }
            }
        }

        for (symbol, action) in &self.nonterminal_entries {
            if let GotoAction::Goto(target) = action {
                let replacement = f(*target, self);
                if replacement != *target {
                    pending.push((*symbol, 0, replacement));
                }
            }
        }

        for (symbol, action_index, new_state) in pending {
            if !symbol.is_non_terminal() {
                let entry = self.terminal_entries.get_mut(&symbol).unwrap();
                // Preserve the repetition flag of the shift being retargeted.
                if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {
                    entry.actions[action_index] = ParseAction::Shift {
                        state: new_state,
                        is_repetition,
                    };
                }
                continue;
            }

            self.nonterminal_entries
                .insert(symbol, GotoAction::Goto(new_state));
        }
    }
}



================================================
FILE: crates/generate/src/build_tables/build_lex_table.rs
================================================
use std::{
    collections::{hash_map::Entry, HashMap, VecDeque},
    mem,
};

use log::info;

use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
use crate::{
    dedup::split_state_id_groups,
    grammars::{LexicalGrammar, SyntaxGrammar},
    nfa::{CharacterSet, NfaCursor},
    rules::{Symbol, TokenSet},
    tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
};

/// Threshold used by `build_lex_table`: a character set whose range count is
/// strictly greater than this is recorded in `LexTables::large_character_sets`.
pub const LARGE_CHARACTER_RANGE_COUNT: usize = 8;

/// The result of `build_lex_table`.
pub struct LexTables {
    /// The minimized and sorted lex table referenced by
    /// `ParseState::lex_state_id`.
    pub main_lex_table: LexTable,
    /// The lex table built from the keyword set; left at its default when the
    /// grammar has no word token.
    pub keyword_lex_table: LexTable,
    /// Distinct character sets with more than `LARGE_CHARACTER_RANGE_COUNT`
    /// ranges. The symbol is `Some` for sets accumulated from a token's
    /// main-token transitions and `None` for the other transitions.
    pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
}

/// Builds the lex tables for a grammar and assigns a lex state to every
/// parse state.
///
/// Steps, in order:
/// 1. Build the keyword lex table from `keywords` (only when the grammar has
///    a word token).
/// 2. Compute the token set of each parse state and merge compatible token
///    sets so several parse states can share one lex entry point.
/// 3. Build one lex entry state per merged token set and store its id in
///    `ParseState::lex_state_id`.
/// 4. Minimize and sort the main lex table, updating the parse states'
///    `lex_state_id` accordingly.
/// 5. Collect the large character sets used by each token.
///
/// `parse_table` is mutated only through the `lex_state_id` fields.
pub fn build_lex_table(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    keywords: &TokenSet,
    coincident_token_index: &CoincidentTokenIndex,
    token_conflict_map: &TokenConflictMap,
) -> LexTables {
    // The keyword table is only needed when a word token exists.
    let keyword_lex_table = if syntax_grammar.word_token.is_some() {
        let mut builder = LexTableBuilder::new(lexical_grammar);
        builder.add_state_for_tokens(keywords);
        builder.table
    } else {
        LexTable::default()
    };

    // Each element pairs a (possibly merged) token set with the ids of the
    // parse states that will share its lex entry state.
    let mut parse_state_ids_by_token_set = Vec::<(TokenSet, Vec<ParseStateId>)>::new();
    for (i, state) in parse_table.states.iter().enumerate() {
        // Tokens relevant to this state: its terminal entries plus its
        // reserved words. Keyword terminals are replaced by the word token
        // (and dropped if there is none), EOF is kept as-is, and every other
        // non-terminal symbol kind is dropped.
        let tokens = state
            .terminal_entries
            .keys()
            .copied()
            .chain(state.reserved_words.iter())
            .filter_map(|token| {
                if token.is_terminal() {
                    if keywords.contains(&token) {
                        syntax_grammar.word_token
                    } else {
                        Some(token)
                    }
                } else if token.is_eof() {
                    Some(token)
                } else {
                    None
                }
            })
            .collect();

        // Merge into the first existing token set that accepts these tokens.
        let mut did_merge = false;
        for entry in &mut parse_state_ids_by_token_set {
            if merge_token_set(
                &mut entry.0,
                &tokens,
                lexical_grammar,
                token_conflict_map,
                coincident_token_index,
            ) {
                did_merge = true;
                entry.1.push(i);
                break;
            }
        }

        // No compatible set: start a new one for this parse state.
        if !did_merge {
            parse_state_ids_by_token_set.push((tokens, vec![i]));
        }
    }

    // Build one lex entry state per token set and point the parse states at it.
    let mut builder = LexTableBuilder::new(lexical_grammar);
    for (tokens, parse_state_ids) in parse_state_ids_by_token_set {
        let lex_state_id = builder.add_state_for_tokens(&tokens);
        for id in parse_state_ids {
            parse_table.states[id].lex_state_id = lex_state_id;
        }
    }

    // Take ownership of the built table (leaving a default one in the
    // builder), then shrink and reorder it.
    let mut main_lex_table = mem::take(&mut builder.table);
    minimize_lex_table(&mut main_lex_table, parse_table);
    sort_states(&mut main_lex_table, parse_table);

    // For every token, rebuild a lex table for that token alone and record
    // the character sets that exceed the "large" threshold.
    let mut large_character_sets = Vec::new();
    for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() {
        let symbol = Symbol::terminal(variable_ix);
        builder.reset();
        builder.add_state_for_tokens(&TokenSet::from_iter([symbol]));
        for state in &builder.table.states {
            // Union of the character sets on this state's main-token transitions.
            let mut characters = CharacterSet::empty();
            for (chars, action) in &state.advance_actions {
                if action.in_main_token {
                    characters = characters.add(chars);
                    continue;
                }

                // Transitions outside the main token are recorded individually,
                // without an associated symbol, and only once per distinct set.
                if chars.range_count() > LARGE_CHARACTER_RANGE_COUNT
                    && !large_character_sets.iter().any(|(_, set)| set == chars)
                {
                    large_character_sets.push((None, chars.clone()));
                }
            }

            // The accumulated main-token set is recorded with its symbol,
            // again only if an equal set has not been recorded yet.
            if characters.range_count() > LARGE_CHARACTER_RANGE_COUNT
                && !large_character_sets
                    .iter()
                    .any(|(_, set)| *set == characters)
            {
                large_character_sets.push((Some(symbol), characters));
            }
        }
    }

    LexTables {
        main_lex_table,
        keyword_lex_table,
        large_character_sets,
    }
}

/// A lex state that has been allocated by `LexTableBuilder::add_state` but
/// not yet filled in by `LexTableBuilder::populate_state`.
struct QueueEntry {
    /// Index of the allocated state in the builder's table.
    state_id: usize,
    /// The NFA state ids this lex state stands for.
    nfa_states: Vec<u32>,
    /// Whether end of input is valid in this state.
    eof_valid: bool,
}

/// Incrementally builds a `LexTable` from the lexical grammar's NFA.
struct LexTableBuilder<'a> {
    lexical_grammar: &'a LexicalGrammar,
    /// Cursor over the grammar's NFA, repositioned for each state.
    cursor: NfaCursor<'a>,
    /// The table under construction.
    table: LexTable,
    /// States that were allocated but still need to be populated.
    state_queue: VecDeque<QueueEntry>,
    /// Maps `(NFA state ids, eof_valid)` to the lex state already allocated
    /// for that combination, so equal combinations share one state.
    state_ids_by_nfa_state_set: HashMap<(Vec<u32>, bool), usize>,
}

impl<'a> LexTableBuilder<'a> {
    /// Creates a builder with an empty table for `lexical_grammar`.
    fn new(lexical_grammar: &'a LexicalGrammar) -> Self {
        Self {
            lexical_grammar,
            cursor: NfaCursor::new(&lexical_grammar.nfa, vec![]),
            table: LexTable::default(),
            state_queue: VecDeque::new(),
            state_ids_by_nfa_state_set: HashMap::new(),
        }
    }

    /// Discards the table, the pending queue and the state lookup so the
    /// builder can be reused for an unrelated table.
    fn reset(&mut self) {
        self.table = LexTable::default();
        self.state_queue.clear();
        self.state_ids_by_nfa_state_set.clear();
    }

    /// Ensures the table has an entry state recognizing `tokens`, builds all
    /// states reachable from it, and returns the entry state's id.
    ///
    /// Terminal tokens contribute their NFA start state; any other token in
    /// the set marks end of input as valid for the entry state.
    fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize {
        let mut eof_valid = false;
        let nfa_states = tokens
            .iter()
            .filter_map(|token| {
                if token.is_terminal() {
                    Some(self.lexical_grammar.variables[token.index].start_state)
                } else {
                    eof_valid = true;
                    None
                }
            })
            .collect();
        let (state_id, is_new) = self.add_state(nfa_states, eof_valid);

        if is_new {
            // Only terminals index into the lexical variables. Non-terminal
            // members (which were treated as "end of input" above) are logged
            // as "end" instead of being looked up by index, which would
            // report an unrelated variable's name or index out of bounds.
            info!(
                "entry point state: {state_id}, tokens: {:?}",
                tokens
                    .iter()
                    .map(|token| {
                        if token.is_terminal() {
                            self.lexical_grammar.variables[token.index].name.as_str()
                        } else {
                            "end"
                        }
                    })
                    .collect::<Vec<_>>()
            );
        }

        // Populate every state queued so far; populating a state may queue
        // further states, so keep going until the queue is drained.
        while let Some(QueueEntry {
            state_id,
            nfa_states,
            eof_valid,
        }) = self.state_queue.pop_front()
        {
            self.populate_state(state_id, nfa_states, eof_valid);
        }
        state_id
    }

    /// Returns the id of the lex state for `(nfa_states, eof_valid)`, plus
    /// whether it had to be newly allocated.
    ///
    /// The lookup key uses the cursor's state ids after resetting it to
    /// `nfa_states`, not the raw input vector. New states are pushed empty
    /// and queued for `populate_state`.
    fn add_state(&mut self, nfa_states: Vec<u32>, eof_valid: bool) -> (usize, bool) {
        self.cursor.reset(nfa_states);
        match self
            .state_ids_by_nfa_state_set
            .entry((self.cursor.state_ids.clone(), eof_valid))
        {
            Entry::Occupied(o) => (*o.get(), false),
            Entry::Vacant(v) => {
                let state_id = self.table.states.len();
                self.table.states.push(LexState::default());
                self.state_queue.push_back(QueueEntry {
                    state_id,
                    nfa_states: v.key().0.clone(),
                    eof_valid,
                });
                v.insert(state_id);
                (state_id, true)
            }
        }
    }

    /// Fills in the accept, EOF and advance actions of the already allocated
    /// state `state_id`, allocating successor states as needed.
    fn populate_state(&mut self, state_id: usize, nfa_states: Vec<u32>, eof_valid: bool) {
        self.cursor.force_reset(nfa_states);

        // Pick the preferred token among all tokens that complete here.
        let mut completion = None;
        for (id, prec) in self.cursor.completions() {
            if let Some((prev_id, prev_precedence)) = completion {
                if TokenConflictMap::prefer_token(
                    self.lexical_grammar,
                    (prev_precedence, prev_id),
                    (prec, id),
                ) {
                    continue;
                }
            }
            completion = Some((id, prec));
        }

        let transitions = self.cursor.transitions();
        let has_sep = self.cursor.transition_chars().any(|(_, sep)| sep);

        // If EOF is a valid lookahead token, add a transition predicated on the null
        // character that leads to the empty set of NFA states.
        if eof_valid {
            let (next_state_id, _) = self.add_state(Vec::new(), false);
            self.table.states[state_id].eof_action = Some(AdvanceAction {
                state: next_state_id,
                in_main_token: true,
            });
        }

        for transition in transitions {
            // When a token already completes here, only keep transitions that
            // are preferred over accepting that token.
            if let Some((completed_id, completed_precedence)) = completion {
                if !TokenConflictMap::prefer_transition(
                    self.lexical_grammar,
                    &transition,
                    completed_id,
                    completed_precedence,
                    has_sep,
                ) {
                    continue;
                }
            }

            // EOF stays valid only across separator transitions.
            let (next_state_id, _) =
                self.add_state(transition.states, eof_valid && transition.is_separator);
            self.table.states[state_id].advance_actions.push((
                transition.characters,
                AdvanceAction {
                    state: next_state_id,
                    in_main_token: !transition.is_separator,
                },
            ));
        }

        if let Some((complete_id, _)) = completion {
            self.table.states[state_id].accept_action = Some(Symbol::terminal(complete_id));
        } else if self.cursor.state_ids.is_empty() {
            // The EOF state is represented as an empty list of NFA states.
            self.table.states[state_id].accept_action = Some(Symbol::end());
        }
    }
}

/// Tries to merge `other` into `tokens`.
///
/// For every terminal that is in exactly one of the two sets, the terminals
/// of the opposite set are checked against it. The merge is rejected
/// (returning `false` and leaving `tokens` untouched) if any such pair
/// conflicts, if the terminal matches a prefix of the existing token, or if
/// the pair is not coincident and overlaps in either direction. Otherwise
/// every symbol of `other` is inserted into `tokens` and `true` is returned.
fn merge_token_set(
    tokens: &mut TokenSet,
    other: &TokenSet,
    lexical_grammar: &LexicalGrammar,
    token_conflict_map: &TokenConflictMap,
    coincident_token_index: &CoincidentTokenIndex,
) -> bool {
    for i in 0..lexical_grammar.variables.len() {
        let in_tokens = tokens.contains_terminal(i);
        let in_other = other.contains_terminal(i);
        if in_tokens == in_other {
            // Present in both or in neither: merging changes nothing for it.
            continue;
        }

        // The set that does not yet contain terminal `i`.
        let opposite_set: &TokenSet = if in_tokens { other } else { &*tokens };
        let symbol = Symbol::terminal(i);

        let is_incompatible = opposite_set.terminals().any(|existing| {
            let hard_conflict = token_conflict_map.does_conflict(i, existing.index)
                || token_conflict_map.does_match_prefix(i, existing.index);
            hard_conflict
                || (!coincident_token_index.contains(symbol, existing)
                    && (token_conflict_map.does_overlap(existing.index, i)
                        || token_conflict_map.does_overlap(i, existing.index)))
        });
        if is_incompatible {
            return false;
        }
    }

    tokens.insert_all(other);
    true
}

/// Shrinks `table` by collapsing groups of lex states into single states,
/// and remaps every `lex_state_id` in `parse_table` to the new numbering.
///
/// # Panics
///
/// Panics (via `unwrap`) if `table` has no states, since no group then
/// contains state 0.
fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
    // Initially group the states by their accept action and their
    // valid lookahead characters.
    //
    // The `i == 0` component keeps state 0 in a group of its own. Only the
    // presence of an EOF action and each advance action's
    // (characters, in_main_token) pair are part of the signature; the target
    // states are compared later by `lex_states_differ`.
    let mut state_ids_by_signature = HashMap::new();
    for (i, state) in table.states.iter().enumerate() {
        let signature = (
            i == 0,
            state.accept_action,
            state.eof_action.is_some(),
            state
                .advance_actions
                .iter()
                .map(|(characters, action)| (characters.clone(), action.in_main_token))
                .collect::<Vec<_>>(),
        );
        state_ids_by_signature
            .entry(signature)
            .or_insert(Vec::new())
            .push(i);
    }
    // `HashMap` iteration order is unspecified, so sort the groups to make
    // the resulting numbering deterministic.
    let mut state_ids_by_group_id = state_ids_by_signature
        .into_iter()
        .map(|e| e.1)
        .collect::<Vec<_>>();
    state_ids_by_group_id.sort();
    // Move the group holding state 0 to index 0 so that state keeps id 0.
    let error_group_index = state_ids_by_group_id
        .iter()
        .position(|g| g.contains(&0))
        .unwrap();
    state_ids_by_group_id.swap(error_group_index, 0);

    // Inverse lookup: state id -> id of the group it currently belongs to.
    let mut group_ids_by_state_id = vec![0; table.states.len()];
    for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() {
        for state_id in state_ids {
            group_ids_by_state_id[*state_id] = group_id;
        }
    }

    // Keep splitting until `split_state_id_groups` reports no change.
    // NOTE(review): the helper lives in `crate::dedup` and is not visible
    // here; presumably it splits groups whose members differ per
    // `lex_states_differ`, and the meaning of the `1` argument should be
    // confirmed there.
    while split_state_id_groups(
        &table.states,
        &mut state_ids_by_group_id,
        &mut group_ids_by_state_id,
        1,
        lex_states_differ,
    ) {}

    // Build one new state per group from the group's first member, rewriting
    // its transition targets from old state ids to group ids.
    let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
    for state_ids in &state_ids_by_group_id {
        let mut new_state = LexState::default();
        mem::swap(&mut new_state, &mut table.states[state_ids[0]]);

        for (_, advance_action) in &mut new_state.advance_actions {
            advance_action.state = group_ids_by_state_id[advance_action.state];
        }
        if let Some(eof_action) = &mut new_state.eof_action {
            eof_action.state = group_ids_by_state_id[eof_action.state];
        }
        new_states.push(new_state);
    }

    // Point the parse states at the merged lex states.
    for state in &mut parse_table.states {
        state.lex_state_id = group_ids_by_state_id[state.lex_state_id];
    }

    table.states = new_states;
}

// Reports whether two lex states can be told apart by where their advance
// actions lead.
//
// The advance actions of `left` and `right` are paired up by position (only as
// many pairs as the shorter list provides). The states differ if, for any pair,
// the two target states currently belong to different groups according to
// `group_ids_by_state_id`.
fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool {
    let paired_actions = left.advance_actions.iter().zip(&right.advance_actions);
    for ((_, left_action), (_, right_action)) in paired_actions {
        let left_group = group_ids_by_state_id[left_action.state];
        let right_group = group_ids_by_state_id[right_action.state];
        if left_group != right_group {
            return true;
        }
    }
    false
}

fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
    // Get a mapping of old state index -> new_state_index
    let mut old_ids_by_new_id = (0..table.states.len()).collect::<Vec<_>>();
    old_ids_by_new_id[1..].sort_by_key(|id| &table.states[*id]);

    // Get the inverse mapping
    let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
    for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
        new_ids_by_old_id[*old_id] = id;
    }

    // Reorder the parse states and update their references to reflect
    // the new ordering.
    table.states = old_ids_by_new_id
        .iter()
        .map(|old_id| {
            let mut state = LexState::default();
            mem::swap(&mut state, &mut table.states[*old_id]);
            for (_, advance_action) in &mut state.advance_actions {
                advance_action.state = new_ids_by_old_id[advance_action.state];
            }
            if let Some(eof_action) = &mut state.eof_action {
                eof_action.state = new_ids_by_old_id[eof_action.state];
            }
            state
        })
        .collect();

    // Update the parse table's lex state references
    for state in &mut parse_table.states {
        state.lex_state_id = new_ids_by_old_id[state.lex_state_id];
    }
}



================================================
FILE: crates/generate/src/build_tables/build_parse_table.rs
================================================
use std::{
    cmp::Ordering,
    collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
    hash::BuildHasherDefault,
};

use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
use serde::Serialize;
use thiserror::Error;

use super::{
    item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
    item_set_builder::ParseItemSetBuilder,
};
use crate::{
    grammars::{LexicalGrammar, PrecedenceEntry, ReservedWordSetId, SyntaxGrammar, VariableType},
    node_types::VariableInfo,
    rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
    tables::{
        FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable,
        ParseTableEntry, ProductionInfo, ProductionInfoId,
    },
};

// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
type SymbolSequence = Vec<Symbol>;

// The auxiliary symbols recorded on the way to a parse state (see
// `AuxiliarySymbolInfo`).
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
// Per-state bookkeeping: the example symbol sequence recorded when the state
// was created, paired with the state's item set.
pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);

// Records one auxiliary non-terminal (a repeat symbol) that an item was about
// to consume while a parse state's actions were being added.
#[derive(Clone, PartialEq)]
struct AuxiliarySymbolInfo {
    // The auxiliary non-terminal itself.
    auxiliary_symbol: Symbol,
    // Presumably the symbols within which the auxiliary symbol is used; filled
    // in by `get_auxiliary_node_info` -- TODO confirm against that method.
    parent_symbols: Vec<Symbol>,
}

// Summary of the Reduce actions kept so far for a single lookahead symbol
// within one parse state. Used when comparing precedences and when resolving
// conflicts for that lookahead.
#[derive(Debug, Default)]
struct ReductionInfo {
    // Precedence of the most recently kept reduction.
    precedence: Precedence,
    // Non-terminals being reduced, kept sorted and free of duplicates.
    symbols: Vec<Symbol>,
    // Set when at least one kept reduction is left-associative.
    has_left_assoc: bool,
    // Set when at least one kept reduction is right-associative.
    has_right_assoc: bool,
    // Set when at least one kept reduction has no associativity.
    has_non_assoc: bool,
}

// A parse state that has been created but whose actions have not been added
// yet.
struct ParseStateQueueEntry {
    // Index of the state in the parse table.
    state_id: ParseStateId,
    // Auxiliary symbols recorded on the way to this state; handed to
    // `add_actions` when the entry is dequeued.
    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
}

// Working state for constructing a `ParseTable` from a syntax grammar and a
// lexical grammar.
struct ParseTableBuilder<'a> {
    // Computes transitive closures of item sets and FIRST sets.
    item_set_builder: ParseItemSetBuilder<'a>,
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // Per-variable information, indexed by variable index; consulted for the
    // variables' `fields`.
    variable_info: &'a [VariableInfo],
    // Assigns a sequential id to each distinct item set core.
    core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
    // Maps each item set seen so far to its parse state, so that equivalent
    // item sets share one state.
    state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
    // For each parse state: its example symbol sequence and its item set.
    parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
    // States that still need their actions added, in creation order.
    parse_state_queue: VecDeque<ParseStateQueueEntry>,
    // One `(first terminal, state id)` pair per state created for the
    // non-terminal extra rules.
    non_terminal_extra_states: Vec<(Symbol, usize)>,
    // Seeded with the grammar's expected conflicts; whatever remains after the
    // build is reported as unnecessary.
    actual_conflicts: HashSet<Vec<Symbol>>,
    // The table under construction.
    parse_table: ParseTable,
}

// Result type used throughout parse table construction.
pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;

// Errors that abort parse table construction.
#[derive(Debug, Error, Serialize)]
pub enum ParseTableBuilderError {
    // A conflict between parse actions that was left unresolved.
    #[error("Unresolved conflict for symbol sequence:\n\n{0}")]
    Conflict(#[from] ConflictError),
    // A state at the end of a non-terminal extra rule has more than one
    // terminal entry.
    #[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
    AmbiguousExtra(#[from] AmbiguousExtraError),
    // A production of a non-terminal extra rule begins with a non-terminal.
    // The payload is that non-terminal's name.
    #[error(
        "The non-terminal rule `{0}` is used in a non-terminal `extra` rule, which is not allowed."
    )]
    ImproperNonTerminalExtra(String),
}

// Description of a parse conflict, in a form that can be both serialized and
// displayed as a human-readable report.
#[derive(Default, Debug, Serialize)]
pub struct ConflictError {
    // Symbols printed before the conflict marker on the report's first line.
    pub symbol_sequence: Vec<String>,
    // The lookahead at which the conflict occurs.
    pub conflicting_lookahead: String,
    // The competing ways of interpreting the input; listed in sorted order
    // when displayed.
    pub possible_interpretations: Vec<Interpretation>,
    // Suggested grammar changes, displayed in the order stored here.
    pub possible_resolutions: Vec<Resolution>,
}

// One way of interpreting the input at a conflict: a production of
// `variable_name` with a position marker inside (or after) it.
#[derive(Default, Debug, Serialize)]
pub struct Interpretation {
    // Symbols displayed before the parenthesized production.
    pub preceding_symbols: Vec<String>,
    // Name of the variable whose production is shown.
    pub variable_name: String,
    // The symbols of the production's steps.
    pub production_step_symbols: Vec<String>,
    // Index of the step before which the position marker is displayed.
    pub step_index: u32,
    // When true, the marker and the conflicting lookahead are displayed after
    // the closing parenthesis.
    pub done: bool,
    pub conflicting_lookahead: String,
    // Optional precedence, appended to the interpretation's line in
    // `ConflictError`'s display output.
    pub precedence: Option<String>,
    // Optional associativity; displayed only when `precedence` is also set.
    pub associativity: Option<String>,
}

// A suggested way of resolving a conflict. Each variant carries the names of
// the rules that the suggestion applies to.
#[derive(Debug, Serialize)]
pub enum Resolution {
    // Give these rules a higher precedence than the other rules.
    Precedence { symbols: Vec<String> },
    // Give these rules a left or right associativity.
    Associativity { symbols: Vec<String> },
    // Declare a conflict among these rules.
    AddConflict { symbols: Vec<String> },
}

// Reported when a non-terminal extra rule does not have an unambiguous ending.
#[derive(Debug, Serialize)]
pub struct AmbiguousExtraError {
    // Names of the variables whose items are involved; displayed as a
    // comma-separated list.
    pub parent_symbols: Vec<String>,
}

impl std::fmt::Display for ConflictError {
    // Renders the conflict as a multi-line report:
    //
    // 1. the symbol sequence, the position marker and the conflicting lookahead,
    // 2. a numbered, sorted list of the possible interpretations, each optionally
    //    followed by a column-aligned precedence/associativity note,
    // 3. a numbered list of the possible resolutions.
    //
    // Any error reported by the formatter is propagated to the caller.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for symbol in &self.symbol_sequence {
            write!(f, "  {symbol}")?;
        }
        writeln!(f, "  •  {}  …\n", self.conflicting_lookahead)?;

        writeln!(f, "Possible interpretations:\n")?;
        let mut interpretations = self
            .possible_interpretations
            .iter()
            .map(|i| {
                let line = i.to_string();
                // The associativity is only shown together with a precedence.
                let prec_line = if let (Some(precedence), Some(associativity)) =
                    (&i.precedence, &i.associativity)
                {
                    Some(format!(
                        "(precedence: {precedence}, associativity: {associativity})",
                    ))
                } else {
                    i.precedence
                        .as_ref()
                        .map(|precedence| format!("(precedence: {precedence})"))
                };

                (line, prec_line)
            })
            .collect::<Vec<_>>();
        // Width (in characters) of the longest interpretation, used to align the
        // precedence notes. With no interpretations at all (for example a
        // default-constructed error) there is nothing to align, so fall back to 0
        // instead of panicking.
        let max_interpretation_length = interpretations
            .iter()
            .map(|i| i.0.chars().count())
            .max()
            .unwrap_or(0);
        interpretations.sort_unstable();
        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
            // Propagate formatter errors rather than unwrapping them.
            write!(f, "  {}:", i + 1)?;
            write!(f, "{line}")?;
            if let Some(prec_suffix) = prec_suffix {
                // Pad with spaces so that every suffix starts in the same column.
                write!(
                    f,
                    "{:1$}",
                    "",
                    max_interpretation_length.saturating_sub(line.chars().count()) + 2
                )?;
                write!(f, "{prec_suffix}")?;
            }
            writeln!(f)?;
        }

        writeln!(f, "\nPossible resolutions:\n")?;
        for (i, resolution) in self.possible_resolutions.iter().enumerate() {
            writeln!(f, "  {}:  {resolution}", i + 1)?;
        }
        Ok(())
    }
}

impl std::fmt::Display for Interpretation {
    // Writes the interpretation on a single line: the preceding symbols, then
    // the production in parentheses (headed by the variable name) with a
    // position marker in front of the step at `step_index`. When `done` is
    // set, the marker and the conflicting lookahead follow the closing
    // parenthesis.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.preceding_symbols
            .iter()
            .try_for_each(|symbol| write!(f, "  {symbol}"))?;

        write!(f, "  ({}", self.variable_name)?;
        let marker_position = self.step_index as usize;
        for (position, symbol) in self.production_step_symbols.iter().enumerate() {
            if position == marker_position {
                f.write_str("  •")?;
            }
            write!(f, "  {symbol}")?;
        }
        f.write_str(")")?;

        if !self.done {
            return Ok(());
        }
        write!(f, "  •  {}  …", self.conflicting_lookahead)
    }
}

impl std::fmt::Display for Resolution {
    // Writes the suggestion as one sentence naming the affected rules.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Writes every symbol wrapped in backticks, putting `separator`
        // between neighbouring symbols.
        fn write_symbols(
            f: &mut std::fmt::Formatter,
            symbols: &[String],
            separator: &str,
        ) -> std::fmt::Result {
            for (index, symbol) in symbols.iter().enumerate() {
                if index != 0 {
                    f.write_str(separator)?;
                }
                write!(f, "`{symbol}`")?;
            }
            Ok(())
        }

        match self {
            Self::Precedence { symbols } => {
                f.write_str("Specify a higher precedence in ")?;
                write_symbols(f, symbols, " and ")?;
                f.write_str(" than in the other rules.")
            }
            Self::Associativity { symbols } => {
                f.write_str("Specify a left or right associativity in ")?;
                write_symbols(f, symbols, ", ")
            }
            Self::AddConflict { symbols } => {
                f.write_str("Add a conflict for these rules: ")?;
                write_symbols(f, symbols, ", ")
            }
        }
    }
}

impl std::fmt::Display for AmbiguousExtraError {
    // Writes the parent symbol names as a comma-separated list; writes nothing
    // when there are none.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let listing = self.parent_symbols.join(", ");
        f.write_str(&listing)
    }
}

// Both error types use only the default methods of `std::error::Error`. They
// are wrapped by `ParseTableBuilderError` through its `#[from]` variants.
impl std::error::Error for ConflictError {}
impl std::error::Error for AmbiguousExtraError {}

impl<'a> ParseTableBuilder<'a> {
    // Creates a builder with an empty parse table and no parse states.
    //
    // `actual_conflicts` starts out holding every conflict the grammar declares
    // as expected; `build` reports whatever is still in the set at the end as
    // unnecessary.
    fn new(
        syntax_grammar: &'a SyntaxGrammar,
        lexical_grammar: &'a LexicalGrammar,
        item_set_builder: ParseItemSetBuilder<'a>,
        variable_info: &'a [VariableInfo],
    ) -> Self {
        let parse_table = ParseTable {
            states: Vec::new(),
            symbols: Vec::new(),
            external_lex_states: Vec::new(),
            production_infos: Vec::new(),
            max_aliased_production_length: 1,
        };
        let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();

        Self {
            item_set_builder,
            syntax_grammar,
            lexical_grammar,
            variable_info,
            core_ids_by_core: HashMap::new(),
            state_ids_by_item_set: IndexMap::default(),
            parse_state_info_by_id: Vec::new(),
            parse_state_queue: VecDeque::new(),
            non_terminal_extra_states: Vec::new(),
            actual_conflicts,
            parse_table,
        }
    }

    fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
        // Ensure that the empty alias sequence has index 0.
        self.parse_table
            .production_infos
            .push(ProductionInfo::default());

        // Add the error state at index 0.
        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());

        // Add the starting state at index 1.
        self.add_parse_state(
            &Vec::new(),
            &Vec::new(),
            ParseItemSet {
                entries: vec![ParseItemSetEntry {
                    item: ParseItem::start(),
                    lookaheads: std::iter::once(Symbol::end()).collect(),
                    following_reserved_word_set: ReservedWordSetId::default(),
                }],
            },
        );

        // Compute the possible item sets for non-terminal extras.
        let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
        for extra_non_terminal in self
            .syntax_grammar
            .extra_symbols
            .iter()
            .filter(|s| s.is_non_terminal())
        {
            let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
            for production in &variable.productions {
                non_terminal_extra_item_sets_by_first_terminal
                    .entry(production.first_symbol().unwrap())
                    .or_insert_with(ParseItemSet::default)
                    .insert(ParseItem {
                        variable_index: extra_non_terminal.index as u32,
                        production,
                        step_index: 1,
                        has_preceding_inherited_fields: false,
                    })
                    .lookaheads
                    .insert(Symbol::end_of_nonterminal_extra());
            }
        }

        let non_terminal_sets_len = non_terminal_extra_item_sets_by_first_terminal.len();
        self.non_terminal_extra_states
            .reserve(non_terminal_sets_len);
        self.parse_state_info_by_id.reserve(non_terminal_sets_len);
        self.parse_table.states.reserve(non_terminal_sets_len);
        self.parse_state_queue.reserve(non_terminal_sets_len);
        // Add a state for each starting terminal of a non-terminal extra rule.
        for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
            if terminal.is_non_terminal() {
                Err(ParseTableBuilderError::ImproperNonTerminalExtra(
                    self.symbol_name(&terminal),
                ))?;
            }

            self.non_terminal_extra_states
                .push((terminal, self.parse_table.states.len()));
            self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
        }

        while let Some(entry) = self.parse_state_queue.pop_front() {
            let item_set = self
                .item_set_builder
                .transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);

            self.add_actions(
                self.parse_state_info_by_id[entry.state_id].0.clone(),
                entry.preceding_auxiliary_symbols,
                entry.state_id,
                &item_set,
            )?;
        }

        if !self.actual_conflicts.is_empty() {
            println!("Warning: unnecessary conflicts");
            for conflict in &self.actual_conflicts {
                println!(
                    "  {}",
                    conflict
                        .iter()
                        .map(|symbol| format!("`{}`", self.symbol_name(symbol)))
                        .collect::<Vec<_>>()
                        .join(", ")
                );
            }
        }

        Ok((self.parse_table, self.parse_state_info_by_id))
    }

    // Returns the id of the parse state for `item_set`.
    //
    // If an equal item set already has a state, that state's id is returned
    // and nothing else happens. Otherwise a new, empty state is appended to the
    // parse table, its info (`preceding_symbols` plus the item set) is
    // recorded, and it is queued, together with `preceding_auxiliary_symbols`,
    // to have its actions added later.
    fn add_parse_state(
        &mut self,
        preceding_symbols: &SymbolSequence,
        preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
        item_set: ParseItemSet<'a>,
    ) -> ParseStateId {
        // Reuse the existing state when this item set has been seen before.
        let vacant = match self.state_ids_by_item_set.entry(item_set) {
            Entry::Occupied(occupied) => return *occupied.get(),
            Entry::Vacant(vacant) => vacant,
        };

        // Item sets with the same core share a core id; an unseen core gets the
        // next sequential id.
        let core = vacant.key().core();
        let fresh_core_id = self.core_ids_by_core.len();
        let core_id = *self.core_ids_by_core.entry(core).or_insert(fresh_core_id);

        let new_state_id = self.parse_table.states.len();
        self.parse_table.states.push(ParseState {
            id: new_state_id,
            lex_state_id: 0,
            external_lex_state_id: 0,
            terminal_entries: IndexMap::default(),
            nonterminal_entries: IndexMap::default(),
            reserved_words: TokenSet::default(),
            core_id,
        });
        self.parse_state_info_by_id
            .push((preceding_symbols.clone(), vacant.key().clone()));
        self.parse_state_queue.push_back(ParseStateQueueEntry {
            state_id: new_state_id,
            preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
        });

        vacant.insert(new_state_id);
        new_state_id
    }

    // Populates the parse table entries of the state `state_id` from its item
    // set (the caller in `build` passes the transitive closure of the state's
    // stored item set).
    //
    // * `preceding_symbols` - example symbol sequence leading to this state;
    //   temporarily extended by one symbol while each successor state is
    //   registered.
    // * `preceding_auxiliary_symbols` - auxiliary symbols recorded on the way
    //   here; extended with every auxiliary non-terminal that an item of this
    //   state is about to consume.
    // * `state_id` - index of the state in `self.parse_table.states`.
    // * `item_set` - the items belonging to this state.
    //
    // Returns an error if `handle_conflict` fails for one of the conflicting
    // lookaheads, or if the state ends a non-terminal extra rule and yet has
    // more than one terminal entry.
    fn add_actions(
        &mut self,
        mut preceding_symbols: SymbolSequence,
        mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
        state_id: ParseStateId,
        item_set: &ParseItemSet<'a>,
    ) -> BuildTableResult<()> {
        let mut terminal_successors = BTreeMap::new();
        let mut non_terminal_successors = BTreeMap::new();
        let mut lookaheads_with_conflicts = TokenSet::new();
        let mut reduction_infos = HashMap::<Symbol, ReductionInfo>::new();

        // Each item in the item set contributes to either a Shift action or a Reduce
        // action in this state.
        for ParseItemSetEntry {
            item,
            lookaheads,
            following_reserved_word_set: reserved_lookaheads,
        } in &item_set.entries
        {
            // If the item is unfinished, then this state has a transition for the item's
            // next symbol. Advance the item to its next step and insert the resulting
            // item into the successor item set.
            if let Some(next_symbol) = item.symbol() {
                let mut successor = item.successor();
                let successor_set = if next_symbol.is_non_terminal() {
                    let variable = &self.syntax_grammar.variables[next_symbol.index];

                    // Keep track of where auxiliary non-terminals (repeat symbols) are
                    // used within visible symbols. This information may be needed later
                    // for conflict resolution.
                    if variable.is_auxiliary() {
                        preceding_auxiliary_symbols
                            .push(self.get_auxiliary_node_info(item_set, next_symbol));
                    }

                    // For most parse items, the symbols associated with the preceding children
                    // don't matter: they have no effect on the REDUCE action that would be
                    // performed at the end of the item. But the symbols *do* matter for
                    // children that are hidden and have fields, because those fields are
                    // "inherited" by the parent node.
                    //
                    // If this item has consumed a hidden child with fields, then the symbols
                    // of its preceding children need to be taken into account when comparing
                    // it with other items.
                    if variable.is_hidden()
                        && !self.variable_info[next_symbol.index].fields.is_empty()
                    {
                        successor.has_preceding_inherited_fields = true;
                    }

                    non_terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(ParseItemSet::default)
                } else {
                    terminal_successors
                        .entry(next_symbol)
                        .or_insert_with(ParseItemSet::default)
                };
                // Merge this item's lookaheads into the successor entry, and keep the
                // larger of the two reserved word set ids.
                let successor_entry = successor_set.insert(successor);
                successor_entry.lookaheads.insert_all(lookaheads);
                successor_entry.following_reserved_word_set = successor_entry
                    .following_reserved_word_set
                    .max(*reserved_lookaheads);
            }
            // If the item is finished, then add a Reduce action to this state based
            // on this item.
            else {
                let symbol = Symbol::non_terminal(item.variable_index as usize);
                let action = if item.is_augmented() {
                    ParseAction::Accept
                } else {
                    ParseAction::Reduce {
                        symbol,
                        child_count: item.step_index as usize,
                        dynamic_precedence: item.production.dynamic_precedence,
                        production_id: self.get_production_id(item),
                    }
                };

                let precedence = item.precedence();
                let associativity = item.associativity();
                for lookahead in lookaheads.iter() {
                    let table_entry = self.parse_table.states[state_id]
                        .terminal_entries
                        .entry(lookahead)
                        .or_insert_with(ParseTableEntry::new);
                    let reduction_info = reduction_infos.entry(lookahead).or_default();

                    // While inserting Reduce actions, eagerly resolve conflicts related
                    // to precedence: avoid inserting lower-precedence reductions, and
                    // clear the action list when inserting higher-precedence reductions.
                    if table_entry.actions.is_empty() {
                        table_entry.actions.push(action);
                    } else {
                        match Self::compare_precedence(
                            self.syntax_grammar,
                            precedence,
                            &[symbol],
                            &reduction_info.precedence,
                            &reduction_info.symbols,
                        ) {
                            Ordering::Greater => {
                                table_entry.actions.clear();
                                table_entry.actions.push(action);
                                lookaheads_with_conflicts.remove(&lookahead);
                                // Forget the superseded reductions; the info is refilled
                                // from the current item below.
                                *reduction_info = ReductionInfo::default();
                            }
                            Ordering::Equal => {
                                table_entry.actions.push(action);
                                lookaheads_with_conflicts.insert(lookahead);
                            }
                            // A lower-precedence reduction is dropped without being
                            // recorded in the reduction info.
                            Ordering::Less => continue,
                        }
                    }

                    reduction_info.precedence.clone_from(precedence);
                    // Keep `symbols` sorted and free of duplicates.
                    if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
                        reduction_info.symbols.insert(i, symbol);
                    }
                    match associativity {
                        Some(Associativity::Left) => reduction_info.has_left_assoc = true,
                        Some(Associativity::Right) => reduction_info.has_right_assoc = true,
                        None => reduction_info.has_non_assoc = true,
                    }
                }
            }
        }

        // Removes consecutive duplicate entries only.
        preceding_auxiliary_symbols.dedup();

        // Having computed the successor item sets for each symbol, add a new
        // parse state for each of these item sets, and add a corresponding Shift
        // action to this state.
        for (symbol, next_item_set) in terminal_successors {
            preceding_symbols.push(symbol);
            let next_state_id = self.add_parse_state(
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                next_item_set,
            );
            preceding_symbols.pop();

            let entry = self.parse_table.states[state_id]
                .terminal_entries
                .entry(symbol);
            // An entry that already holds actions (Reduce actions added above) now
            // competes with the Shift action for this symbol.
            if let Entry::Occupied(e) = &entry {
                if !e.get().actions.is_empty() {
                    lookaheads_with_conflicts.insert(symbol);
                }
            }

            entry
                .or_insert_with(ParseTableEntry::new)
                .actions
                .push(ParseAction::Shift {
                    state: next_state_id,
                    is_repetition: false,
                });
        }

        for (symbol, next_item_set) in non_terminal_successors {
            preceding_symbols.push(symbol);
            let next_state_id = self.add_parse_state(
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                next_item_set,
            );
            preceding_symbols.pop();
            self.parse_table.states[state_id]
                .nonterminal_entries
                .insert(symbol, GotoAction::Goto(next_state_id));
        }

        // For any symbol with multiple actions, perform conflict resolution.
        // This will either
        // * choose one action over the others using precedence or associativity
        // * keep multiple actions if this conflict has been whitelisted in the grammar
        // * fail, terminating the parser generation process
        for symbol in lookaheads_with_conflicts.iter() {
            self.handle_conflict(
                item_set,
                state_id,
                &preceding_symbols,
                &preceding_auxiliary_symbols,
                symbol,
                reduction_infos.get(&symbol).unwrap(),
            )?;
        }

        // Add actions for the grammar's `extra` symbols.
        let state = &mut self.parse_table.states[state_id];
        let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();

        // If this state represents the end of a non-terminal extra rule, then make sure that
        // it doesn't have other successor states. Non-terminal extra rules must have
        // unambiguous endings.
        if is_end_of_non_terminal_extra {
            if state.terminal_entries.len() > 1 {
                // Collect the distinct variables of the non-augmented items that are
                // past their first step; their names go into the error.
                let parent_symbols = item_set
                    .entries
                    .iter()
                    .filter_map(|ParseItemSetEntry { item, .. }| {
                        if !item.is_augmented() && item.step_index > 0 {
                            Some(item.variable_index)
                        } else {
                            None
                        }
                    })
                    .collect::<HashSet<_>>();
                let parent_symbol_names = parent_symbols
                    .iter()
                    .map(|&variable_index| {
                        self.syntax_grammar.variables[variable_index as usize]
                            .name
                            .clone()
                    })
                    .collect::<Vec<_>>();

                Err(AmbiguousExtraError {
                    parent_symbols: parent_symbol_names,
                })?;
            }
        }
        // Add actions for the start tokens of each non-terminal extra rule.
        else {
            // Note: the loop variable `state_id` shadows the parameter; here it is
            // the id of the non-terminal extra's state. `or_insert` leaves any
            // entry that already exists for the terminal untouched.
            for (terminal, state_id) in &self.non_terminal_extra_states {
                state
                    .terminal_entries
                    .entry(*terminal)
                    .or_insert(ParseTableEntry {
                        reusable: true,
                        actions: vec![ParseAction::Shift {
                            state: *state_id,
                            is_repetition: false,
                        }],
                    });
            }

            // Add ShiftExtra actions for the terminal extra tokens. These actions
            // are added to every state except for those at the ends of non-terminal
            // extras.
            for extra_token in &self.syntax_grammar.extra_symbols {
                if extra_token.is_non_terminal() {
                    state
                        .nonterminal_entries
                        .insert(*extra_token, GotoAction::ShiftExtra);
                } else {
                    state
                        .terminal_entries
                        .entry(*extra_token)
                        .or_insert(ParseTableEntry {
                            reusable: true,
                            actions: vec![ParseAction::ShiftExtra],
                        });
                }
            }
        }

        // If the grammar has a word token, choose this state's reserved words: take
        // the largest reserved word set id among the unfinished items whose next
        // symbol is the word token and the finished items that have the word token
        // as a lookahead.
        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
            let reserved_word_set_id = item_set
                .entries
                .iter()
                .filter_map(|entry| {
                    if let Some(next_step) = entry.item.step() {
                        if next_step.symbol == keyword_capture_token {
                            Some(next_step.reserved_word_set_id)
                        } else {
                            None
                        }
                    } else if entry.lookaheads.contains(&keyword_capture_token) {
                        Some(entry.following_reserved_word_set)
                    } else {
                        None
                    }
                })
                .max();
            if let Some(reserved_word_set_id) = reserved_word_set_id {
                state.reserved_words =
                    self.syntax_grammar.reserved_word_sets[reserved_word_set_id.0].clone();
            }
        }

        Ok(())
    }

    fn handle_conflict(
        &mut self,
        item_set: &ParseItemSet,
        state_id: ParseStateId,
        preceding_symbols: &SymbolSequence,
        preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
        conflicting_lookahead: Symbol,
        reduction_info: &ReductionInfo,
    ) -> BuildTableResult<()> {
        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .get_mut(&conflicting_lookahead)
            .unwrap();

        // Determine which items in the set conflict with each other, and the
        // precedences associated with SHIFT vs REDUCE actions. There won't
        // be multiple REDUCE actions with different precedences; that is
        // sorted out ahead of time in `add_actions`. But there can still be
        // REDUCE-REDUCE conflicts where all actions have the *same*
        // precedence, and there can still be SHIFT/REDUCE conflicts.
        let mut considered_associativity = false;
        let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
        let mut conflicting_items = BTreeSet::new();
        for ParseItemSetEntry {
            item, lookaheads, ..
        } in &item_set.entries
        {
            if let Some(step) = item.step() {
                if item.step_index > 0
                    && self
                        .item_set_builder
                        .first_set(&step.symbol)
                        .contains(&conflicting_lookahead)
                {
                    if item.variable_index != u32::MAX {
                        conflicting_items.insert(item);
                    }

                    let p = (
                        item.precedence(),
                        Symbol::non_terminal(item.variable_index as usize),
                    );
                    if let Err(i) = shift_precedence.binary_search(&p) {
                        shift_precedence.insert(i, p);
                    }
                }
            } else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX
            {
                conflicting_items.insert(item);
            }
        }

        if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
            // If all of the items in the conflict have the same parent symbol,
            // and that parent symbols is auxiliary, then this is just the intentional
            // ambiguity associated with a repeat rule. Resolve that class of ambiguity
            // by leaving it in the parse table, but marking the SHIFT action with
            // an `is_repetition` flag.
            let conflicting_variable_index =
                conflicting_items.iter().next().unwrap().variable_index;
            if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary()
                && conflicting_items
                    .iter()
                    .all(|item| item.variable_index == conflicting_variable_index)
            {
                *is_repetition = true;
                return Ok(());
            }

            // If the SHIFT action has higher precedence, remove all the REDUCE actions.
            let mut shift_is_less = false;
            let mut shift_is_more = false;
            for p in shift_precedence {
                match Self::compare_precedence(
                    self.syntax_grammar,
                    p.0,
                    &[p.1],
                    &reduction_info.precedence,
                    &reduction_info.symbols,
                ) {
                    Ordering::Greater => shift_is_more = true,
                    Ordering::Less => shift_is_less = true,
                    Ordering::Equal => {}
                }
            }

            if shift_is_more && !shift_is_less {
                entry.actions.drain(0..entry.actions.len() - 1);
            }
            // If the REDUCE actions have higher precedence, remove the SHIFT action.
            else if shift_is_less && !shift_is_more {
                entry.actions.pop();
                conflicting_items.retain(|item| item.is_done());
            }
            // If the SHIFT and REDUCE actions have the same precedence, consider
            // the REDUCE actions' associativity.
            else if !shift_is_less && !shift_is_more {
                considered_associativity = true;

                // If all Reduce actions are left associative, remove the SHIFT action.
                // If all Reduce actions are right associative, remove the REDUCE actions.
                match (
                    reduction_info.has_left_assoc,
                    reduction_info.has_non_assoc,
                    reduction_info.has_right_assoc,
                ) {
                    (true, false, false) => {
                        entry.actions.pop();
                        conflicting_items.retain(|item| item.is_done());
                    }
                    (false, false, true) => {
                        entry.actions.drain(0..entry.actions.len() - 1);
                    }
                    _ => {}
                }
            }
        }

        // If all of the actions but one have been eliminated, then there's no problem.
        let entry = self.parse_table.states[state_id]
            .terminal_entries
            .get_mut(&conflicting_lookahead)
            .unwrap();
        if entry.actions.len() == 1 {
            return Ok(());
        }

        // Determine the set of parent symbols involved in this conflict.
        let mut actual_conflict = Vec::new();
        for item in &conflicting_items {
            let symbol = Symbol::non_terminal(item.variable_index as usize);
            if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
                actual_conflict.extend(
                    preceding_auxiliary_symbols
                        .iter()
                        .rev()
                        .find_map(|info| {
                            if info.auxiliary_symbol == symbol {
                                Some(&info.parent_symbols)
                            } else {
                                None
                            }
                        })
                        .unwrap()
                        .iter(),
                );
            } else {
                actual_conflict.push(symbol);
            }
        }
        actual_conflict.sort_unstable();
        actual_conflict.dedup();

        // If this set of symbols has been whitelisted, then there's no error.
        if self
            .syntax_grammar
            .expected_conflicts
            .contains(&actual_conflict)
        {
            self.actual_conflicts.remove(&actual_conflict);
            return Ok(());
        }

        let mut conflict_error = ConflictError::default();
        for symbol in preceding_symbols {
            conflict_error
                .symbol_sequence
                .push(self.symbol_name(symbol).to_string());
        }
        conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead).to_string();

        let interpretations = conflicting_items
            .iter()
            .map(|item| {
                let preceding_symbols = preceding_symbols
                    .iter()
                    .take(preceding_symbols.len() - item.step_index as usize)
                    .map(|symbol| self.symbol_name(symbol).to_string())
                    .collect::<Vec<_>>();

                let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
                    .name
                    .clone();

                let production_step_symbols = item
                    .production
                    .steps
                    .iter()
                    .map(|step| self.symbol_name(&step.symbol).to_string())
                    .collect::<Vec<_>>();

                let precedence = match item.precedence() {
                    Precedence::None => None,
                    _ => Some(item.precedence().to_string()),
                };

                let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));

                Interpretation {
                    preceding_symbols,
                    variable_name,
                    production_step_symbols,
                    step_index: item.step_index,
                    done: item.is_done(),
                    conflicting_lookahead: self.symbol_name(&conflicting_lookahead).to_string(),
                    precedence,
                    associativity,
                }
            })
            .collect::<Vec<_>>();
        conflict_error.possible_interpretations = interpretations;

        let mut shift_items = Vec::new();
        let mut reduce_items = Vec::new();
        for item in conflicting_items {
            if item.is_done() {
                reduce_items.push(item);
            } else {
                shift_items.push(item);
            }
        }
        shift_items.sort_unstable();
        reduce_items.sort_unstable();

        let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
            let mut last_rule_id = None;
            let mut result = Vec::with_capacity(items.len());
            for item in items {
                if last_rule_id == Some(item.variable_index) {
                    continue;
                }
                last_rule_id = Some(item.variable_index);
                result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
            }

            result
        };

        if actual_conflict.len() > 1 {
            if !shift_items.is_empty() {
                let names = get_rule_names(&shift_items);
                conflict_error
                    .possible_resolutions
                    .push(Resolution::Precedence { symbols: names });
            }

            for item in &reduce_items {
                let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
                conflict_error
                    .possible_resolutions
                    .push(Resolution::Precedence {
                        symbols: vec![name],
                    });
            }
        }

        if considered_associativity {
            let names = get_rule_names(&reduce_items);
            conflict_error
                .possible_resolutions
                .push(Resolution::Associativity { symbols: names });
        }

        conflict_error
            .possible_resolutions
            .push(Resolution::AddConflict {
                symbols: actual_conflict
                    .iter()
                    .map(|s| self.symbol_name(s))
                    .collect(),
            });

        self.actual_conflicts.insert(actual_conflict);

        Err(conflict_error)?
    }

    /// Ranks the `left` precedence against the `right` one.
    ///
    /// Non-zero integer precedences are compared numerically, with
    /// `Precedence::None` standing in for zero. Every other combination is
    /// resolved through the grammar's `precedence_orderings`: the first list in
    /// which one side is encountered after the other decides the result, and
    /// the side that was encountered first is the greater one. An ordering
    /// entry refers to a side either through that side's named precedence or
    /// through the name of one of its symbols. When no list relates the two
    /// sides, they are considered equal.
    fn compare_precedence(
        grammar: &SyntaxGrammar,
        left: &Precedence,
        left_symbols: &[Symbol],
        right: &Precedence,
        right_symbols: &[Symbol],
    ) -> Ordering {
        // Does this ordering entry refer to the given precedence / symbol set?
        let entry_applies =
            |entry: &PrecedenceEntry, precedence: &Precedence, symbols: &[Symbol]| -> bool {
                match entry {
                    PrecedenceEntry::Name(entry_name) => {
                        matches!(precedence, Precedence::Name(name) if name == entry_name)
                    }
                    PrecedenceEntry::Symbol(entry_name) => symbols
                        .iter()
                        .any(|symbol| grammar.variables[symbol.index].name == *entry_name),
                }
            };

        // Integer precedences can be compared to other integer precedences,
        // and to the default precedence, which is zero.
        let numeric_ordering = match (left, right) {
            (Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => {
                Some(l.cmp(r))
            }
            (Precedence::Integer(l), Precedence::None) if *l != 0 => Some(l.cmp(&0)),
            (Precedence::None, Precedence::Integer(r)) if *r != 0 => Some(0.cmp(r)),
            _ => None,
        };
        if let Some(ordering) = numeric_ordering {
            return ordering;
        }

        // Otherwise, consult the explicit orderings declared by the grammar.
        for ordering_list in grammar.precedence_orderings.iter() {
            let mut left_seen = false;
            let mut right_seen = false;
            for entry in ordering_list {
                let hits_left = entry_applies(entry, left, left_symbols);
                let hits_right = entry_applies(entry, right, right_symbols);
                // An entry that matches both sides counts for the left side only.
                if hits_left {
                    if right_seen {
                        return Ordering::Less;
                    }
                    left_seen = true;
                } else if hits_right {
                    if left_seen {
                        return Ordering::Greater;
                    }
                    right_seen = true;
                }
            }
        }

        Ordering::Equal
    }

    /// Collects, for the auxiliary `symbol`, the non-auxiliary rules in
    /// `item_set` whose items have `symbol` as their next expected symbol.
    ///
    /// The result pairs `symbol` with those parent rules, each expressed as a
    /// non-terminal `Symbol`.
    fn get_auxiliary_node_info(
        &self,
        item_set: &ParseItemSet,
        symbol: Symbol,
    ) -> AuxiliarySymbolInfo {
        let variables = &self.syntax_grammar.variables;
        let parent_symbols = item_set
            .entries
            .iter()
            .map(|entry| entry.item)
            .filter(|item| {
                // Only items that are about to consume `symbol` and whose own
                // rule is not itself auxiliary qualify as parents.
                item.symbol() == Some(symbol)
                    && !variables[item.variable_index as usize].is_auxiliary()
            })
            .map(|item| Symbol::non_terminal(item.variable_index as usize))
            .collect();

        AuxiliarySymbolInfo {
            auxiliary_symbol: symbol,
            parent_symbols,
        }
    }

    /// Returns the id of the `ProductionInfo` describing `item`'s production,
    /// registering a new entry in the parse table when no equal one exists.
    ///
    /// The info records the alias of every step (with trailing `None`s
    /// trimmed) and, per field name, the child indices carrying that field —
    /// either directly, or inherited from a non-visible non-terminal child
    /// that has fields of its own. As a side effect, the table's
    /// `max_aliased_production_length` is raised to this production's step
    /// count if that is larger.
    fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
        let steps = &item.production.steps;
        let mut candidate = ProductionInfo {
            alias_sequence: Vec::new(),
            field_map: BTreeMap::new(),
        };

        for (index, step) in steps.iter().enumerate() {
            candidate.alias_sequence.push(step.alias.clone());

            // A field attached directly to this child.
            if let Some(own_field) = step.field_name.as_ref() {
                candidate
                    .field_map
                    .entry(own_field.clone())
                    .or_default()
                    .push(FieldLocation {
                        index,
                        inherited: false,
                    });
            }

            // Fields that bubble up from a non-visible non-terminal child.
            let is_hidden_rule = step.symbol.kind == SymbolType::NonTerminal
                && !self.syntax_grammar.variables[step.symbol.index]
                    .kind
                    .is_visible();
            if is_hidden_rule {
                let child_info = &self.variable_info[step.symbol.index];
                for inherited_field in child_info.fields.keys() {
                    candidate
                        .field_map
                        .entry(inherited_field.clone())
                        .or_default()
                        .push(FieldLocation {
                            index,
                            inherited: true,
                        });
                }
            }
        }

        // Trailing steps without an alias carry no information.
        while let Some(None) = candidate.alias_sequence.last() {
            candidate.alias_sequence.pop();
        }

        let table = &mut self.parse_table;
        table.max_aliased_production_length =
            table.max_aliased_production_length.max(steps.len());

        let existing = table
            .production_infos
            .iter()
            .position(|known| *known == candidate);
        match existing {
            Some(id) => id,
            None => {
                table.production_infos.push(candidate);
                table.production_infos.len() - 1
            }
        }
    }

    /// Produces the human-readable name of `symbol`.
    ///
    /// Both end-of-input kinds are rendered as `EOF`; external tokens and
    /// non-terminals use their declared names; terminals use the lexical
    /// variable's name, wrapped in single quotes unless the variable is a
    /// named one.
    fn symbol_name(&self, symbol: &Symbol) -> String {
        let index = symbol.index;
        match symbol.kind {
            SymbolType::Terminal => {
                let token = &self.lexical_grammar.variables[index];
                if token.kind == VariableType::Named {
                    token.name.clone()
                } else {
                    format!("'{}'", token.name)
                }
            }
            SymbolType::NonTerminal => self.syntax_grammar.variables[index].name.clone(),
            SymbolType::External => self.syntax_grammar.external_tokens[index].name.clone(),
            SymbolType::End | SymbolType::EndOfNonTerminalExtra => String::from("EOF"),
        }
    }
}

/// Builds the parse table for the given syntax and lexical grammars.
///
/// This is a thin entry point: it constructs a `ParseTableBuilder` from the
/// four arguments and returns the result of its `build` method unchanged. On
/// success that result pairs the `ParseTable` with a `Vec` of
/// `ParseStateInfo` values.
pub fn build_parse_table<'a>(
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    item_set_builder: ParseItemSetBuilder<'a>,
    variable_info: &'a [VariableInfo],
) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
    ParseTableBuilder::new(
        syntax_grammar,
        lexical_grammar,
        item_set_builder,
        variable_info,
    )
    .build()
}



================================================
FILE: crates/generate/src/build_tables/coincident_tokens.rs
================================================
use std::fmt;

use crate::{
    grammars::LexicalGrammar,
    rules::Symbol,
    tables::{ParseStateId, ParseTable},
};

/// For every pair of terminals, the parse states in which both terminals
/// have an entry in the state's `terminal_entries`.
pub struct CoincidentTokenIndex<'a> {
    /// Flattened `n * n` table of state-id lists. The slot for a pair is
    /// computed by `index`, which always puts the smaller terminal index
    /// first, so `(a, b)` and `(b, a)` share one slot.
    entries: Vec<Vec<ParseStateId>>,
    /// The lexical grammar; used for the terminal count and, in the `Debug`
    /// output, for terminal names.
    grammar: &'a LexicalGrammar,
    /// Number of variables in the lexical grammar (the side length of `entries`).
    n: usize,
}

impl<'a> CoincidentTokenIndex<'a> {
    /// Scans every state of `table` and records, for each pair of terminals
    /// that both have an entry in that state, the state's id.
    pub fn new(table: &ParseTable, lexical_grammar: &'a LexicalGrammar) -> Self {
        let n = lexical_grammar.variables.len();
        let mut coincidences = Self {
            entries: vec![Vec::new(); n * n],
            grammar: lexical_grammar,
            n,
        };

        for (state_id, state) in table.states.iter().enumerate() {
            let lookaheads = &state.terminal_entries;
            for first in lookaheads.keys().filter(|symbol| symbol.is_terminal()) {
                for second in lookaheads.keys().filter(|symbol| symbol.is_terminal()) {
                    let slot = coincidences.index(first.index, second.index);
                    let states = &mut coincidences.entries[slot];
                    // Each unordered pair is visited twice per state; states are
                    // processed in increasing order, so checking the tail is
                    // enough to avoid recording the same state twice.
                    if states.last() != Some(&state_id) {
                        states.push(state_id);
                    }
                }
            }
        }

        coincidences
    }

    /// The ids of the states in which both `a` and `b` were recorded together.
    pub fn states_with(&self, a: Symbol, b: Symbol) -> &[ParseStateId] {
        let slot = self.index(a.index, b.index);
        self.entries[slot].as_slice()
    }

    /// Whether `a` and `b` were recorded together in at least one state.
    pub fn contains(&self, a: Symbol, b: Symbol) -> bool {
        let slot = self.index(a.index, b.index);
        !self.entries[slot].is_empty()
    }

    /// Maps an unordered pair of terminal indices to its slot in `entries`;
    /// the smaller index selects the row and the larger one the column.
    #[must_use]
    const fn index(&self, a: usize, b: usize) -> usize {
        let (row, column) = if a < b { (a, b) } else { (b, a) };
        row * self.n + column
    }
}

// Debug output: a nested listing that shows, for every ordered pair of
// terminals (by name), how many states were recorded for that pair.
impl fmt::Debug for CoincidentTokenIndex<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let variables = &self.grammar.variables;

        writeln!(f, "CoincidentTokenIndex {{")?;

        writeln!(f, "  entries: {{")?;
        for row in 0..self.n {
            writeln!(f, "    {}: {{", variables[row].name)?;
            for column in 0..self.n {
                let slot = self.index(row, column);
                writeln!(
                    f,
                    "      {}: {:?},",
                    variables[column].name,
                    self.entries[slot].len()
                )?;
            }
            writeln!(f, "    }},")?;
        }
        write!(f, "  }},")?;
        write!(f, "}}")
    }
}



================================================
FILE: crates/generate/src/build_tables/item.rs
================================================
use std::{
    cmp::Ordering,
    fmt,
    hash::{Hash, Hasher},
    sync::LazyLock,
};

use crate::{
    grammars::{
        LexicalGrammar, Production, ProductionStep, ReservedWordSetId, SyntaxGrammar,
        NO_RESERVED_WORDS,
    },
    rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
};

// The production used by `ParseItem::start` for the augmented start item.
// It has a single step: non-terminal 0, with no precedence, associativity,
// alias, field name, or reserved words. Initialized lazily on first access.
static START_PRODUCTION: LazyLock<Production> = LazyLock::new(|| Production {
    dynamic_precedence: 0,
    steps: vec![ProductionStep {
        symbol: Symbol {
            index: 0,
            kind: SymbolType::NonTerminal,
        },
        precedence: Precedence::None,
        associativity: None,
        alias: None,
        field_name: None,
        reserved_word_set_id: NO_RESERVED_WORDS,
    }],
});

/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
///
/// Equality, hashing and ordering are implemented by hand rather than derived:
/// the steps that have already been matched are compared only by their aliases
/// and field names (plus their symbols when `has_preceding_inherited_fields`
/// is set), while the remaining steps are compared in full.
#[derive(Clone, Copy, Debug)]
pub struct ParseItem<'a> {
    /// The index of the parent rule within the grammar. The value `u32::MAX`
    /// marks the augmented start item (see `ParseItem::start` and
    /// `ParseItem::is_augmented`).
    pub variable_index: u32,
    /// The number of symbols that have already been matched.
    pub step_index: u32,
    /// The production being matched.
    pub production: &'a Production,
    /// A boolean indicating whether any of the already-matched children were
    /// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
    /// affected by the symbols of its preceding children; it only needs to
    /// keep track of their fields and aliases.
    ///
    /// Take for example these two items:
    ///   X -> a b • c
    ///   X -> a g • c
    ///
    /// They can be considered equivalent, for the purposes of parse table
    /// generation, because they entail the same actions. But if this flag is
    /// true, then the item's set of inherited fields may depend on the specific
    /// symbols of its preceding children.
    pub has_preceding_inherited_fields: bool,
}

/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
/// are allowed to *follow* the in-progress rule. This object corresponds directly
/// to a state in the final parse table.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct ParseItemSet<'a> {
    /// One entry per distinct item. `ParseItemSet::insert` places new entries
    /// at the position found by a binary search on the item ordering, so
    /// entries added through it stay sorted by item.
    pub entries: Vec<ParseItemSetEntry<'a>>,
}

/// A single member of a [`ParseItemSet`]: an item together with the
/// per-item information tracked for it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseItemSetEntry<'a> {
    /// The in-progress match.
    pub item: ParseItem<'a>,
    /// The tokens that are allowed to follow the in-progress rule.
    pub lookaheads: TokenSet,
    /// NOTE(review): presumably the id of the reserved-word set in effect for
    /// the token following this item — confirm against the item set builder.
    pub following_reserved_word_set: ReservedWordSetId,
}

/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
/// information. Parse states with the same core are candidates for merging.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseItemSetCore<'a> {
    /// The items alone, in the same order as the entries of the
    /// [`ParseItemSet`] that `ParseItemSet::core` built this from.
    pub entries: Vec<ParseItem<'a>>,
}

/// Display adapter for a [`ParseItem`]. The syntax grammar (field 1) and the
/// lexical grammar (field 2) are carried along so that the `Display` impl can
/// print rule and token names instead of raw indices.
pub struct ParseItemDisplay<'a>(
    pub &'a ParseItem<'a>,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);

/// Display adapter for a `TokenSet`. The syntax grammar (field 1) and the
/// lexical grammar (field 2) are carried along so that the `Display` impl can
/// print token names instead of raw indices.
pub struct TokenSetDisplay<'a>(
    pub &'a TokenSet,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);

/// Display adapter for a [`ParseItemSet`]. The `Display` impl prints one line
/// per entry, passing the syntax grammar (field 1) and the lexical grammar
/// (field 2) on to `ParseItemDisplay` and `TokenSetDisplay`.
pub struct ParseItemSetDisplay<'a>(
    pub &'a ParseItemSet<'a>,
    pub &'a SyntaxGrammar,
    pub &'a LexicalGrammar,
);

impl<'a> ParseItem<'a> {
    /// The augmented start item: nothing matched yet in `START_PRODUCTION`,
    /// with `u32::MAX` standing in for the parent rule index.
    pub fn start() -> Self {
        ParseItem {
            variable_index: u32::MAX,
            step_index: 0,
            production: &START_PRODUCTION,
            has_preceding_inherited_fields: false,
        }
    }

    /// The step that would be matched next, or `None` once every step has
    /// been matched.
    pub fn step(&self) -> Option<&'a ProductionStep> {
        let position = self.step_index as usize;
        self.production.steps.get(position)
    }

    /// The symbol of the next step, if there is one.
    pub fn symbol(&self) -> Option<Symbol> {
        Some(self.step()?.symbol)
    }

    /// The associativity recorded on the most recently matched step, if any.
    pub fn associativity(&self) -> Option<Associativity> {
        self.prev_step()?.associativity
    }

    /// The precedence recorded on the most recently matched step, or
    /// `Precedence::None` when nothing has been matched yet.
    pub fn precedence(&self) -> &Precedence {
        match self.prev_step() {
            Some(step) => &step.precedence,
            None => &Precedence::None,
        }
    }

    /// The most recently matched step, or `None` when nothing has been
    /// matched yet.
    pub fn prev_step(&self) -> Option<&'a ProductionStep> {
        let production = self.production;
        match self.step_index {
            0 => None,
            matched => Some(&production.steps[matched as usize - 1]),
        }
    }

    /// Whether every step of the production has been matched.
    #[must_use]
    pub fn is_done(&self) -> bool {
        self.production.steps.len() == self.step_index as usize
    }

    /// Whether this item belongs to the augmented start rule.
    #[must_use]
    pub const fn is_augmented(&self) -> bool {
        matches!(self.variable_index, u32::MAX)
    }

    /// Create an item like this one, but advanced by one step.
    #[must_use]
    pub const fn successor(&self) -> Self {
        let mut next = *self;
        next.step_index = self.step_index + 1;
        next
    }

    /// Create an item identical to this one, but with a different production.
    /// This is used when dynamically "inlining" certain symbols in a production.
    pub const fn substitute_production(&self, production: &'a Production) -> Self {
        ParseItem {
            variable_index: self.variable_index,
            step_index: self.step_index,
            production,
            has_preceding_inherited_fields: self.has_preceding_inherited_fields,
        }
    }
}

impl<'a> ParseItemSet<'a> {
    /// Returns the entry for `item`, creating it first if the set does not
    /// contain the item yet. A new entry starts with an empty lookahead set
    /// and the default reserved-word set, and is placed at the position that
    /// the binary search on the item ordering reports.
    pub fn insert(&mut self, item: ParseItem<'a>) -> &mut ParseItemSetEntry<'a> {
        let lookup = self.entries.binary_search_by(|entry| entry.item.cmp(&item));
        let position = match lookup {
            Ok(found) => found,
            Err(slot) => {
                let fresh = ParseItemSetEntry {
                    item,
                    lookaheads: TokenSet::new(),
                    following_reserved_word_set: ReservedWordSetId::default(),
                };
                self.entries.insert(slot, fresh);
                slot
            }
        };
        &mut self.entries[position]
    }

    /// The items of this set without their lookahead information.
    pub fn core(&self) -> ParseItemSetCore<'a> {
        let mut items = Vec::with_capacity(self.entries.len());
        for entry in &self.entries {
            items.push(entry.item);
        }
        ParseItemSetCore { entries: items }
    }
}

// Renders a parse item as `RULE → sym sym • sym ...`, where `•` marks the
// item's position in its production. The augmented start item is shown with
// the rule name `START`.
//
// Directly after the `•`, a parenthesised annotation shows the precedence,
// associativity and reserved-word set of the *next* step, whichever of these
// are set. For a finished item the `•` comes last and is followed by the
// precedence and/or associativity of the final step. Aliased steps are
// suffixed with `@alias`.
impl fmt::Display for ParseItemDisplay<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if self.0.is_augmented() {
            write!(f, "START →")?;
        } else {
            write!(
                f,
                "{} →",
                self.1.variables[self.0.variable_index as usize].name
            )?;
        }

        for (i, step) in self.0.production.steps.iter().enumerate() {
            if i == self.0.step_index as usize {
                write!(f, " •")?;
                if !step.precedence.is_none()
                    || step.associativity.is_some()
                    || step.reserved_word_set_id != ReservedWordSetId::default()
                {
                    write!(f, " (")?;
                    // Only print a precedence that is actually present. (The
                    // check used to be inverted, which hid every real
                    // precedence and printed the absent one instead.)
                    if !step.precedence.is_none() {
                        write!(f, " {}", step.precedence)?;
                    }
                    if let Some(associativity) = step.associativity {
                        write!(f, " {associativity:?}")?;
                    }
                    if step.reserved_word_set_id != ReservedWordSetId::default() {
                        write!(f, "reserved: {}", step.reserved_word_set_id)?;
                    }
                    write!(f, " )")?;
                }
            }

            write!(f, " ")?;
            if step.symbol.is_terminal() {
                // Fall back to a numbered placeholder when the lexical grammar
                // has no variable at this index.
                if let Some(variable) = self.2.variables.get(step.symbol.index) {
                    write!(f, "{}", variable.name)?;
                } else {
                    write!(f, "terminal-{}", step.symbol.index)?;
                }
            } else if step.symbol.is_external() {
                write!(f, "{}", self.1.external_tokens[step.symbol.index].name)?;
            } else {
                write!(f, "{}", self.1.variables[step.symbol.index].name)?;
            }

            if let Some(alias) = &step.alias {
                write!(f, "@{}", alias.value)?;
            }
        }

        if self.0.is_done() {
            write!(f, " •")?;
            if let Some(step) = self.0.production.steps.last() {
                if let Some(associativity) = step.associativity {
                    if step.precedence.is_none() {
                        write!(f, " ({associativity:?})")?;
                    } else {
                        write!(f, " ({} {associativity:?})", step.precedence)?;
                    }
                } else if !step.precedence.is_none() {
                    write!(f, " ({})", step.precedence)?;
                }
            }
        }

        Ok(())
    }
}

/// Returns the backslash escape used to display `c`, or `None` when `c` is
/// shown as-is. Escaped characters are newline, carriage return, tab, NUL,
/// backslash, vertical tab and form feed.
const fn escape_invisible(c: char) -> Option<&'static str> {
    match c {
        '\n' => Some("\\n"),
        '\r' => Some("\\r"),
        '\t' => Some("\\t"),
        '\0' => Some("\\0"),
        '\\' => Some("\\\\"),
        '\x0b' => Some("\\v"),
        '\x0c' => Some("\\f"),
        _ => None,
    }
}

/// Copies `source`, replacing every character that `escape_invisible`
/// recognizes with its backslash escape.
fn display_variable_name(source: &str) -> String {
    let mut rendered = String::with_capacity(source.len());
    for c in source.chars() {
        match escape_invisible(c) {
            Some(escape) => rendered.push_str(escape),
            None => rendered.push(c),
        }
    }
    rendered
}

impl fmt::Display for TokenSetDisplay<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "[")?;
        for (i, symbol) in self.0.iter().enumerate() {
            if i > 0 {
                write!(f, ", ")?;
            }

            if symbol.is_terminal() {
                if let Some(variable) = self.2.variables.get(symbol.index) {
                    write!(f, "{}", display_variable_name(&variable.name))?;
                } else {
                    write!(f, "terminal-{}", symbol.index)?;
                }
            } else if symbol.is_external() {
                write!(f, "{}", self.1.external_tokens[symbol.index].name)?;
            } else {
                write!(f, "{}", self.1.variables[symbol.index].name)?;
            }
        }
        write!(f, "]")?;
        Ok(())
    }
}

impl fmt::Display for ParseItemSetDisplay<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        for entry in &self.0.entries {
            write!(
                f,
                "{}\t{}",
                ParseItemDisplay(&entry.item, self.1, self.2),
                TokenSetDisplay(&entry.lookaheads, self.1, self.2),
            )?;
            if entry.following_reserved_word_set != ReservedWordSetId::default() {
                write!(
                    f,
                    "\treserved word set: {}",
                    entry.following_reserved_word_set
                )?;
            }
            writeln!(f)?;
        }
        Ok(())
    }
}

// Hashing mirrors the hand-written `PartialEq`: the item's scalar fields,
// then a reduced view of the steps already matched, then the remaining
// steps in full.
impl Hash for ParseItem<'_> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        let production = self.production;

        hasher.write_u32(self.variable_index);
        hasher.write_u32(self.step_index);
        hasher.write_i32(production.dynamic_precedence);
        hasher.write_usize(production.steps.len());
        hasher.write_i32(i32::from(self.has_preceding_inherited_fields));
        self.precedence().hash(hasher);
        self.associativity().hash(hasher);

        let (matched, remaining) = production.steps.split_at(self.step_index as usize);

        // The already-matched children don't play any role in the parse state for
        // this item, unless any of the following are true:
        //   * the children have fields
        //   * the children have aliases
        //   * the children are hidden and represent rules that have fields.
        // See the docs for `has_preceding_inherited_fields`.
        for step in matched {
            step.alias.hash(hasher);
            step.field_name.hash(hasher);
            if self.has_preceding_inherited_fields {
                step.symbol.hash(hasher);
            }
        }

        // Everything still to be matched is significant in full.
        for step in remaining {
            step.hash(hasher);
        }
    }
}

// Two items are equal when their scalar fields agree, their already-matched
// steps agree on aliases and field names (and on symbols, if
// `has_preceding_inherited_fields` is set), and their remaining steps are
// identical.
impl PartialEq for ParseItem<'_> {
    fn eq(&self, other: &Self) -> bool {
        let same_header = self.variable_index == other.variable_index
            && self.step_index == other.step_index
            && self.production.dynamic_precedence == other.production.dynamic_precedence
            && self.production.steps.len() == other.production.steps.len()
            && self.precedence() == other.precedence()
            && self.associativity() == other.associativity()
            && self.has_preceding_inherited_fields == other.has_preceding_inherited_fields;
        if !same_header {
            return false;
        }

        // Both productions have the same number of steps at this point, so
        // pairing the steps up visits every one of them.
        let matched_count = self.step_index as usize;
        let compare_symbols = self.has_preceding_inherited_fields;
        self.production
            .steps
            .iter()
            .zip(&other.production.steps)
            .enumerate()
            .all(|(i, (mine, theirs))| {
                if i < matched_count {
                    // See the comment in the `Hash::hash` impl regarding
                    // comparisons of parse items' already-completed steps.
                    mine.alias == theirs.alias
                        && mine.field_name == theirs.field_name
                        && (!compare_symbols || mine.symbol == theirs.symbol)
                } else {
                    mine == theirs
                }
            })
    }
}

// Total order over parse items, used by `ParseItemSet::insert` for its
// binary search. Items are compared by, in order: step index, parent rule
// index, dynamic precedence, number of steps, precedence, associativity, and
// finally step by step.
//
// NOTE(review): unlike the `PartialEq` impl, this comparison looks neither at
// `has_preceding_inherited_fields` nor at the symbols of already-matched
// steps, so two items can compare as `Ordering::Equal` here while being
// unequal under `==` — confirm that this difference is intended.
impl Ord for ParseItem<'_> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.step_index
            .cmp(&other.step_index)
            .then_with(|| self.variable_index.cmp(&other.variable_index))
            .then_with(|| {
                self.production
                    .dynamic_precedence
                    .cmp(&other.production.dynamic_precedence)
            })
            .then_with(|| {
                self.production
                    .steps
                    .len()
                    .cmp(&other.production.steps.len())
            })
            .then_with(|| self.precedence().cmp(other.precedence()))
            .then_with(|| self.associativity().cmp(&other.associativity()))
            .then_with(|| {
                // Reaching this point means both productions have the same
                // number of steps, so indexing `other` with `i` stays in bounds.
                for (i, step) in self.production.steps.iter().enumerate() {
                    // See the previous comment (in the `Hash::hash` impl) regarding comparisons
                    // of parse items' already-completed steps.
                    let o = if i < self.step_index as usize {
                        step.alias
                            .cmp(&other.production.steps[i].alias)
                            .then_with(|| {
                                step.field_name.cmp(&other.production.steps[i].field_name)
                            })
                    } else {
                        step.cmp(&other.production.steps[i])
                    };
                    if o != Ordering::Equal {
                        return o;
                    }
                }
                Ordering::Equal
            })
    }
}

// The partial order simply delegates to the total order defined by `Ord`.
impl PartialOrd for ParseItem<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

// Marker impl: the hand-written `PartialEq` above is treated as a full
// equivalence relation.
impl Eq for ParseItem<'_> {}

impl Hash for ParseItemSet<'_> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());
        for entry in &self.entries {
            entry.item.hash(hasher);
            entry.lookaheads.hash(hasher);
            entry.following_reserved_word_set.hash(hasher);
        }
    }
}

// Hashes the number of items, then every item in order.
impl Hash for ParseItemSetCore<'_> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        let items = &self.entries;
        hasher.write_usize(items.len());
        items.iter().for_each(|item| item.hash(hasher));
    }
}



================================================
FILE: crates/generate/src/build_tables/item_set_builder.rs
================================================
use std::{
    collections::{HashMap, HashSet},
    fmt,
};

use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, ParseItemSetEntry, TokenSetDisplay};
use crate::{
    grammars::{InlinedProductionMap, LexicalGrammar, ReservedWordSetId, SyntaxGrammar},
    rules::{Symbol, SymbolType, TokenSet},
};

/// A precomputed entry that must be added to an item set when a given
/// non-terminal occurs as the next symbol of one of the set's items.
///
/// These are built once per non-terminal in `ParseItemSetBuilder::new` and
/// consumed when expanding items in `ParseItemSetBuilder::add_item`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
    // The new item to add as part of the non-terminal's expansion.
    item: ParseItem<'a>,
    // Lookahead information that applies to `item` within that expansion.
    info: FollowSetInfo,
}

/// Information about what can follow an item inside the expansion of a
/// non-terminal, accumulated while precomputing transitive closure additions.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct FollowSetInfo {
    // Tokens that can always come after the item in the expansion.
    lookaheads: TokenSet,
    // Largest reserved word set id seen among the contributions merged into this entry.
    reserved_lookaheads: ReservedWordSetId,
    // Whether the item can occur at the *end* of the expansion, in which case the
    // expanded symbol's own lookahead tokens can also follow the item.
    propagates_lookaheads: bool,
}

/// Computes transitive closures of parse item sets.
///
/// All of the grammar-wide data needed for that (FIRST/LAST sets and the
/// per-non-terminal closure additions) is precomputed in `new`.
pub struct ParseItemSetBuilder<'a> {
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // For each symbol, the tokens that can appear at the beginning of it.
    first_sets: HashMap<Symbol, TokenSet>,
    // For each symbol, the largest reserved word set id found on the first
    // steps of the productions reachable from it (the default id for tokens).
    reserved_first_sets: HashMap<Symbol, ReservedWordSetId>,
    // For each symbol, the tokens that can appear at the end of it.
    last_sets: HashMap<Symbol, TokenSet>,
    inlines: &'a InlinedProductionMap,
    // Indexed by non-terminal variable index: the entries to add to an item set
    // when that non-terminal is the next symbol of one of its items.
    transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,
}

/// Appends `value` to `vector` unless an equal element is already present,
/// so the vector behaves like an insertion-ordered set.
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
    let already_present = vector.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    vector.push(value);
}

impl<'a> ParseItemSetBuilder<'a> {
    /// Creates a builder for the given grammars, precomputing everything that
    /// `transitive_closure` needs:
    ///
    ///   * the FIRST and LAST token sets (and reserved-word FIRST ids) of every
    ///     terminal, external token and non-terminal, and
    ///   * for each non-terminal, the list of *additions* that expanding it
    ///     contributes to an item set.
    ///
    /// `inlines` supplies the inlined replacements for productions whose first
    /// step is an inlined variable.
    pub fn new(
        syntax_grammar: &'a SyntaxGrammar,
        lexical_grammar: &'a LexicalGrammar,
        inlines: &'a InlinedProductionMap,
    ) -> Self {
        let mut result = Self {
            syntax_grammar,
            lexical_grammar,
            first_sets: HashMap::new(),
            reserved_first_sets: HashMap::new(),
            last_sets: HashMap::new(),
            inlines,
            transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
        };

        // For each grammar symbol, populate the FIRST and LAST sets: the set of
        // terminals that appear at the beginning and end of that symbol's
        // productions, respectively.
        // For a terminal symbol, the FIRST and LAST sets just consist of the
        // terminal itself.
        for i in 0..lexical_grammar.variables.len() {
            let symbol = Symbol::terminal(i);
            let mut set = TokenSet::new();
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
            result
                .reserved_first_sets
                .insert(symbol, ReservedWordSetId::default());
        }

        // External tokens are treated the same way as terminals.
        for i in 0..syntax_grammar.external_tokens.len() {
            let symbol = Symbol::external(i);
            let mut set = TokenSet::new();
            set.insert(symbol);
            result.first_sets.insert(symbol, set.clone());
            result.last_sets.insert(symbol, set);
            result
                .reserved_first_sets
                .insert(symbol, ReservedWordSetId::default());
        }

        // The FIRST set of a non-terminal `i` is the union of the FIRST sets
        // of all the symbols that appear at the beginnings of i's productions. Some
        // of these symbols may themselves be non-terminals, so this is a recursive
        // definition.
        //
        // Rather than computing these sets using recursion, we use an explicit stack
        // called `symbols_to_process`.
        let mut symbols_to_process = Vec::new();
        let mut processed_non_terminals = HashSet::new();
        for i in 0..syntax_grammar.variables.len() {
            let symbol = Symbol::non_terminal(i);
            let first_set = result.first_sets.entry(symbol).or_default();
            let reserved_first_set = result.reserved_first_sets.entry(symbol).or_default();

            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
            while let Some(sym) = symbols_to_process.pop() {
                for production in &syntax_grammar.variables[sym.index].productions {
                    if let Some(step) = production.steps.first() {
                        if step.symbol.is_terminal() || step.symbol.is_external() {
                            first_set.insert(step.symbol);
                        } else if processed_non_terminals.insert(step.symbol) {
                            // Only queue each non-terminal once per outer symbol.
                            symbols_to_process.push(step.symbol);
                        }
                        // Keep the largest reserved word set id seen on any first step.
                        *reserved_first_set = (*reserved_first_set).max(step.reserved_word_set_id);
                    }
                }
            }

            // The LAST set is defined in a similar way to the FIRST set.
            let last_set = result.last_sets.entry(symbol).or_default();
            processed_non_terminals.clear();
            symbols_to_process.clear();
            symbols_to_process.push(symbol);
            while let Some(sym) = symbols_to_process.pop() {
                for production in &syntax_grammar.variables[sym.index].productions {
                    if let Some(step) = production.steps.last() {
                        if step.symbol.is_terminal() || step.symbol.is_external() {
                            last_set.insert(step.symbol);
                        } else if processed_non_terminals.insert(step.symbol) {
                            symbols_to_process.push(step.symbol);
                        }
                    }
                }
            }
        }

        // To compute an item set's transitive closure, we find each item in the set
        // whose next symbol is a non-terminal, and we add new items to the set for
        // each of that symbol's productions. These productions might themselves begin
        // with non-terminals, so the process continues recursively. In this process,
        // the total set of entries that get added depends only on two things:
        //
        //   * the non-terminal symbol that occurs next in each item
        //
        //   * the set of terminals that can follow that non-terminal symbol in the item
        //
        // So we can avoid a lot of duplicated recursive work by precomputing, for each
        // non-terminal symbol `i`, a final list of *additions* that must be made to an
        // item set when symbol `i` occurs as the next symbol in one of its core items.
        // The structure of a precomputed *addition* is as follows:
        //
        //   * `item` - the new item that must be added as part of the expansion of the symbol `i`.
        //
        //   * `lookaheads` - the set of possible lookahead tokens that can always come after `item`
        //     in an expansion of symbol `i`.
        //
        //   * `reserved_lookaheads` - the set of reserved lookahead tokens that can
        //     always come after `item` in the expansion of symbol `i`.
        //
        //   * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
        //     *end* of the expansion of symbol `i`, so that i's own current lookahead tokens can
        //     occur after `item`.
        //
        // Rather than computing these additions recursively, we use an explicit stack.
        let empty_lookaheads = TokenSet::new();
        let mut stack = Vec::new();
        let mut follow_set_info_by_non_terminal = HashMap::<usize, FollowSetInfo>::new();
        for i in 0..syntax_grammar.variables.len() {
            // First, build up a map whose keys are all of the non-terminals that can
            // appear at the beginning of non-terminal `i`, and whose values store
            // information about the tokens that can follow those non-terminals.
            stack.clear();
            stack.push((i, &empty_lookaheads, ReservedWordSetId::default(), true));
            follow_set_info_by_non_terminal.clear();
            while let Some((sym_ix, lookaheads, reserved_word_set_id, propagates_lookaheads)) =
                stack.pop()
            {
                // Merge this stack entry into the info recorded for `sym_ix`, tracking
                // whether anything actually changed.
                let mut did_add = false;
                let info = follow_set_info_by_non_terminal.entry(sym_ix).or_default();
                did_add |= info.lookaheads.insert_all(lookaheads);
                if reserved_word_set_id > info.reserved_lookaheads {
                    info.reserved_lookaheads = reserved_word_set_id;
                    did_add = true;
                }
                did_add |= propagates_lookaheads && !info.propagates_lookaheads;
                info.propagates_lookaheads |= propagates_lookaheads;
                // Nothing new was learned, so the productions below were already
                // visited with at least this much information.
                if !did_add {
                    continue;
                }

                for production in &syntax_grammar.variables[sym_ix].productions {
                    if let Some(symbol) = production.first_symbol() {
                        if symbol.is_non_terminal() {
                            if let Some(next_step) = production.steps.get(1) {
                                // The leading non-terminal is followed by another step:
                                // its followers are that step's FIRST set.
                                stack.push((
                                    symbol.index,
                                    &result.first_sets[&next_step.symbol],
                                    result.reserved_first_sets[&next_step.symbol],
                                    false,
                                ));
                            } else {
                                // The leading non-terminal is the whole production:
                                // it inherits the followers of the current symbol.
                                stack.push((
                                    symbol.index,
                                    lookaheads,
                                    reserved_word_set_id,
                                    propagates_lookaheads,
                                ));
                            }
                        }
                    }
                }
            }

            // Store all of those non-terminals' productions, along with their associated
            // lookahead info, as *additions* associated with non-terminal `i`.
            let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
            for (&variable_index, follow_set_info) in &follow_set_info_by_non_terminal {
                let variable = &syntax_grammar.variables[variable_index];
                let non_terminal = Symbol::non_terminal(variable_index);
                let variable_index = variable_index as u32;
                // Variables that are inlined contribute no items of their own.
                if syntax_grammar.variables_to_inline.contains(&non_terminal) {
                    continue;
                }
                for production in &variable.productions {
                    let item = ParseItem {
                        variable_index,
                        production,
                        step_index: 0,
                        has_preceding_inherited_fields: false,
                    };

                    if let Some(inlined_productions) =
                        inlines.inlined_productions(item.production, item.step_index)
                    {
                        // Replace the item with one item per inlined production.
                        for production in inlined_productions {
                            find_or_push(
                                additions_for_non_terminal,
                                TransitiveClosureAddition {
                                    item: item.substitute_production(production),
                                    info: follow_set_info.clone(),
                                },
                            );
                        }
                    } else {
                        find_or_push(
                            additions_for_non_terminal,
                            TransitiveClosureAddition {
                                item,
                                info: follow_set_info.clone(),
                            },
                        );
                    }
                }
            }
        }

        result
    }

    /// Builds the transitive closure of `item_set`.
    ///
    /// Every entry is added to a fresh set through `add_item`. An entry whose
    /// current position has inlined productions is replaced by one entry per
    /// inlined production, each keeping the original lookahead information.
    pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
        let mut closure = ParseItemSet::default();
        for entry in &item_set.entries {
            let inlined = self
                .inlines
                .inlined_productions(entry.item.production, entry.item.step_index);
            match inlined {
                Some(productions) => {
                    for production in productions {
                        let substituted = ParseItemSetEntry {
                            item: entry.item.substitute_production(production),
                            lookaheads: entry.lookaheads.clone(),
                            following_reserved_word_set: entry.following_reserved_word_set,
                        };
                        self.add_item(&mut closure, &substituted);
                    }
                }
                None => self.add_item(&mut closure, entry),
            }
        }
        closure
    }

    /// Returns the precomputed FIRST set of `symbol`.
    ///
    /// Panics if no FIRST set was recorded for `symbol` (the map is indexed directly).
    pub fn first_set(&self, symbol: &Symbol) -> &TokenSet {
        &self.first_sets[symbol]
    }

    /// Returns the grammar's reserved word set selected by the reserved-word
    /// id recorded for `symbol`, or `None` when no id was recorded for it.
    pub fn reserved_first_set(&self, symbol: &Symbol) -> Option<&TokenSet> {
        self.reserved_first_sets
            .get(symbol)
            .map(|id| &self.syntax_grammar.reserved_word_sets[id.0])
    }

    /// Returns the precomputed LAST set of `symbol`.
    ///
    /// Panics if no LAST set was recorded for `symbol` (the map is indexed directly).
    pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
        &self.last_sets[symbol]
    }

    /// Adds `entry` to `set`, together with every item implied by it.
    ///
    /// When the item's next symbol is a non-terminal, the precomputed additions
    /// for that non-terminal are merged into `set` first; the entry itself is
    /// merged in afterwards. Lookahead sets are unioned and reserved word set
    /// ids are combined by taking the maximum.
    fn add_item(&self, set: &mut ParseItemSet<'a>, entry: &ParseItemSetEntry<'a>) {
        if let Some(step) = entry.item.step() {
            if step.symbol.is_non_terminal() {
                let next_step = entry.item.successor().step();

                // Determine which tokens can follow this non-terminal.
                let (following_tokens, following_reserved_tokens) =
                    if let Some(next_step) = next_step {
                        // Another step follows: use that step's FIRST information.
                        (
                            self.first_sets.get(&next_step.symbol).unwrap(),
                            *self.reserved_first_sets.get(&next_step.symbol).unwrap(),
                        )
                    } else {
                        // The non-terminal is the item's last step: the item's own
                        // lookaheads follow it.
                        (&entry.lookaheads, entry.following_reserved_word_set)
                    };

                // Use the pre-computed *additions* to expand the non-terminal.
                for addition in &self.transitive_closure_additions[step.symbol.index] {
                    let entry = set.insert(addition.item);
                    entry.lookaheads.insert_all(&addition.info.lookaheads);

                    // The reserved word set is only raised when the word token is
                    // among the lookaheads being contributed.
                    if let Some(word_token) = self.syntax_grammar.word_token {
                        if addition.info.lookaheads.contains(&word_token) {
                            entry.following_reserved_word_set = entry
                                .following_reserved_word_set
                                .max(addition.info.reserved_lookaheads);
                        }
                    }

                    // Items that can end the expansion also receive the tokens that
                    // follow the non-terminal in the originating item.
                    if addition.info.propagates_lookaheads {
                        entry.lookaheads.insert_all(following_tokens);

                        if let Some(word_token) = self.syntax_grammar.word_token {
                            if following_tokens.contains(&word_token) {
                                entry.following_reserved_word_set = entry
                                    .following_reserved_word_set
                                    .max(following_reserved_tokens);
                            }
                        }
                    }
                }
            }
        }

        // Finally merge the entry itself into the set.
        let e = set.insert(entry.item);
        e.lookaheads.insert_all(&entry.lookaheads);
        e.following_reserved_word_set = e
            .following_reserved_word_set
            .max(entry.following_reserved_word_set);
    }
}

impl fmt::Debug for ParseItemSetBuilder<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "ParseItemSetBuilder {{")?;

        writeln!(f, "  first_sets: {{")?;
        for (symbol, first_set) in &self.first_sets {
            let name = match symbol.kind {
                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
                SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
            };
            writeln!(
                f,
                "    first({name:?}): {}",
                TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar)
            )?;
        }
        writeln!(f, "  }}")?;

        writeln!(f, "  last_sets: {{")?;
        for (symbol, last_set) in &self.last_sets {
            let name = match symbol.kind {
                SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
                SymbolType::External => &self.syntax_grammar.external_tokens[symbol.index].name,
                SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
                SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
            };
            writeln!(
                f,
                "    last({name:?}): {}",
                TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar)
            )?;
        }
        writeln!(f, "  }}")?;

        writeln!(f, "  additions: {{")?;
        for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
            writeln!(f, "    {}: {{", variable.name)?;
            for addition in &self.transitive_closure_additions[i] {
                writeln!(
                    f,
                    "      {}",
                    ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
                )?;
            }
            writeln!(f, "    }},")?;
        }
        write!(f, "  }},")?;

        write!(f, "}}")?;
        Ok(())
    }
}



================================================
FILE: crates/generate/src/build_tables/minimize_parse_table.rs
================================================
use std::{
    collections::{HashMap, HashSet},
    mem,
};

use log::info;

use super::token_conflicts::TokenConflictMap;
use crate::{
    dedup::split_state_id_groups,
    grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
    rules::{AliasMap, Symbol, TokenSet},
    tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
};

/// Shrinks `parse_table` in place.
///
/// The passes run in a fixed order: compatible states are merged, unit
/// reductions are removed, states that are no longer referenced are dropped,
/// and finally the states are reordered by descending size.
pub fn minimize_parse_table(
    parse_table: &mut ParseTable,
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap,
    token_conflict_map: &TokenConflictMap,
    keywords: &TokenSet,
) {
    let mut pass = Minimizer {
        parse_table,
        syntax_grammar,
        lexical_grammar,
        simple_aliases,
        token_conflict_map,
        keywords,
    };

    pass.merge_compatible_states();
    pass.remove_unit_reductions();
    pass.remove_unused_states();
    pass.reorder_states_by_descending_size();
}

/// Working state shared by the parse table minimization passes; it bundles
/// the table being rewritten with the read-only inputs the passes consult.
struct Minimizer<'a> {
    // The table that is minimized in place.
    parse_table: &'a mut ParseTable,
    syntax_grammar: &'a SyntaxGrammar,
    lexical_grammar: &'a LexicalGrammar,
    // Consulted when deciding whether a token may be added to a merged state.
    token_conflict_map: &'a TokenConflictMap<'a>,
    // Consulted together with the grammar's word token in `token_conflicts`.
    keywords: &'a TokenSet,
    // Symbols found in this map are never treated as unit reductions.
    simple_aliases: &'a AliasMap,
}

impl Minimizer<'_> {
    /// Bypasses states whose only job is to perform a single-child reduction.
    ///
    /// A state qualifies when every action in its terminal entries is either a
    /// `ShiftExtra` or a `Reduce` of one child with production id 0 to one and
    /// the same symbol, and that symbol is not simply aliased, not a supertype,
    /// not an extra, never aliased in any production, and not a `Named`
    /// variable. References to such a state are redirected to the state that
    /// the referencing state's goto entry for that symbol points at.
    fn remove_unit_reductions(&mut self) {
        // Collect every symbol that appears with an alias in some production step.
        let mut aliased_symbols = HashSet::new();
        for variable in &self.syntax_grammar.variables {
            for production in &variable.productions {
                for step in &production.steps {
                    if step.alias.is_some() {
                        aliased_symbols.insert(step.symbol);
                    }
                }
            }
        }

        // Map each qualifying state id to the symbol its unit reductions produce.
        let mut unit_reduction_symbols_by_state = HashMap::new();
        for (i, state) in self.parse_table.states.iter().enumerate() {
            let mut only_unit_reductions = true;
            let mut unit_reduction_symbol = None;
            for (_, entry) in &state.terminal_entries {
                for action in &entry.actions {
                    match action {
                        ParseAction::ShiftExtra => continue,
                        ParseAction::Reduce {
                            child_count: 1,
                            production_id: 0,
                            symbol,
                            ..
                        } if !self.simple_aliases.contains_key(symbol)
                            && !self.syntax_grammar.supertype_symbols.contains(symbol)
                            && !self.syntax_grammar.extra_symbols.contains(symbol)
                            && !aliased_symbols.contains(symbol)
                            && self.syntax_grammar.variables[symbol.index].kind
                                != VariableType::Named
                            && (unit_reduction_symbol.is_none()
                                || unit_reduction_symbol == Some(symbol)) =>
                        {
                            unit_reduction_symbol = Some(symbol);
                            continue;
                        }
                        _ => {}
                    }
                    // Any other action disqualifies the state.
                    only_unit_reductions = false;
                    break;
                }

                if !only_unit_reductions {
                    break;
                }
            }

            if let Some(symbol) = unit_reduction_symbol {
                if only_unit_reductions {
                    unit_reduction_symbols_by_state.insert(i, *symbol);
                }
            }
        }

        // Redirect references away from the unit reduction states. The pass is
        // repeated per state until no reference points at a unit reduction state,
        // since a redirect target may itself be one.
        // NOTE(review): when a referenced unit reduction state has no matching
        // `Goto` entry, the id is returned unchanged while `done` is still
        // cleared; presumably such a goto always exists — confirm.
        for state in &mut self.parse_table.states {
            let mut done = false;
            while !done {
                done = true;
                state.update_referenced_states(|other_state_id, state| {
                    unit_reduction_symbols_by_state.get(&other_state_id).map_or(
                        other_state_id,
                        |symbol| {
                            done = false;
                            match state.nonterminal_entries.get(symbol) {
                                Some(GotoAction::Goto(state_id)) => *state_id,
                                _ => other_state_id,
                            }
                        },
                    )
                });
            }
        }
    }

    /// Merges parse states that can share a single table row.
    ///
    /// States start out grouped by their item set core. The groups are split
    /// once using `states_conflict`, then split repeatedly using
    /// `state_successors_differ` until no further split happens. Each
    /// remaining group is collapsed into one new state.
    ///
    /// Panics if the parse table has no states.
    fn merge_compatible_states(&mut self) {
        let core_count = 1 + self
            .parse_table
            .states
            .iter()
            .map(|state| state.core_id)
            .max()
            .unwrap();

        // Initially group the states by their parse item set core.
        let mut group_ids_by_state_id = Vec::with_capacity(self.parse_table.states.len());
        let mut state_ids_by_group_id = vec![Vec::<ParseStateId>::new(); core_count];
        for (i, state) in self.parse_table.states.iter().enumerate() {
            state_ids_by_group_id[state.core_id].push(i);
            group_ids_by_state_id.push(state.core_id);
        }

        // Split groups whose members have conflicting terminal entries.
        split_state_id_groups(
            &self.parse_table.states,
            &mut state_ids_by_group_id,
            &mut group_ids_by_state_id,
            0,
            |left, right, groups| self.states_conflict(left, right, groups),
        );

        // Keep splitting while members of a group lead into different groups.
        while split_state_id_groups(
            &self.parse_table.states,
            &mut state_ids_by_group_id,
            &mut group_ids_by_state_id,
            0,
            |left, right, groups| self.state_successors_differ(left, right, groups),
        ) {}

        // Make sure the groups containing states 0 and 1 end up at indices 0 and 1.
        // NOTE(review): `group_ids_by_state_id` is not adjusted after these swaps,
        // so this appears to rely on those groups already being at indices 0 and
        // 1 — confirm against `split_state_id_groups`.
        let error_group_index = state_ids_by_group_id
            .iter()
            .position(|g| g.contains(&0))
            .unwrap();
        let start_group_index = state_ids_by_group_id
            .iter()
            .position(|g| g.contains(&1))
            .unwrap();
        state_ids_by_group_id.swap(error_group_index, 0);
        state_ids_by_group_id.swap(start_group_index, 1);

        // Create a list of new parse states: one state for each group of old states.
        let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
        for state_ids in &state_ids_by_group_id {
            // Initialize the new state based on the first old state in the group.
            let mut parse_state = mem::take(&mut self.parse_table.states[state_ids[0]]);

            // Extend the new state with all of the actions from the other old states
            // in the group.
            for state_id in &state_ids[1..] {
                let other_parse_state = mem::take(&mut self.parse_table.states[*state_id]);

                parse_state
                    .terminal_entries
                    .extend(other_parse_state.terminal_entries);
                parse_state
                    .nonterminal_entries
                    .extend(other_parse_state.nonterminal_entries);
                parse_state
                    .reserved_words
                    .insert_all(&other_parse_state.reserved_words);
                // A token that now has a terminal entry must not also be listed
                // as a reserved word of the merged state.
                for symbol in parse_state.terminal_entries.keys() {
                    parse_state.reserved_words.remove(symbol);
                }
            }

            // Update the new state's outgoing references using the new grouping.
            parse_state.update_referenced_states(|state_id, _| group_ids_by_state_id[state_id]);
            new_states.push(parse_state);
        }

        self.parse_table.states = new_states;
    }

    /// Reports whether two states are incompatible and must not be merged.
    ///
    /// Tokens present in both states are compared with `entries_conflict`. A
    /// token present in only one state is checked with `token_conflicts`
    /// against the state that lacks it.
    fn states_conflict(
        &self,
        left_state: &ParseState,
        right_state: &ParseState,
        group_ids_by_state_id: &[ParseStateId],
    ) -> bool {
        // Walk the left state's tokens: shared ones compare entries, the others
        // are checked for compatibility with the right state.
        for (token, left_entry) in &left_state.terminal_entries {
            let conflict = match right_state.terminal_entries.get(token) {
                Some(right_entry) => self.entries_conflict(
                    left_state.id,
                    right_state.id,
                    token,
                    left_entry,
                    right_entry,
                    group_ids_by_state_id,
                ),
                None => self.token_conflicts(left_state.id, right_state.id, right_state, *token),
            };
            if conflict {
                return true;
            }
        }

        // Tokens that only the right state has are checked against the left state.
        right_state.terminal_entries.keys().any(|token| {
            !left_state.terminal_entries.contains_key(token)
                && self.token_conflicts(left_state.id, right_state.id, left_state, *token)
        })
    }

    fn state_successors_differ(
        &self,
        state1: &ParseState,
        state2: &ParseState,
        group_ids_by_state_id: &[ParseStateId],
    ) -> bool {
        for (token, entry1) in &state1.terminal_entries {
            if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() {
                if let Some(entry2) = state2.terminal_entries.get(token) {
                    if let ParseAction::Shift { state: s2, .. } = entry2.actions.last().unwrap() {
                        let group1 = group_ids_by_state_id[*s1];
                        let group2 = group_ids_by_state_id[*s2];
                        if group1 != group2 {
                            info!(
                                "split states {} {} - successors for {} are split: {s1} {s2}",
                                state1.id,
                                state2.id,
                                self.symbol_name(token),
                            );
                            return true;
                        }
                    }
                }
            }
        }

        for (symbol, s1) in &state1.nonterminal_entries {
            if let Some(s2) = state2.nonterminal_entries.get(symbol) {
                match (s1, s2) {
                    (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => {}
                    (GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
                        let group1 = group_ids_by_state_id[*s1];
                        let group2 = group_ids_by_state_id[*s2];
                        if group1 != group2 {
                            info!(
                                "split states {} {} - successors for {} are split: {s1} {s2}",
                                state1.id,
                                state2.id,
                                self.symbol_name(symbol),
                            );
                            return true;
                        }
                    }
                    _ => return true,
                }
            }
        }

        false
    }

    /// Reports whether two table entries for the same token are incompatible.
    ///
    /// Entries are compatible when they hold the same number of actions and
    /// the actions match pairwise. Two shifts match when their destinations
    /// belong to the same group and their repetition flags agree; every other
    /// pair must be equal. Each detected conflict is logged.
    fn entries_conflict(
        &self,
        state_id1: ParseStateId,
        state_id2: ParseStateId,
        token: &Symbol,
        entry1: &ParseTableEntry,
        entry2: &ParseTableEntry,
        group_ids_by_state_id: &[ParseStateId],
    ) -> bool {
        if entry1.actions.len() != entry2.actions.len() {
            info!(
                "split states {state_id1} {state_id2} - differing action counts for token {}",
                self.symbol_name(token)
            );
            return true;
        }

        // The lengths are equal here, so zipping pairs up every action.
        for (action1, action2) in entry1.actions.iter().zip(entry2.actions.iter()) {
            match (action1, action2) {
                (
                    ParseAction::Shift {
                        state: s1,
                        is_repetition: repeats1,
                    },
                    ParseAction::Shift {
                        state: s2,
                        is_repetition: repeats2,
                    },
                ) => {
                    let group1 = group_ids_by_state_id[*s1];
                    let group2 = group_ids_by_state_id[*s2];
                    if group1 != group2 || repeats1 != repeats2 {
                        info!(
                            "split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
                            self.symbol_name(token),
                        );
                        return true;
                    }
                }
                _ => {
                    if action1 != action2 {
                        info!(
                            "split states {state_id1} {state_id2} - unequal actions for {}",
                            self.symbol_name(token),
                        );
                        return true;
                    }
                }
            }
        }

        false
    }

    /// Reports whether `new_token` cannot safely be added to `right_state`
    /// when merging it with another state.
    ///
    /// `left_id` and `right_id` are only used in log messages.
    fn token_conflicts(
        &self,
        left_id: ParseStateId,
        right_id: ParseStateId,
        right_state: &ParseState,
        new_token: Symbol,
    ) -> bool {
        // The end-of-non-terminal-extra marker is never merged in.
        if new_token == Symbol::end_of_nonterminal_extra() {
            info!("split states {left_id} {right_id} - end of non-terminal extra",);
            return true;
        }

        // External tokens are never added: they could conflict lexically with
        // any of the lookahead tokens the state already has.
        if new_token.is_external() {
            info!(
                "split states {left_id} {right_id} - external token {}",
                self.symbol_name(&new_token),
            );
            return true;
        }

        // A token the state already lists as a reserved word is accepted.
        if right_state.reserved_words.contains(&new_token) {
            return false;
        }

        // Tokens that are both internal and external are never added, since
        // their validity could influence the behavior of the external scanner.
        let mirrors_external_token = self
            .syntax_grammar
            .external_tokens
            .iter()
            .any(|external| external.corresponding_internal_token == Some(new_token));
        if mirrors_external_token {
            info!(
                "split states {left_id} {right_id} - internal/external token {}",
                self.symbol_name(&new_token),
            );
            return true;
        }

        // Finally, the token must not conflict with any terminal already in the state.
        let word_token = self.syntax_grammar.word_token;
        for token in right_state.terminal_entries.keys().copied() {
            if !token.is_terminal() {
                continue;
            }

            // The word token paired with a keyword is exempt from the conflict check.
            let is_word_keyword_pair = (word_token == Some(token)
                && self.keywords.contains(&new_token))
                || (word_token == Some(new_token) && self.keywords.contains(&token));
            if is_word_keyword_pair {
                continue;
            }

            let overlaps = self
                .token_conflict_map
                .does_conflict(new_token.index, token.index)
                || self
                    .token_conflict_map
                    .does_match_same_string(new_token.index, token.index);
            if overlaps {
                info!(
                    "split states {} {} - token {} conflicts with {}",
                    left_id,
                    right_id,
                    self.symbol_name(&new_token),
                    self.symbol_name(&token),
                );
                return true;
            }
        }

        false
    }

    /// Looks up the name of `symbol`: non-terminals come from the syntax
    /// grammar's variables, external symbols from its external tokens, and
    /// everything else from the lexical grammar's variables.
    fn symbol_name(&self, symbol: &Symbol) -> &String {
        let index = symbol.index;
        if symbol.is_non_terminal() {
            return &self.syntax_grammar.variables[index].name;
        }
        if symbol.is_external() {
            return &self.syntax_grammar.external_tokens[index].name;
        }
        &self.lexical_grammar.variables[index].name
    }

    /// Drops every state that no state refers to and renumbers the rest.
    ///
    /// States 0 and 1 are always kept. Surviving states keep their relative
    /// order, and their outgoing references are rewritten to the new ids.
    ///
    /// Panics if the parse table holds fewer than two states.
    fn remove_unused_states(&mut self) {
        let state_count = self.parse_table.states.len();
        let mut state_usage_map = vec![false; state_count];

        state_usage_map[0] = true;
        state_usage_map[1] = true;

        for state in &self.parse_table.states {
            for referenced_state in state.referenced_states() {
                state_usage_map[referenced_state] = true;
            }
        }

        // A state's new id is its old id minus the number of unused states
        // that precede it.
        let mut removed_predecessor_count = 0;
        let mut state_replacement_map = vec![0; state_count];
        for state_id in 0..state_count {
            state_replacement_map[state_id] = state_id - removed_predecessor_count;
            if !state_usage_map[state_id] {
                removed_predecessor_count += 1;
            }
        }

        // Rewrite the references of the surviving states while every state is
        // still at its original index.
        for (state_id, state) in self.parse_table.states.iter_mut().enumerate() {
            if state_usage_map[state_id] {
                state.update_referenced_states(|other_state_id, _| {
                    state_replacement_map[other_state_id]
                });
            }
        }

        // Drop the unused states in a single pass. `retain` visits the elements
        // exactly once in their original order, so a running counter recovers
        // each element's original index without repeated `Vec::remove` shifts.
        let mut original_state_id = 0;
        self.parse_table.states.retain(|_| {
            let keep = state_usage_map[original_state_id];
            original_state_id += 1;
            keep
        });
    }

    fn reorder_states_by_descending_size(&mut self) {
        // Get a mapping of old state index -> new_state_index
        let mut old_ids_by_new_id = (0..self.parse_table.states.len()).collect::<Vec<_>>();
        old_ids_by_new_id.sort_unstable_by_key(|i| {
            // Don't changes states 0 (the error state) or 1 (the start state).
            if *i <= 1 {
                return *i as i64 - 1_000_000;
            }

            // Reorder all the other states by descending symbol count.
            let state = &self.parse_table.states[*i];
            -((state.terminal_entries.len() + state.nonterminal_entries.len()) as i64)
        });

        // Get the inverse mapping
        let mut new_ids_by_old_id = vec![0; old_ids_by_new_id.len()];
        for (id, old_id) in old_ids_by_new_id.iter().enumerate() {
            new_ids_by_old_id[*old_id] = id;
        }

        // Reorder the parse states and update their references to reflect
        // the new ordering.
        self.parse_table.states = old_ids_by_new_id
            .iter()
            .map(|old_id| {
                let mut state = ParseState::default();
                mem::swap(&mut state, &mut self.parse_table.states[*old_id]);
                state.update_referenced_states(|id, _| new_ids_by_old_id[id]);
                state
            })
            .collect();
    }
}



================================================
FILE: crates/generate/src/build_tables/token_conflicts.rs
================================================
use std::{cmp::Ordering, collections::HashSet, fmt};

use crate::{
    build_tables::item::TokenSetDisplay,
    grammars::{LexicalGrammar, SyntaxGrammar},
    nfa::{CharacterSet, NfaCursor, NfaTransition},
    rules::TokenSet,
};

/// The kinds of overlap recorded for one *ordered* pair of tokens. Each flag
/// describes the first token of the pair relative to the second ("the other
/// token"). All flags are filled in by `compute_conflict_status`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
    /// This token completes at a point where only the other token could keep
    /// matching, and continuing is *not* preferred (per `prefer_transition`).
    matches_prefix: bool,
    /// This token can keep matching past a point where the other token has
    /// completed, and continuing is preferred (per `prefer_transition`).
    does_match_continuation: bool,
    /// Like `does_match_continuation`, and additionally the characters of the
    /// continuing transition intersect the characters that may follow the
    /// other token.
    does_match_valid_continuation: bool,
    /// This token completes in a state that also has an outgoing transition
    /// flagged as a separator transition.
    does_match_separators: bool,
    /// Both tokens complete in the same state and this token is the preferred
    /// one (per `prefer_token`).
    matches_same_string: bool,
    /// Some reachable state set has this token as its only live token.
    matches_different_string: bool,
}

/// A table describing how every pair of tokens in a lexical grammar can
/// overlap, together with the character sets used to compute it.
pub struct TokenConflictMap<'a> {
    /// Number of variables in the lexical grammar; the side length of
    /// `status_matrix`.
    n: usize,
    /// Flattened `n * n` matrix; the entry for the ordered pair `(i, j)` lives
    /// at `matrix_index(n, i, j)`.
    status_matrix: Vec<TokenConflictStatus>,
    /// The sets of following tokens, exactly as passed to `new`.
    following_tokens: Vec<TokenSet>,
    /// For each token, the union of the characters on the transitions leaving
    /// its start state.
    starting_chars_by_index: Vec<CharacterSet>,
    /// For each entry of `following_tokens`, the union of the starting
    /// characters of the terminal tokens in that set.
    following_chars_by_index: Vec<CharacterSet>,
    /// The lexical grammar that this map was built from.
    grammar: &'a LexicalGrammar,
}

impl<'a> TokenConflictMap<'a> {
    /// Create a token conflict map based on a lexical grammar, which describes the structure
    /// each token, and a `following_token` map, which indicates which tokens may be appear
    /// immediately after each other token.
    ///
    /// This analyzes the possible kinds of overlap between each pair of tokens and stores
    /// them in a matrix.
    pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
        let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
        let starting_chars = get_starting_chars(&mut cursor, grammar);
        let following_chars = get_following_chars(&starting_chars, &following_tokens);

        let n = grammar.variables.len();
        let mut status_matrix = vec![TokenConflictStatus::default(); n * n];
        for i in 0..grammar.variables.len() {
            for j in 0..i {
                let status = compute_conflict_status(&mut cursor, grammar, &following_chars, i, j);
                status_matrix[matrix_index(n, i, j)] = status.0;
                status_matrix[matrix_index(n, j, i)] = status.1;
            }
        }

        TokenConflictMap {
            n,
            status_matrix,
            following_tokens,
            starting_chars_by_index: starting_chars,
            following_chars_by_index: following_chars,
            grammar,
        }
    }

    /// Does token `i` match any strings that token `j` also matches, such that token `i`
    /// is preferred over token `j`?
    pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
        let left = &self.status_matrix[matrix_index(self.n, a, other)];
        let right = &self.status_matrix[matrix_index(self.n, b, other)];
        left == right
    }

    /// Does token `i` match any strings that token `j` does *not* match?
    pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
        self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
    }

    /// Does token `i` match any strings that token `j` also matches, where
    /// token `i` is preferred over token `j`?
    pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
        self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
    }

    pub fn does_conflict(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
        entry.does_match_valid_continuation
            || entry.does_match_separators
            || entry.matches_same_string
    }

    /// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
    pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
        self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
    }

    pub fn does_match_shorter_or_longer(&self, i: usize, j: usize) -> bool {
        let entry = &self.status_matrix[matrix_index(self.n, i, j)];
        let reverse_entry = &self.status_matrix[matrix_index(self.n, j, i)];
        (entry.does_match_valid_continuation || entry.does_match_separators)
            && !reverse_entry.does_match_separators
    }

    pub fn does_overlap(&self, i: usize, j: usize) -> bool {
        let status = &self.status_matrix[matrix_index(self.n, i, j)];
        status.does_match_separators
            || status.matches_prefix
            || status.matches_same_string
            || status.does_match_continuation
    }

    pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
        match left.0.cmp(&right.0) {
            Ordering::Less => false,
            Ordering::Greater => true,
            Ordering::Equal => match grammar.variables[left.1]
                .implicit_precedence
                .cmp(&grammar.variables[right.1].implicit_precedence)
            {
                Ordering::Less => false,
                Ordering::Greater => true,
                Ordering::Equal => left.1 < right.1,
            },
        }
    }

    pub fn prefer_transition(
        grammar: &LexicalGrammar,
        t: &NfaTransition,
        completed_id: usize,
        completed_precedence: i32,
        has_separator_transitions: bool,
    ) -> bool {
        if t.precedence < completed_precedence {
            return false;
        }
        if t.precedence == completed_precedence {
            if t.is_separator {
                return false;
            }
            if has_separator_transitions
                && !grammar
                    .variable_indices_for_nfa_states(&t.states)
                    .any(|i| i == completed_id)
            {
                return false;
            }
        }
        true
    }
}

impl fmt::Debug for TokenConflictMap<'_> {
    /// Writes a multi-line dump of the map: the following-token sets, the
    /// starting and following character sets, and the full status matrix, all
    /// labelled with the names of the grammar's variables.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("TokenConflictMap {\n")?;

        let variables = &self.grammar.variables;
        // `TokenSetDisplay` takes a syntax grammar as well; a default one is
        // passed here.
        let syntax_grammar = SyntaxGrammar::default();

        f.write_str("  following_tokens: {\n")?;
        for (i, token_set) in self.following_tokens.iter().enumerate() {
            let name = &variables[i].name;
            let tokens = TokenSetDisplay(token_set, &syntax_grammar, self.grammar);
            writeln!(f, "    follow({:?}): {},", name, tokens)?;
        }
        f.write_str("  },\n")?;

        f.write_str("  starting_characters: {\n")?;
        for i in 0..self.n {
            let name = &variables[i].name;
            let chars = &self.starting_chars_by_index[i];
            writeln!(f, "    {:?}: {:?},", name, chars)?;
        }
        f.write_str("  },\n")?;

        f.write_str("  following_characters: {\n")?;
        for i in 0..self.n {
            let name = &variables[i].name;
            let chars = &self.following_chars_by_index[i];
            writeln!(f, "    {:?}: {:?},", name, chars)?;
        }
        f.write_str("  },\n")?;

        f.write_str("  status_matrix: {\n")?;
        for i in 0..self.n {
            writeln!(f, "    {:?}: {{", variables[i].name)?;
            for j in 0..self.n {
                let name = &variables[j].name;
                let status = &self.status_matrix[matrix_index(self.n, i, j)];
                writeln!(f, "      {:?}: {:?},", name, status)?;
            }
            f.write_str("    },\n")?;
        }

        // NOTE(review): unlike the sections above, this closing line is written
        // without a trailing newline, so the output ends in `  },}`. Kept as-is
        // to preserve the existing output; confirm whether that is intended.
        f.write_str("  },")?;
        f.write_str("}")
    }
}

/// Maps the ordered pair `(i, j)` to its position in a flattened, row-major
/// square matrix with `variable_count` rows and columns.
const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
    let row_start = i * variable_count;
    row_start + j
}

/// For every variable of the lexical grammar, collects the union of the
/// character sets on the transitions available from the variable's start state.
///
/// The cursor is reset once per variable, so its previous position is lost.
fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<CharacterSet> {
    grammar
        .variables
        .iter()
        .map(|variable| {
            cursor.reset(vec![variable.start_state]);
            cursor
                .transition_chars()
                .fold(CharacterSet::empty(), |all_chars, (chars, _)| {
                    all_chars.add(chars)
                })
        })
        .collect()
}

/// For every set in `following_tokens`, collects the union of the starting
/// characters of the terminal tokens in that set. Non-terminal symbols in a
/// set are ignored.
///
/// The result has one entry per entry of `following_tokens`, in the same order.
fn get_following_chars(
    starting_chars: &[CharacterSet],
    following_tokens: &[TokenSet],
) -> Vec<CharacterSet> {
    let mut result = Vec::with_capacity(following_tokens.len());
    for token_set in following_tokens {
        let chars = token_set
            .iter()
            .filter(|token| token.is_terminal())
            .fold(CharacterSet::empty(), |chars, token| {
                chars.add(&starting_chars[token.index])
            });
        result.push(chars);
    }
    result
}

/// Analyzes how the tokens `i` and `j` of the lexical grammar can overlap.
///
/// Starting from the pair of start states of the two tokens, this walks every
/// reachable set of NFA states once and records, for each direction of the
/// pair, which kinds of conflict were observed.
///
/// Returns `(status of i relative to j, status of j relative to i)`.
///
/// `following_chars` is indexed by token: `following_chars[k]` is checked
/// against a transition's characters to decide whether a continuation past a
/// completed token `k` is a "valid" continuation.
fn compute_conflict_status(
    cursor: &mut NfaCursor,
    grammar: &LexicalGrammar,
    following_chars: &[CharacterSet],
    i: usize,
    j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
    // State sets that have already been queued, so that each is processed once.
    let mut visited_state_sets = HashSet::new();
    // Work list of state sets still to examine (used as a stack).
    let mut state_set_queue = vec![vec![
        grammar.variables[i].start_state,
        grammar.variables[j].start_state,
    ]];
    // `result.0` describes token `i`, `result.1` describes token `j`.
    let mut result = (
        TokenConflictStatus::default(),
        TokenConflictStatus::default(),
    );

    while let Some(state_set) = state_set_queue.pop() {
        let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);

        // If only one of the two tokens could possibly match from this state, then
        // there is no reason to analyze any of its successors. Just record the fact
        // that the token matches a string that the other token does not match.
        // NOTE(review): the `unwrap` assumes every state set maps to at least one
        // variable index — confirm against `variable_indices_for_nfa_states`.
        let first_live_variable_index = live_variable_indices.next().unwrap();
        if live_variable_indices.count() == 0 {
            if first_live_variable_index == i {
                result.0.matches_different_string = true;
            } else {
                result.1.matches_different_string = true;
            }
            continue;
        }

        // Position the cursor on this state set and check whether any of its
        // outgoing transitions is flagged as a separator transition.
        cursor.reset(state_set);
        let within_separator = cursor.transition_chars().any(|(_, sep)| sep);

        // Examine each possible completed token in this state.
        // `completion` tracks the preferred completed token seen so far, as
        // `(variable index, precedence)`.
        let mut completion = None;
        for (id, precedence) in cursor.completions() {
            if within_separator {
                if id == i {
                    result.0.does_match_separators = true;
                } else {
                    result.1.does_match_separators = true;
                }
            }

            // If the other token has already completed, then this is
            // a same-string conflict.
            if let Some((prev_id, prev_precedence)) = completion {
                // A repeated completion of the same token is not a conflict.
                if id == prev_id {
                    continue;
                }

                // Determine which of the two tokens is preferred.
                let preferred_id;
                if TokenConflictMap::prefer_token(
                    grammar,
                    (prev_precedence, prev_id),
                    (precedence, id),
                ) {
                    preferred_id = prev_id;
                } else {
                    preferred_id = id;
                    completion = Some((id, precedence));
                }

                if preferred_id == i {
                    result.0.matches_same_string = true;
                } else {
                    result.1.matches_same_string = true;
                }
            } else {
                completion = Some((id, precedence));
            }
        }

        // Examine each possible transition from this state to detect substring conflicts.
        for transition in cursor.transitions() {
            // NOTE(review): `can_advance` is never set to `false` anywhere in
            // this loop, so every not-yet-visited successor is always queued.
            let mut can_advance = true;

            // If there is already a completed token in this state, then determine
            // if the next state can also match the completed token. If so, then
            // this is *not* a conflict.
            if let Some((completed_id, completed_precedence)) = completion {
                // The last variable seen among the successor states before the
                // completed token (if any) was encountered.
                let mut advanced_id = None;
                let mut successor_contains_completed_id = false;
                for variable_id in grammar.variable_indices_for_nfa_states(&transition.states) {
                    if variable_id == completed_id {
                        successor_contains_completed_id = true;
                        break;
                    }
                    advanced_id = Some(variable_id);
                }

                // Determine which action is preferred: matching the already complete
                // token, or continuing on to try and match the other longer token.
                if let (Some(advanced_id), false) = (advanced_id, successor_contains_completed_id) {
                    if TokenConflictMap::prefer_transition(
                        grammar,
                        &transition,
                        completed_id,
                        completed_precedence,
                        within_separator,
                    ) {
                        can_advance = true;
                        // Continuing is preferred: the advancing token matches a
                        // continuation of the completed token. It is a *valid*
                        // continuation if the next characters could also follow
                        // the completed (other) token.
                        if advanced_id == i {
                            result.0.does_match_continuation = true;
                            if transition.characters.does_intersect(&following_chars[j]) {
                                result.0.does_match_valid_continuation = true;
                            }
                        } else {
                            result.1.does_match_continuation = true;
                            if transition.characters.does_intersect(&following_chars[i]) {
                                result.1.does_match_valid_continuation = true;
                            }
                        }
                    } else if completed_id == i {
                        // Stopping is preferred: the completed token matches a
                        // prefix of the other token.
                        result.0.matches_prefix = true;
                    } else {
                        result.1.matches_prefix = true;
                    }
                }
            }

            // Queue the successor state set unless it has been seen before.
            if can_advance && visited_state_sets.insert(transition.states.clone()) {
                state_set_queue.push(transition.states);
            }
        }
    }
    result
}

#[cfg(test)]
mod tests {
    // Unit tests for `TokenConflictMap`. Each test builds a small lexical
    // grammar with `expand_tokens` and inspects the resulting conflict map.
    use super::*;
    use crate::{
        grammars::{Variable, VariableType},
        prepare_grammar::{expand_tokens, ExtractedLexicalGrammar},
        rules::{Precedence, Rule, Symbol},
    };

    // The starting characters of a token are the characters that can begin
    // any string matched by its pattern.
    #[test]
    fn test_starting_characters() {
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: Vec::new(),
            variables: vec![
                Variable {
                    name: "token_0".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::pattern("[a-f]1|0x\\d", ""),
                },
                Variable {
                    name: "token_1".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::pattern("d*ef", ""),
                },
            ],
        })
        .unwrap();

        let token_map = TokenConflictMap::new(&grammar, Vec::new());

        // `[a-f]1|0x\d` can start with any of `a`..`f` or with `0`.
        assert_eq!(
            token_map.starting_chars_by_index[0],
            CharacterSet::empty().add_range('a', 'f').add_char('0')
        );
        // `d*ef` can start with `d` or, when `d*` matches nothing, with `e`.
        assert_eq!(
            token_map.starting_chars_by_index[1],
            CharacterSet::empty().add_range('d', 'e')
        );
    }

    // Keywords (`in`, `instanceof`) versus a general identifier pattern.
    #[test]
    fn test_token_conflicts() {
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: Vec::new(),
            variables: vec![
                Variable {
                    name: "in".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("in"),
                },
                Variable {
                    name: "identifier".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::pattern("\\w+", ""),
                },
                Variable {
                    name: "instanceof".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("instanceof"),
                },
            ],
        })
        .unwrap();

        let var = |name| index_of_var(&grammar, name);

        // Following-token sets, one per variable in declaration order:
        // `in` -> {identifier}, `identifier` -> {in}, `instanceof` -> {identifier}.
        let token_map = TokenConflictMap::new(
            &grammar,
            vec![
                std::iter::once(&Symbol::terminal(var("identifier")))
                    .copied()
                    .collect(),
                std::iter::once(&Symbol::terminal(var("in")))
                    .copied()
                    .collect(),
                std::iter::once(&Symbol::terminal(var("identifier")))
                    .copied()
                    .collect(),
            ],
        );

        // Given the string "in", the `in` token is preferred over the `identifier` token
        assert!(token_map.does_match_same_string(var("in"), var("identifier")));
        assert!(!token_map.does_match_same_string(var("identifier"), var("in")));

        // Depending on what character follows, the string "in" may be treated as part of an
        // `identifier` token.
        assert!(token_map.does_conflict(var("identifier"), var("in")));

        // Depending on what character follows, the string "instanceof" may be treated as part of
        // an `identifier` token.
        assert!(token_map.does_conflict(var("identifier"), var("instanceof")));
        assert!(token_map.does_conflict(var("instanceof"), var("in")));
    }

    // A token (`newline`) whose text is also matched by the separator pattern.
    #[test]
    fn test_token_conflicts_with_separators() {
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: vec![Rule::pattern("\\s", "")],
            variables: vec![
                Variable {
                    name: "x".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("x"),
                },
                Variable {
                    name: "newline".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("\n"),
                },
            ],
        })
        .unwrap();

        let var = |name| index_of_var(&grammar, name);

        // All following-token sets are empty here.
        let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);

        // The conflict is one-directional: `newline` conflicts with `x`, but
        // not the other way around.
        assert!(token_map.does_conflict(var("newline"), var("x")));
        assert!(!token_map.does_conflict(var("x"), var("newline")));
    }

    // A low-precedence catch-all pattern (`.*`) versus a fixed string.
    #[test]
    fn test_token_conflicts_with_open_ended_tokens() {
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: vec![Rule::pattern("\\s", "")],
            variables: vec![
                Variable {
                    name: "x".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("x"),
                },
                Variable {
                    name: "anything".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
                },
            ],
        })
        .unwrap();

        let var = |name| index_of_var(&grammar, name);

        // All following-token sets are empty here.
        let token_map = TokenConflictMap::new(&grammar, vec![TokenSet::new(); 4]);

        assert!(token_map.does_match_shorter_or_longer(var("anything"), var("x")));
        assert!(!token_map.does_match_shorter_or_longer(var("x"), var("anything")));
    }

    // Returns the index of the variable called `name`; panics if there is none.
    fn index_of_var(grammar: &LexicalGrammar, name: &str) -> usize {
        grammar
            .variables
            .iter()
            .position(|v| v.name == name)
            .unwrap()
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/expand_repeats.rs
================================================
use std::{collections::HashMap, mem};

use super::ExtractedSyntaxGrammar;
use crate::{
    grammars::{Variable, VariableType},
    rules::{Rule, Symbol},
};

/// Working state for replacing `Rule::Repeat` nodes with references to
/// auxiliary, recursively defined variables.
struct Expander {
    /// Name of the variable currently being expanded; used as the prefix of
    /// the names of the auxiliary rules created for it.
    variable_name: String,
    /// Number of auxiliary repeat rules created so far for the current
    /// variable; reset at the start of `expand_variable`.
    repeat_count_in_variable: usize,
    /// Offset added to an auxiliary variable's position in
    /// `auxiliary_variables` to obtain its symbol index (`expand_repeats` sets
    /// this to the number of variables already in the grammar).
    preceding_symbol_count: usize,
    /// The auxiliary variables created so far, in creation order.
    auxiliary_variables: Vec<Variable>,
    /// Maps already-expanded repeated content to the symbol of the auxiliary
    /// rule created for it, so that identical repeats share one rule.
    existing_repeats: HashMap<Rule, Symbol>,
}

impl Expander {
    /// Expands every repetition inside `variable`, which sits at position
    /// `index` in the grammar.
    ///
    /// Returns `true` only when the variable is hidden and its rule is a
    /// top-level repetition; in that case the variable itself is rewritten
    /// into a recursive auxiliary rule. Otherwise returns `false`.
    fn expand_variable(&mut self, index: usize, variable: &mut Variable) -> bool {
        // Start the per-variable naming state afresh.
        self.repeat_count_in_variable = 0;
        self.variable_name.clear();
        self.variable_name.push_str(&variable.name);

        // Take the rule out of the variable, leaving a blank placeholder.
        let rule = mem::replace(&mut variable.rule, Rule::Blank);

        // In the special case of a hidden variable with a repetition at its top
        // level, turn that rule itself into a binary tree structure rather than
        // introducing another auxiliary rule.
        if matches!(variable.kind, VariableType::Hidden) {
            if let Rule::Repeat(repeated_content) = &rule {
                let inner_rule = self.expand_rule(repeated_content);
                let own_symbol = Symbol::non_terminal(index);
                variable.rule = self.wrap_rule_in_binary_tree(own_symbol, inner_rule);
                variable.kind = VariableType::Auxiliary;
                return true;
            }
        }

        variable.rule = self.expand_rule(&rule);
        false
    }

    /// Returns a copy of `rule` in which every repetition has been replaced by
    /// a symbol referring to an auxiliary rule, creating those auxiliary rules
    /// as needed.
    fn expand_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            // A repetition becomes a reference to an auxiliary rule that holds
            // the repeated content plus a recursive binary tree structure.
            Rule::Repeat(content) => {
                let inner_rule = self.expand_rule(content);

                // Reuse the auxiliary rule if this content was seen before.
                if let Some(&existing_symbol) = self.existing_repeats.get(&inner_rule) {
                    return Rule::Symbol(existing_symbol);
                }

                self.repeat_count_in_variable += 1;
                let name = format!(
                    "{}_repeat{}",
                    self.variable_name, self.repeat_count_in_variable
                );
                // The symbol index is computed only after the content has been
                // expanded, since nested repeats add auxiliary rules first.
                let repeat_symbol = Symbol::non_terminal(
                    self.preceding_symbol_count + self.auxiliary_variables.len(),
                );
                let wrapped_rule = self.wrap_rule_in_binary_tree(repeat_symbol, inner_rule.clone());
                self.existing_repeats.insert(inner_rule, repeat_symbol);
                self.auxiliary_variables.push(Variable {
                    name,
                    kind: VariableType::Auxiliary,
                    rule: wrapped_rule,
                });

                Rule::Symbol(repeat_symbol)
            }

            // Choices, sequences and metadata are rebuilt around their expanded
            // children.
            Rule::Choice(elements) => {
                let mut expanded = Vec::with_capacity(elements.len());
                for element in elements {
                    expanded.push(self.expand_rule(element));
                }
                Rule::Choice(expanded)
            }

            Rule::Seq(elements) => {
                let mut expanded = Vec::with_capacity(elements.len());
                for element in elements {
                    expanded.push(self.expand_rule(element));
                }
                Rule::Seq(expanded)
            }

            Rule::Metadata { rule, params } => {
                let expanded = self.expand_rule(rule);
                Rule::Metadata {
                    rule: Box::new(expanded),
                    params: params.clone(),
                }
            }

            // Primitive rules are copied unchanged.
            _ => rule.clone(),
        }
    }

    /// Builds the recursive definition for a repeat rule: a choice between two
    /// occurrences of `symbol` in sequence and the repeated content `rule`.
    fn wrap_rule_in_binary_tree(&self, symbol: Symbol, rule: Rule) -> Rule {
        let left = Rule::Symbol(symbol);
        let right = Rule::Symbol(symbol);
        let recursive_pair = Rule::Seq(vec![left, right]);
        Rule::choice(vec![recursive_pair, rule])
    }
}

/// Replaces every repetition in the grammar's variables with a reference to an
/// auxiliary recursive rule, and appends those auxiliary rules to the end of
/// the grammar's variable list.
pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar {
    let mut expander = Expander {
        existing_repeats: HashMap::new(),
        auxiliary_variables: Vec::new(),
        // Auxiliary symbols are numbered after all of the existing variables.
        preceding_symbol_count: grammar.variables.len(),
        repeat_count_in_variable: 0,
        variable_name: String::new(),
    };

    for (i, variable) in grammar.variables.iter_mut().enumerate() {
        if !expander.expand_variable(i, variable) {
            continue;
        }

        // A hidden variable whose top-level repetition was converted into a
        // recursive rule can no longer be inlined.
        let recursive_symbol = Symbol::non_terminal(i);
        grammar
            .variables_to_inline
            .retain(|symbol| *symbol != recursive_symbol);
    }

    let Expander {
        auxiliary_variables,
        ..
    } = expander;
    grammar.variables.extend(auxiliary_variables);
    grammar
}

#[cfg(test)]
mod tests {
    // Unit tests for `expand_repeats`. In the expected grammars, auxiliary
    // rules are referenced by their index in the final variable list.
    use super::*;

    #[test]
    fn test_basic_repeat_expansion() {
        // Repeats nested inside of sequences and choices are expanded.
        let grammar = expand_repeats(build_grammar(vec![Variable::named(
            "rule0",
            Rule::seq(vec![
                Rule::terminal(10),
                Rule::choice(vec![
                    Rule::repeat(Rule::terminal(11)),
                    Rule::repeat(Rule::terminal(12)),
                ]),
                Rule::terminal(13),
            ]),
        )]));

        // Each repeat is replaced by its own auxiliary rule, named after the
        // enclosing variable and numbered in order of creation.
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::seq(vec![
                        Rule::terminal(10),
                        Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]),
                        Rule::terminal(13),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
                        Rule::terminal(11),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat2",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::terminal(12),
                    ])
                ),
            ]
        );
    }

    #[test]
    fn test_repeat_deduplication() {
        // Terminal 4 appears inside of a repeat in three different places.
        let grammar = expand_repeats(build_grammar(vec![
            Variable::named(
                "rule0",
                Rule::choice(vec![
                    Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]),
                    Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]),
                ]),
            ),
            Variable::named(
                "rule1",
                Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]),
            ),
        ]));

        // Only one auxiliary rule is created for repeating terminal 4.
        // It is named after the first variable in which the repeat was found.
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]),
                        Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]),
                    ])
                ),
                Variable::named(
                    "rule1",
                    Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::terminal(4),
                    ])
                )
            ]
        );
    }

    #[test]
    fn test_expansion_of_nested_repeats() {
        // A repeat whose content itself contains a repeat.
        let grammar = expand_repeats(build_grammar(vec![Variable::named(
            "rule0",
            Rule::seq(vec![
                Rule::terminal(10),
                Rule::repeat(Rule::seq(vec![
                    Rule::terminal(11),
                    Rule::repeat(Rule::terminal(12)),
                ])),
            ]),
        )]));

        // The inner repeat is expanded first, so it gets the first auxiliary
        // rule (index 1); the outer repeat gets the second one (index 2).
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named(
                    "rule0",
                    Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),])
                ),
                Variable::auxiliary(
                    "rule0_repeat1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]),
                        Rule::terminal(12),
                    ])
                ),
                Variable::auxiliary(
                    "rule0_repeat2",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]),
                        Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]),
                    ])
                ),
            ]
        );
    }

    #[test]
    fn test_expansion_of_repeats_at_top_of_hidden_rules() {
        // A hidden rule whose entire body is a repeat.
        let grammar = expand_repeats(build_grammar(vec![
            Variable::named("rule0", Rule::non_terminal(1)),
            Variable::hidden(
                "_rule1",
                Rule::repeat(Rule::choice(vec![Rule::terminal(11), Rule::terminal(12)])),
            ),
        ]));

        // The hidden rule is converted in place into a recursive auxiliary
        // rule; no additional variable is added to the grammar.
        assert_eq!(
            grammar.variables,
            vec![
                Variable::named("rule0", Rule::non_terminal(1),),
                Variable::auxiliary(
                    "_rule1",
                    Rule::choice(vec![
                        Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1)]),
                        Rule::terminal(11),
                        Rule::terminal(12),
                    ]),
                ),
            ]
        );
    }

    // Builds a grammar that has the given variables and default values for
    // every other field.
    fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
        ExtractedSyntaxGrammar {
            variables,
            ..Default::default()
        }
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/expand_tokens.rs
================================================
use anyhow::Result;
use regex_syntax::{
    hir::{Class, Hir, HirKind},
    ParserBuilder,
};
use serde::Serialize;
use thiserror::Error;

use super::ExtractedLexicalGrammar;
use crate::{
    grammars::{LexicalGrammar, LexicalVariable},
    nfa::{CharacterSet, Nfa, NfaState},
    rules::{Precedence, Rule},
};

/// Working state used while turning the rules of a lexical grammar into a
/// single NFA.
struct NfaBuilder {
    /// The NFA under construction; new states are pushed onto `nfa.states`.
    nfa: Nfa,
    /// Set to `false` by `expand_tokens` before expanding a token's own rule
    /// and to `true` before expanding the separator rule.
    // NOTE(review): presumably marks the states built meanwhile as separator
    // states — confirm in `expand_rule`.
    is_sep: bool,
    /// Initialized to `vec![0]` by `expand_tokens`.
    // NOTE(review): presumably the precedences of the enclosing rules, innermost
    // last — confirm in `expand_rule`.
    precedence_stack: Vec<i32>,
}

/// Result type whose error is an [`ExpandTokensError`].
pub type ExpandTokensResult<T> = Result<T, ExpandTokensError>;

/// The ways in which `expand_tokens` can fail.
#[derive(Debug, Error, Serialize)]
pub enum ExpandTokensError {
    /// A variable's rule is empty; the payload is the name of that variable.
    #[error(
        "The rule `{0}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
        "
    )]
    EmptyString(String),
    /// Expanding the rule of a named variable failed; the payload records the
    /// variable's name together with the underlying error.
    #[error(transparent)]
    Processing(ExpandTokensProcessingError),
    /// Expanding the separator rule failed.
    #[error(transparent)]
    ExpandRule(ExpandRuleError),
}

/// A rule-expansion error annotated with the name of the variable whose rule
/// was being expanded. Its `Display` implementation is written by hand.
#[derive(Debug, Error, Serialize)]
pub struct ExpandTokensProcessingError {
    /// Name of the variable whose rule failed to expand.
    rule: String,
    /// The underlying expansion error.
    error: ExpandRuleError,
}

impl std::fmt::Display for ExpandTokensProcessingError {
    /// Writes a single line naming the rule and showing the underlying error
    /// in its `Debug` form, followed by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { rule, error } = self;
        writeln!(
            f,
            "Error processing rule {}: Grammar error: Unexpected rule {:?}",
            rule, error
        )
    }
}

/// Computes the implicit precedence of a token rule: `2` for a plain string,
/// `0` for any other primitive rule, and for a metadata wrapper the value of
/// the wrapped rule, plus one if the wrapper is flagged as a main token.
fn get_implicit_precedence(rule: &Rule) -> i32 {
    match rule {
        Rule::Metadata { rule, params } => {
            let inner_precedence = get_implicit_precedence(rule);
            if params.is_main_token {
                inner_precedence + 1
            } else {
                inner_precedence
            }
        }
        Rule::String(_) => 2,
        _ => 0,
    }
}

/// Returns the integer precedence attached directly to `rule` when it is a
/// metadata wrapper carrying `Precedence::Integer`, and 0 in every other case.
const fn get_completion_precedence(rule: &Rule) -> i32 {
    match rule {
        Rule::Metadata { params, .. } => match params.precedence {
            Precedence::Integer(value) => value,
            _ => 0,
        },
        _ => 0,
    }
}

/// Expands every lexical variable of `grammar` into states of one shared NFA.
///
/// For each variable an `Accept` state is pushed, the variable's rule is
/// expanded in front of it, and then, unless the rule's metadata marks it as a
/// main token, the separator rule is expanded in front of that. The id of the
/// last state pushed becomes the variable's `start_state`.
///
/// # Errors
///
/// * `ExpandTokensError::EmptyString` if a variable's rule reports `is_empty()`.
/// * `ExpandTokensError::Processing` if a variable's rule fails to expand.
/// * `ExpandTokensError::ExpandRule` if the separator rule fails to expand.
pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult<LexicalGrammar> {
    // Separators may repeat; the extra `Blank` alternative lets the choice
    // match nothing at all.
    let separator_rule = if grammar.separators.is_empty() {
        Rule::Blank
    } else {
        grammar.separators.push(Rule::Blank);
        Rule::repeat(Rule::choice(grammar.separators))
    };

    let mut builder = NfaBuilder {
        nfa: Nfa::new(),
        is_sep: true,
        precedence_stack: vec![0],
    };

    let mut variables = Vec::with_capacity(grammar.variables.len());
    for (variable_index, variable) in grammar.variables.into_iter().enumerate() {
        if variable.rule.is_empty() {
            return Err(ExpandTokensError::EmptyString(variable.name.clone()));
        }

        let is_immediate_token =
            matches!(&variable.rule, Rule::Metadata { params, .. } if params.is_main_token);

        // The token's own states are built backwards from a fresh accept state.
        builder.is_sep = false;
        let accept_state = NfaState::Accept {
            variable_index,
            precedence: get_completion_precedence(&variable.rule),
        };
        builder.nfa.states.push(accept_state);
        let accept_state_id = builder.nfa.last_state_id();
        if let Err(error) = builder.expand_rule(&variable.rule, accept_state_id) {
            return Err(ExpandTokensError::Processing(ExpandTokensProcessingError {
                rule: variable.name.clone(),
                error,
            }));
        }

        // Separator states go in front of the token and are flagged `is_sep`.
        if !is_immediate_token {
            builder.is_sep = true;
            let token_entry_id = builder.nfa.last_state_id();
            builder
                .expand_rule(&separator_rule, token_entry_id)
                .map_err(ExpandTokensError::ExpandRule)?;
        }

        let implicit_precedence = get_implicit_precedence(&variable.rule);
        let start_state = builder.nfa.last_state_id();
        variables.push(LexicalVariable {
            name: variable.name,
            kind: variable.kind,
            implicit_precedence,
            start_state,
        });
    }

    Ok(LexicalGrammar {
        nfa: builder.nfa,
        variables,
    })
}

/// Result type returned by `NfaBuilder::expand_rule`.
pub type ExpandRuleResult<T> = Result<T, ExpandRuleError>;

/// Errors produced while expanding a single `Rule` into NFA states.
#[derive(Debug, Error, Serialize)]
pub enum ExpandRuleError {
    /// The rule is of a kind that `expand_rule` has no case for.
    #[error("Grammar error: Unexpected rule {0:?}")]
    UnexpectedRule(Rule),
    /// The regex parser rejected a pattern; carries the parser's message.
    #[error("{0}")]
    Parse(String),
    /// Converting the parsed regex into NFA states failed.
    #[error(transparent)]
    ExpandRegex(ExpandRegexError),
}

/// Result type returned by the regex-expansion methods of `NfaBuilder`.
pub type ExpandRegexResult<T> = Result<T, ExpandRegexError>;

/// Errors produced while converting a parsed regex (`Hir`) into NFA states.
#[derive(Debug, Error, Serialize)]
pub enum ExpandRegexError {
    /// A regex literal's bytes were not valid UTF-8; carries the decoder's message.
    #[error("{0}")]
    Utf8(String),
    /// The regex contains a look-around assertion (`HirKind::Look`).
    #[error("Regex error: Assertions are not supported")]
    Assertion,
}

impl NfaBuilder {
    fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult<bool> {
        match rule {
            Rule::Pattern(s, f) => {
                // With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much
                // larger than intended, so we replace them with the actual
                // character sets they should represent. If the full unicode range
                // of `\w`, `\s` or `\d` are needed then `\p{L}`, `\p{Z}` and `\p{N}` should be
                // used.
                let s = s
                    .replace(r"\w", r"[0-9A-Za-z_]")
                    .replace(r"\s", r"[\t-\r ]")
                    .replace(r"\d", r"[0-9]")
                    .replace(r"\W", r"[^0-9A-Za-z_]")
                    .replace(r"\S", r"[^\t-\r ]")
                    .replace(r"\D", r"[^0-9]");
                let mut parser = ParserBuilder::new()
                    .case_insensitive(f.contains('i'))
                    .unicode(true)
                    .utf8(false)
                    .build();
                let hir = parser
                    .parse(&s)
                    .map_err(|e| ExpandRuleError::Parse(e.to_string()))?;
                self.expand_regex(&hir, next_state_id)
                    .map_err(ExpandRuleError::ExpandRegex)
            }
            Rule::String(s) => {
                for c in s.chars().rev() {
                    self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
                    next_state_id = self.nfa.last_state_id();
                }
                Ok(!s.is_empty())
            }
            Rule::Choice(elements) => {
                let mut alternative_state_ids = Vec::with_capacity(elements.len());
                for element in elements {
                    if self.expand_rule(element, next_state_id)? {
                        alternative_state_ids.push(self.nfa.last_state_id());
                    } else {
                        alternative_state_ids.push(next_state_id);
                    }
                }
                alternative_state_ids.sort_unstable();
                alternative_state_ids.dedup();
                alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
                for alternative_state_id in alternative_state_ids {
                    self.push_split(alternative_state_id);
                }
                Ok(true)
            }
            Rule::Seq(elements) => {
                let mut result = false;
                for element in elements.iter().rev() {
                    if self.expand_rule(element, next_state_id)? {
                        result = true;
                    }
                    next_state_id = self.nfa.last_state_id();
                }
                Ok(result)
            }
            Rule::Repeat(rule) => {
                self.nfa.states.push(NfaState::Accept {
                    variable_index: 0,
                    precedence: 0,
                }); // Placeholder for split
                let split_state_id = self.nfa.last_state_id();
                if self.expand_rule(rule, split_state_id)? {
                    self.nfa.states[split_state_id as usize] =
                        NfaState::Split(self.nfa.last_state_id(), next_state_id);
                    Ok(true)
                } else {
                    Ok(false)
                }
            }
            Rule::Metadata { rule, params } => {
                let has_precedence = if let Precedence::Integer(precedence) = &params.precedence {
                    self.precedence_stack.push(*precedence);
                    true
                } else {
                    false
                };
                let result = self.expand_rule(rule, next_state_id);
                if has_precedence {
                    self.precedence_stack.pop();
                }
                result
            }
            Rule::Blank => Ok(false),
            _ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?,
        }
    }

    /// Appends NFA states that match the regex `hir` and then continue at
    /// `next_state_id`.
    ///
    /// States are built back to front, so after a successful expansion the entry
    /// state is `self.nfa.last_state_id()`.
    ///
    /// Returns `Ok(true)` if states were added and `Ok(false)` if the expression
    /// can only match the empty string.
    ///
    /// # Errors
    ///
    /// * `ExpandRegexError::Utf8` if a literal's bytes are not valid UTF-8.
    /// * `ExpandRegexError::Assertion` if the regex contains a look-around.
    fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult<bool> {
        match hir.kind() {
            HirKind::Empty => Ok(false),
            HirKind::Literal(literal) => {
                // One `Advance` per character, last character first.
                for character in std::str::from_utf8(&literal.0)
                    .map_err(|e| ExpandRegexError::Utf8(e.to_string()))?
                    .chars()
                    .rev()
                {
                    let char_set = CharacterSet::from_char(character);
                    self.push_advance(char_set, next_state_id);
                    next_state_id = self.nfa.last_state_id();
                }

                Ok(true)
            }
            HirKind::Class(class) => match class {
                Class::Unicode(class) => {
                    let mut chars = CharacterSet::default();
                    for c in class.ranges() {
                        chars = chars.add_range(c.start(), c.end());
                    }

                    // For some reason, the long s `ſ` is included if the letter `s` is in a
                    // pattern, so we remove it.
                    if chars.range_count() == 3
                        && chars
                            .ranges()
                            // exact check to ensure that `ſ` wasn't intentionally added.
                            .all(|r| ['s'..='s', 'S'..='S', 'ſ'..='ſ'].contains(&r))
                    {
                        chars = chars.difference(CharacterSet::from_char('ſ'));
                    }
                    self.push_advance(chars, next_state_id);
                    Ok(true)
                }
                Class::Bytes(bytes_class) => {
                    let mut chars = CharacterSet::default();
                    for c in bytes_class.ranges() {
                        chars = chars.add_range(c.start().into(), c.end().into());
                    }
                    self.push_advance(chars, next_state_id);
                    Ok(true)
                }
            },
            HirKind::Look(_) => Err(ExpandRegexError::Assertion),
            HirKind::Repetition(repetition) => match (repetition.min, repetition.max) {
                (0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id),
                (1, None) => self.expand_one_or_more(&repetition.sub, next_state_id),
                (0, None) => self.expand_zero_or_more(&repetition.sub, next_state_id),
                (min, Some(max)) if min == max => {
                    self.expand_count(&repetition.sub, min, next_state_id)
                }
                (min, None) => {
                    // `x{min,}` is `min` mandatory copies followed by `x*`. The
                    // trailing `x*` is built first; the mandatory copies must
                    // then continue into it rather than skip past it.
                    if self.expand_zero_or_more(&repetition.sub, next_state_id)? {
                        let star_state_id = self.nfa.last_state_id();
                        self.expand_count(&repetition.sub, min, star_state_id)
                    } else {
                        Ok(false)
                    }
                }
                (min, Some(max)) => {
                    // `min` mandatory copies plus `max - min` optional copies,
                    // each chained in front of whatever was built so far.
                    let mut result = self.expand_count(&repetition.sub, min, next_state_id)?;
                    for _ in min..max {
                        if result {
                            next_state_id = self.nfa.last_state_id();
                        }
                        if self.expand_zero_or_one(&repetition.sub, next_state_id)? {
                            result = true;
                        }
                    }
                    Ok(result)
                }
            },
            HirKind::Capture(capture) => self.expand_regex(&capture.sub, next_state_id),
            HirKind::Concat(concat) => {
                let mut result = false;
                for hir in concat.iter().rev() {
                    // Empty sub-expressions add no states and leave the
                    // continuation unchanged.
                    if self.expand_regex(hir, next_state_id)? {
                        result = true;
                        next_state_id = self.nfa.last_state_id();
                    }
                }
                Ok(result)
            }
            HirKind::Alternation(alternations) => {
                // Expand every alternative towards the same continuation and
                // remember each alternative's entry state. Empty alternatives
                // enter directly at the continuation.
                let mut alternative_state_ids = Vec::with_capacity(alternations.len());
                for hir in alternations {
                    if self.expand_regex(hir, next_state_id)? {
                        alternative_state_ids.push(self.nfa.last_state_id());
                    } else {
                        alternative_state_ids.push(next_state_id);
                    }
                }
                alternative_state_ids.sort_unstable();
                alternative_state_ids.dedup();
                // The most recent state is reached by the second branch of the
                // first split pushed below, so it needs no split of its own.
                alternative_state_ids.retain(|i| *i != self.nfa.last_state_id());
                for alternative_state_id in alternative_state_ids {
                    self.push_split(alternative_state_id);
                }
                Ok(true)
            }
        }
    }

    /// Expands `hir+`: the body followed by a split that either loops back into
    /// the body or continues at `next_state_id`.
    ///
    /// Returns `Ok(false)`, leaving the NFA unchanged, when the body adds no states.
    fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
        // Reserve the slot that will hold the loop split. Its final contents
        // depend on the body's entry state, which is not known yet.
        self.nfa.states.push(NfaState::Accept {
            variable_index: 0,
            precedence: 0,
        });
        let placeholder_id = self.nfa.last_state_id();

        if !self.expand_regex(hir, placeholder_id)? {
            // Nothing was built in front of the placeholder, so drop it again.
            self.nfa.states.pop();
            return Ok(false);
        }

        let body_entry_id = self.nfa.last_state_id();
        self.nfa.states[placeholder_id as usize] = NfaState::Split(body_entry_id, next_state_id);
        Ok(true)
    }

    /// Expands `hir?`: the body, preceded by a split that may bypass it and go
    /// straight to `next_state_id`.
    ///
    /// Returns whether the body added any states; no split is pushed otherwise.
    fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
        let body_added_states = self.expand_regex(hir, next_state_id)?;
        if body_added_states {
            self.push_split(next_state_id);
        }
        Ok(body_added_states)
    }

    /// Expands `hir*` as `hir+` preceded by a split that may bypass it and go
    /// straight to `next_state_id`.
    ///
    /// Returns whether the body added any states; no split is pushed otherwise.
    fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
        let body_added_states = self.expand_one_or_more(hir, next_state_id)?;
        if body_added_states {
            self.push_split(next_state_id);
        }
        Ok(body_added_states)
    }

    /// Expands `count` consecutive copies of `hir`, the last of which continues
    /// at `next_state_id`.
    ///
    /// Returns whether any copy added states.
    fn expand_count(
        &mut self,
        hir: &Hir,
        count: u32,
        next_state_id: u32,
    ) -> ExpandRegexResult<bool> {
        let mut any_states_added = false;
        let mut continuation_id = next_state_id;
        for _ in 0..count {
            if !self.expand_regex(hir, continuation_id)? {
                continue;
            }
            // The copy just built becomes the continuation of the next one.
            any_states_added = true;
            continuation_id = self.nfa.last_state_id();
        }
        Ok(any_states_added)
    }

    /// Pushes an `Advance` state that consumes one character from `chars` and
    /// moves to `state_id`, tagged with the builder's current precedence (top of
    /// `precedence_stack`) and `is_sep` flag.
    fn push_advance(&mut self, chars: CharacterSet, state_id: u32) {
        let advance = NfaState::Advance {
            chars,
            state_id,
            precedence: *self.precedence_stack.last().unwrap(),
            is_sep: self.is_sep,
        };
        self.nfa.states.push(advance);
    }

    /// Pushes a `Split` state that branches to `state_id` and to the state that
    /// was last in the NFA before this call.
    fn push_split(&mut self, state_id: u32) {
        let split = NfaState::Split(state_id, self.nfa.last_state_id());
        self.nfa.states.push(split);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        grammars::Variable,
        nfa::{NfaCursor, NfaTransition},
    };

    /// Runs the grammar's NFA over `s` from every variable's start state.
    ///
    /// Returns the variable index and matched text of the last completion
    /// accepted, or `None` if no completion was seen. A completion replaces the
    /// current one when its precedence is at least as high. Text consumed by
    /// separator transitions is excluded from the matched slice.
    fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
        let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
        let mut cursor = NfaCursor::new(&grammar.nfa, start_states);

        let mut best = None;
        let mut best_precedence = i32::MIN;
        let mut token_start = 0;
        let mut token_end = 0;
        for c in s.chars() {
            // Record whatever can be accepted before consuming `c`.
            for (id, precedence) in cursor.completions() {
                if best.is_none() || precedence >= best_precedence {
                    best = Some((id, &s[token_start..token_end]));
                    best_precedence = precedence;
                }
            }

            // Take the first transition on `c` that does not lower the precedence.
            let next_transition = cursor
                .transitions()
                .into_iter()
                .find(|t| t.characters.contains(c) && t.precedence >= best_precedence);
            match next_transition {
                Some(NfaTransition {
                    states,
                    is_separator,
                    ..
                }) => {
                    cursor.reset(states);
                    token_end += c.len_utf8();
                    if is_separator {
                        token_start = token_end;
                    }
                }
                None => break,
            }
        }

        // Completions available once the input is exhausted or the NFA is stuck.
        for (id, precedence) in cursor.completions() {
            if best.is_none() || precedence >= best_precedence {
                best = Some((id, &s[token_start..token_end]));
                best_precedence = precedence;
            }
        }

        best
    }

    // Table-driven test: each row expands a set of rules (plus separators) with
    // `expand_tokens`, then checks, for every example input, which variable
    // `simulate_nfa` reports and which slice of the input it matched.
    #[test]
    fn test_rule_expansion() {
        struct Row {
            rules: Vec<Rule>,
            separators: Vec<Rule>,
            // (input, expected (variable index, matched text) or `None`)
            examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
        }

        let table = [
            // regex with sequences and alternatives
            Row {
                rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
                separators: vec![],
                examples: vec![
                    ("ade1", Some((0, "ade"))),
                    ("bdf1", Some((0, "bdf"))),
                    ("bdfh1", Some((0, "bdfh"))),
                    ("ad1", None),
                ],
            },
            // regex with repeats
            Row {
                rules: vec![Rule::pattern("a*", "")],
                separators: vec![],
                examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
            },
            // regex with repeats in sequences
            Row {
                rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
                separators: vec![],
                examples: vec![
                    ("af1", Some((0, "af"))),
                    ("adedef1", Some((0, "adedef"))),
                    ("abcbcbcf1", Some((0, "abcbcbcf"))),
                    ("a", None),
                ],
            },
            // regex with character ranges
            Row {
                rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
                separators: vec![],
                examples: vec![("A1ff0.", Some((0, "A1ff0")))],
            },
            // regex with perl character classes
            Row {
                rules: vec![Rule::pattern("\\w\\d\\s", "")],
                separators: vec![],
                examples: vec![("_0  ", Some((0, "_0 ")))],
            },
            // string
            Row {
                rules: vec![Rule::string("abc")],
                separators: vec![],
                examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
            },
            // complex rule containing strings and regexes
            Row {
                rules: vec![Rule::repeat(Rule::seq(vec![
                    Rule::string("{"),
                    Rule::pattern("[a-f]+", ""),
                    Rule::string("}"),
                ]))],
                separators: vec![],
                examples: vec![
                    ("{a}{", Some((0, "{a}"))),
                    ("{a}{d", Some((0, "{a}"))),
                    ("ab", None),
                ],
            },
            // longest match rule
            Row {
                rules: vec![
                    Rule::pattern("a|bc", ""),
                    Rule::pattern("aa", ""),
                    Rule::pattern("bcd", ""),
                ],
                separators: vec![],
                examples: vec![
                    ("a.", Some((0, "a"))),
                    ("bc.", Some((0, "bc"))),
                    ("aa.", Some((1, "aa"))),
                    ("bcd?", Some((2, "bcd"))),
                    ("b.", None),
                    ("c.", None),
                ],
            },
            // regex with an alternative including the empty string
            Row {
                rules: vec![Rule::pattern("a(b|)+c", "")],
                separators: vec![],
                examples: vec![
                    ("ac.", Some((0, "ac"))),
                    ("abc.", Some((0, "abc"))),
                    ("abbc.", Some((0, "abbc"))),
                ],
            },
            // separators
            Row {
                rules: vec![Rule::pattern("[a-f]+", "")],
                separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
                examples: vec![
                    ("  a", Some((0, "a"))),
                    ("  \nb", Some((0, "b"))),
                    ("  \\a", None),
                    ("  \\\na", Some((0, "a"))),
                ],
            },
            // shorter tokens with higher precedence
            Row {
                rules: vec![
                    Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
                    Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
                    Rule::pattern("[a-e]+", ""),
                ],
                separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
                examples: vec![
                    ("abceef", Some((0, "abc"))),
                    ("abdeef", Some((1, "abde"))),
                    ("aeeeef", Some((2, "aeeee"))),
                ],
            },
            // immediate tokens with higher precedence
            Row {
                rules: vec![
                    Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
                    Rule::immediate_token(Rule::prec(
                        Precedence::Integer(2),
                        Rule::pattern("[^ab]+", ""),
                    )),
                ],
                separators: vec![Rule::pattern("\\s", "")],
                examples: vec![("cccb", Some((1, "ccc")))],
            },
            // choice between strings inside a sequence
            Row {
                rules: vec![Rule::seq(vec![
                    Rule::string("a"),
                    Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
                    Rule::string("d"),
                ])],
                separators: vec![],
                examples: vec![
                    ("abd", Some((0, "abd"))),
                    ("acd", Some((0, "acd"))),
                    ("abc", None),
                    ("ad", None),
                    ("d", None),
                    ("a", None),
                ],
            },
            // nested choices within sequences
            Row {
                rules: vec![Rule::seq(vec![
                    Rule::pattern("[0-9]+", ""),
                    Rule::choice(vec![
                        Rule::Blank,
                        Rule::choice(vec![Rule::seq(vec![
                            Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
                            Rule::choice(vec![
                                Rule::Blank,
                                Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
                            ]),
                            Rule::pattern("[0-9]+", ""),
                        ])]),
                    ]),
                ])],
                separators: vec![],
                examples: vec![
                    ("12", Some((0, "12"))),
                    ("12e", Some((0, "12"))),
                    ("12g", Some((0, "12"))),
                    ("12e3", Some((0, "12e3"))),
                    ("12e+", Some((0, "12"))),
                    ("12E+34 +", Some((0, "12E+34"))),
                    ("12e34", Some((0, "12e34"))),
                ],
            },
            // nested groups
            Row {
                rules: vec![Rule::seq(vec![Rule::pattern(r"([^x\\]|\\(.|\n))+", "")])],
                separators: vec![],
                examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
            },
            // allowing unrecognized escape sequences
            Row {
                rules: vec![
                    // Escaped forward slash (used in JS because '/' is the regex delimiter)
                    Rule::pattern(r"\/", ""),
                    // Escaped quotes
                    Rule::pattern(r#"\"\'"#, ""),
                    // Quote preceded by a literal backslash
                    Rule::pattern(r"[\\']+", ""),
                ],
                separators: vec![],
                examples: vec![
                    ("/", Some((0, "/"))),
                    ("\"\'", Some((1, "\"\'"))),
                    (r"'\'a", Some((2, r"'\'"))),
                ],
            },
            // unicode property escapes
            Row {
                rules: vec![
                    Rule::pattern(r"\p{L}+\P{L}+", ""),
                    Rule::pattern(r"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*", ""),
                ],
                separators: vec![],
                examples: vec![
                    ("  123   abc", Some((1, "  123   "))),
                    ("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))),
                ],
            },
            // unicode property escapes in bracketed sets
            Row {
                rules: vec![Rule::pattern(r"[\p{L}\p{Nd}]+", "")],
                separators: vec![],
                examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
            },
            // unicode character escapes
            Row {
                rules: vec![
                    Rule::pattern(r"\u{00dc}", ""),
                    Rule::pattern(r"\U{000000dd}", ""),
                    Rule::pattern(r"\u00de", ""),
                    Rule::pattern(r"\U000000df", ""),
                ],
                separators: vec![],
                examples: vec![
                    ("\u{00dc}", Some((0, "\u{00dc}"))),
                    ("\u{00dd}", Some((1, "\u{00dd}"))),
                    ("\u{00de}", Some((2, "\u{00de}"))),
                    ("\u{00df}", Some((3, "\u{00df}"))),
                ],
            },
            // curly braces: escaped literals, counted repetition, unicode codepoints
            Row {
                rules: vec![
                    Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""),
                    // Already-escaped curly braces
                    Rule::pattern(r"\{[ab]{3}\}", ""),
                    // Unicode codepoints
                    Rule::pattern(r"\u{1000A}", ""),
                    // Unicode codepoints (lowercase)
                    Rule::pattern(r"\u{1000b}", ""),
                ],
                separators: vec![],
                examples: vec![
                    ("u{1234} ok", Some((0, "u{1234}"))),
                    ("{aba}}", Some((1, "{aba}"))),
                    ("\u{1000A}", Some((2, "\u{1000A}"))),
                    ("\u{1000b}", Some((3, "\u{1000b}"))),
                ],
            },
            // Emojis
            Row {
                rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
                separators: vec![],
                examples: vec![
                    ("🐎", Some((0, "🐎"))),
                    ("🐴🐴", Some((0, "🐴🐴"))),
                    ("#0", Some((0, "#0"))), // These chars are technically emojis!
                    ("⻢", None),
                    ("♞", None),
                    ("horse", None),
                ],
            },
            // Intersection
            Row {
                rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
                separators: vec![],
                examples: vec![
                    ("456", Some((0, "456"))),
                    ("64", Some((0, "64"))),
                    ("452", Some((0, "45"))),
                    ("91", None),
                    ("8", None),
                    ("3", None),
                ],
            },
            // Difference
            Row {
                rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
                separators: vec![],
                examples: vec![
                    ("123", Some((0, "123"))),
                    ("83", Some((0, "83"))),
                    ("9", Some((0, "9"))),
                    ("124", Some((0, "12"))),
                    ("67", None),
                    ("4", None),
                ],
            },
            // Symmetric difference
            Row {
                rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
                separators: vec![],
                examples: vec![
                    ("123", Some((0, "123"))),
                    ("83", Some((0, "83"))),
                    ("9", Some((0, "9"))),
                    ("124", Some((0, "12"))),
                    ("67", None),
                    ("4", None),
                ],
            },
            // Nested set operations
            Row {
                //               0 1 2 3 4 5 6 7 8 9
                // [0-5]:        y y y y y y
                // [2-4]:            y y y
                // [0-5]--[2-4]: y y       y
                // [3-9]:              y y y y y y y
                // [6-7]:                    y y
                // [3-9]--[6-7]:       y y y     y y
                // final regex:  y y   y y       y y
                rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
                separators: vec![],
                examples: vec![
                    ("01", Some((0, "01"))),
                    ("432", Some((0, "43"))),
                    ("8", Some((0, "8"))),
                    ("9", Some((0, "9"))),
                    ("2", None),
                    ("567", None),
                ],
            },
        ];

        for Row {
            rules,
            separators,
            examples,
        } in &table
        {
            // Every rule becomes an unnamed variable; its index in `rules` is
            // the variable index the examples refer to.
            let grammar = expand_tokens(ExtractedLexicalGrammar {
                separators: separators.clone(),
                variables: rules
                    .iter()
                    .map(|rule| Variable::named("", rule.clone()))
                    .collect(),
            })
            .unwrap();

            for (haystack, needle) in examples {
                assert_eq!(simulate_nfa(&grammar, haystack), *needle);
            }
        }
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/extract_default_aliases.rs
================================================
use crate::{
    grammars::{LexicalGrammar, SyntaxGrammar},
    rules::{Alias, AliasMap, Symbol, SymbolType},
};

// Alias usage collected for a single grammar symbol.
#[derive(Clone, Default)]
struct SymbolStatus {
    // Each alias the symbol was seen under, paired with the number of
    // production steps that used that alias.
    aliases: Vec<(Alias, usize)>,
    // Set once the symbol is seen in a production step that carries no alias.
    appears_unaliased: bool,
}

// Update the grammar by finding symbols that are always aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
//   ensures that the children of an `ERROR` node have symbols that are consistent with the way that
//   they would appear in a valid syntax tree.
//
// Returns a map from each such symbol to its default alias. As a side effect, alias
// annotations in `syntax_grammar` that became redundant are removed from its production steps.
//
// Panics if a production step or extra symbol has kind `End` or `EndOfNonTerminalExtra`.
pub(super) fn extract_default_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    // One status entry per symbol, indexed by the symbol's index within its kind.
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // For each grammar symbol, find all of the aliases under which the symbol appears,
    // and determine whether or not the symbol ever appears *unaliased*.
    for variable in &syntax_grammar.variables {
        for production in &variable.productions {
            for step in &production.steps {
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
                        panic!("Unexpected end token")
                    }
                };

                // Default aliases don't work for inlined variables.
                if syntax_grammar.variables_to_inline.contains(&step.symbol) {
                    continue;
                }

                if let Some(alias) = &step.alias {
                    // Bump the usage count of an alias already seen for this symbol,
                    // or record it for the first time.
                    if let Some(count_for_alias) = status
                        .aliases
                        .iter_mut()
                        .find_map(|(a, count)| if a == alias { Some(count) } else { None })
                    {
                        *count_for_alias += 1;
                    } else {
                        status.aliases.push((alias.clone(), 1));
                    }
                } else {
                    status.appears_unaliased = true;
                }
            }
        }
    }

    // Extra symbols are listed without aliases, so each one counts as an unaliased
    // appearance and can never receive a default alias.
    for symbol in &syntax_grammar.extra_symbols {
        let status = match symbol.kind {
            SymbolType::External => &mut external_status_list[symbol.index],
            SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
            SymbolType::Terminal => &mut terminal_status_list[symbol.index],
            SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
                panic!("Unexpected end token")
            }
        };
        status.appears_unaliased = true;
    }

    // Pair every status entry with the symbol it describes: terminals first, then
    // non-terminals, then externals.
    let symbols_with_statuses = (terminal_status_list
        .iter_mut()
        .enumerate()
        .map(|(i, status)| (Symbol::terminal(i), status)))
    .chain(
        non_terminal_status_list
            .iter_mut()
            .enumerate()
            .map(|(i, status)| (Symbol::non_terminal(i), status)),
    )
    .chain(
        external_status_list
            .iter_mut()
            .enumerate()
            .map(|(i, status)| (Symbol::external(i), status)),
    );

    // For each symbol that always appears aliased, find the alias that occurs most often,
    // and designate that alias as the symbol's "default alias". Store all of these
    // default aliases in a map that will be returned.
    let mut result = AliasMap::new();
    for (symbol, status) in symbols_with_statuses {
        if status.appears_unaliased {
            status.aliases.clear();
        } else if let Some(default_entry) = status
            .aliases
            .iter()
            .enumerate()
            // Highest count wins; negating the position makes the alias that was
            // recorded first win when counts are tied.
            .max_by_key(|(i, (_, count))| (count, -(*i as i64)))
            .map(|(_, entry)| entry.clone())
        {
            // Keep only the chosen alias so that the pass below can look it up
            // as the first (and only) entry.
            status.aliases.clear();
            status.aliases.push(default_entry.clone());
            result.insert(symbol, default_entry.0);
        }
    }

    // Wherever a symbol is aliased as its default alias, remove the usage of the alias,
    // because it will now be redundant.
    let mut alias_positions_to_clear = Vec::new();
    for variable in &mut syntax_grammar.variables {
        alias_positions_to_clear.clear();

        for (i, production) in variable.productions.iter().enumerate() {
            for (j, step) in production.steps.iter().enumerate() {
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End | SymbolType::EndOfNonTerminalExtra => {
                        panic!("Unexpected end token")
                    }
                };

                // If this step is aliased as the symbol's default alias, then remove that alias.
                if step.alias.is_some()
                    && step.alias.as_ref() == status.aliases.first().map(|t| &t.0)
                {
                    // Keep the explicit alias if another production of this variable uses
                    // the same alias at the same step index on a symbol whose default
                    // alias is something else (or that has no default alias).
                    let mut other_productions_must_use_this_alias_at_this_index = false;
                    for (other_i, other_production) in variable.productions.iter().enumerate() {
                        if other_i != i
                            && other_production.steps.len() > j
                            && other_production.steps[j].alias == step.alias
                            && result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
                        {
                            other_productions_must_use_this_alias_at_this_index = true;
                            break;
                        }
                    }

                    if !other_productions_must_use_this_alias_at_this_index {
                        alias_positions_to_clear.push((i, j));
                    }
                }
            }
        }

        // The removals are applied after the scan, once the shared borrow of
        // `variable.productions` taken by the loops above has ended.
        for (production_index, step_index) in &alias_positions_to_clear {
            variable.productions[*production_index].steps[*step_index].alias = None;
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType},
        nfa::Nfa,
    };

    // Checks which aliases get promoted to default aliases for four terminals used
    // across two rules, and that the promoted aliases are stripped from the steps.
    #[test]
    fn test_extract_simple_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // Token 0 is always aliased as "a1".
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            // Token 1 is aliased within rule `v1` above, but not here.
                            ProductionStep::new(Symbol::terminal(1)),
                            // Token 2 is aliased differently here than in `v1`. The alias from
                            // `v1` should be promoted to the default alias, because `v1` appears
                            // first in the grammar.
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            // Token 3 is also aliased differently here than in `v1`. In this case,
                            // this alias should be promoted to the default alias, because it is
                            // used a greater number of times (twice).
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                        ],
                    }],
                },
            ],
            ..Default::default()
        };

        // Four lexical variables, one per terminal index referenced above.
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: vec![
                LexicalVariable {
                    name: "t0".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t1".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t2".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t3".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
            ],
        };

        // Tokens 0, 2 and 3 are always aliased, so each gets a default alias;
        // token 1 appears unaliased in `v2`, so it gets none.
        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&Alias {
                value: "a1".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&Alias {
                value: "a3".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&Alias {
                value: "a6".to_string(),
                is_named: true,
            })
        );
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Steps that used a symbol's default alias lose the explicit alias; steps
        // that used any other alias keep it.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    },],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            ProductionStep::new(Symbol::terminal(3)),
                            ProductionStep::new(Symbol::terminal(3)),
                        ],
                    },],
                },
            ]
        );
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/extract_tokens.rs
================================================
use std::collections::HashMap;

use anyhow::Result;
use serde::Serialize;
use thiserror::Error;

use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::{
    grammars::{ExternalToken, ReservedWordContext, Variable, VariableType},
    rules::{MetadataParams, Rule, Symbol, SymbolType},
};

/// Result type whose error variant is always an [`ExtractTokensError`].
pub type ExtractTokensResult<T> = Result<T, ExtractTokensError>;

/// Errors reported while extracting tokens from an interned grammar.
#[derive(Debug, Error, Serialize)]
pub enum ExtractTokensError {
    /// An empty string literal was extracted from a rule other than the first
    /// rule. The payload is the name of the rule being processed.
    #[error(
        "The rule `{0}` contains an empty string.

Tree-sitter does not support syntactic rules that contain an empty string
unless they are used only as the grammar's start rule.
"
    )]
    EmptyString(String),
    /// An external token refers to a rule that is still a non-terminal after
    /// extraction. The payload is that rule's name.
    #[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")]
    ExternalTokenNonTerminal(String),
    /// An external token's rule is something other than a single symbol.
    #[error("Non-symbol rules cannot be used as external tokens")]
    NonSymbolExternalToken,
    /// The grammar's word token refers to a non-terminal; the wrapped error's
    /// own `Display` output is used as the message.
    #[error(transparent)]
    WordToken(NonTerminalWordTokenError),
    /// A reserved word does not correspond to any extracted token. The payload
    /// is the string or pattern text of the reserved word, or `"unknown"` for
    /// any other kind of rule.
    #[error("Reserved word '{0}' must be a token")]
    NonTokenReservedWord(String),
}

/// Details of a word token that refers to a non-terminal rule.
#[derive(Debug, Error, Serialize)]
pub struct NonTerminalWordTokenError {
    /// Name of the rule that was designated as the word token.
    pub symbol_name: String,
    /// Name of another rule whose definition is identical to the word token's
    /// rule, if one exists.
    pub conflicting_symbol_name: Option<String>,
}

impl std::fmt::Display for NonTerminalWordTokenError {
    /// Writes a single newline-terminated message naming the offending word token
    /// and, when known, the rule that duplicates its definition.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            symbol_name,
            conflicting_symbol_name,
        } = self;

        write!(
            f,
            "Non-terminal symbol '{}' cannot be used as the word token",
            symbol_name
        )?;

        // Both arms finish the line; only the first adds the explanation.
        match conflicting_symbol_name {
            Some(conflicting_name) => writeln!(
                f,
                ", because its rule is duplicated in '{conflicting_name}'",
            ),
            None => writeln!(f),
        }
    }
}

/// Splits an interned grammar into a syntax grammar and a lexical grammar.
///
/// Every string, pattern, and `token(...)` rule found in the grammar's variables and
/// external tokens is moved into the lexical grammar and replaced by a terminal
/// symbol. Variables whose whole rule is a single token that is used nowhere else
/// are themselves moved into the lexical grammar, and all symbols that referred to
/// them (or to later variables) are renumbered accordingly.
///
/// # Errors
///
/// * [`ExtractTokensError::EmptyString`] if token extraction rejects an empty string.
/// * [`ExtractTokensError::ExternalTokenNonTerminal`] if an external token refers to a
///   rule that remains a non-terminal.
/// * [`ExtractTokensError::NonSymbolExternalToken`] if an external token's rule is not a
///   single symbol.
/// * [`ExtractTokensError::WordToken`] if the word token refers to a non-terminal.
/// * [`ExtractTokensError::NonTokenReservedWord`] if a reserved word matches no
///   extracted token.
pub(super) fn extract_tokens(
    mut grammar: InternedGrammar,
) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
    let mut extractor = TokenExtractor {
        current_variable_name: String::new(),
        current_variable_token_count: 0,
        is_first_rule: false,
        extracted_variables: Vec::new(),
        extracted_usage_counts: Vec::new(),
    };

    // Only the variable at index 0 is treated as the first rule.
    for (i, variable) in grammar.variables.iter_mut().enumerate() {
        extractor.extract_tokens_in_variable(i == 0, variable)?;
    }

    for variable in &mut grammar.external_tokens {
        extractor.extract_tokens_in_variable(false, variable)?;
    }

    // The extracted variables become the lexical grammar's variables as they are,
    // so take ownership of them instead of copying them into a new vector.
    let TokenExtractor {
        extracted_variables: mut lexical_variables,
        extracted_usage_counts,
        ..
    } = extractor;

    // If a variable's entire rule was extracted as a token and that token didn't
    // appear within any other rule, then remove that variable from the syntax
    // grammar, giving its name to the token in the lexical grammar. Any symbols
    // that pointed to that variable will need to be updated to point to the
    // variable in the lexical grammar. Symbols that pointed to later variables
    // will need to have their indices decremented.
    let mut variables = Vec::with_capacity(grammar.variables.len());
    let mut symbol_replacer = SymbolReplacer {
        replacements: HashMap::new(),
    };
    for (i, variable) in grammar.variables.into_iter().enumerate() {
        if let Rule::Symbol(Symbol {
            kind: SymbolType::Terminal,
            index,
        }) = variable.rule
        {
            // The variable at index 0 always stays in the syntax grammar.
            if i > 0 && extracted_usage_counts[index] == 1 {
                let lexical_variable = &mut lexical_variables[index];
                // A hidden variable may only take over an auxiliary token; taking
                // over any other token would hide it.
                if lexical_variable.kind == VariableType::Auxiliary
                    || variable.kind != VariableType::Hidden
                {
                    lexical_variable.kind = variable.kind;
                    lexical_variable.name = variable.name;
                    symbol_replacer.replacements.insert(i, index);
                    continue;
                }
            }
        }
        variables.push(variable);
    }

    for variable in &mut variables {
        variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
    }

    let expected_conflicts = grammar
        .expected_conflicts
        .into_iter()
        .map(|conflict| {
            let mut result = conflict
                .iter()
                .map(|symbol| symbol_replacer.replace_symbol(*symbol))
                .collect::<Vec<_>>();
            result.sort_unstable();
            result.dedup();
            result
        })
        .collect();

    let supertype_symbols = grammar
        .supertype_symbols
        .into_iter()
        .map(|symbol| symbol_replacer.replace_symbol(symbol))
        .collect();

    let variables_to_inline = grammar
        .variables_to_inline
        .into_iter()
        .map(|symbol| symbol_replacer.replace_symbol(symbol))
        .collect();

    // An extra that is a symbol, or that is identical to an extracted token, stays an
    // extra symbol. Any other extra rule becomes a separator in the lexical grammar.
    let mut separators = Vec::new();
    let mut extra_symbols = Vec::new();
    for rule in grammar.extra_symbols {
        if let Rule::Symbol(symbol) = rule {
            extra_symbols.push(symbol_replacer.replace_symbol(symbol));
        } else if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
            extra_symbols.push(Symbol::terminal(index));
        } else {
            separators.push(rule);
        }
    }

    let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
    for external_token in grammar.external_tokens {
        let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
        let Rule::Symbol(symbol) = rule else {
            return Err(ExtractTokensError::NonSymbolExternalToken);
        };

        if symbol.is_non_terminal() {
            return Err(ExtractTokensError::ExternalTokenNonTerminal(
                variables[symbol.index].name.clone(),
            ));
        }

        if symbol.is_external() {
            external_tokens.push(ExternalToken {
                name: external_token.name,
                kind: external_token.kind,
                corresponding_internal_token: None,
            });
        } else {
            // The external token shadows an internal token: use the internal
            // token's name and remember which token it corresponds to.
            external_tokens.push(ExternalToken {
                name: lexical_variables[symbol.index].name.clone(),
                kind: external_token.kind,
                corresponding_internal_token: Some(symbol),
            });
        }
    }

    let word_token = if let Some(token) = grammar.word_token {
        let token = symbol_replacer.replace_symbol(token);
        if token.is_non_terminal() {
            // Look for another variable with an identical rule, so the error can
            // name it as the rule that duplicates the word token's definition.
            let word_token_variable = &variables[token.index];
            let conflicting_symbol_name = variables
                .iter()
                .enumerate()
                .find(|(i, v)| *i != token.index && v.rule == word_token_variable.rule)
                .map(|(_, v)| v.name.clone());

            return Err(ExtractTokensError::WordToken(NonTerminalWordTokenError {
                symbol_name: word_token_variable.name.clone(),
                conflicting_symbol_name,
            }));
        }
        Some(token)
    } else {
        None
    };

    let mut reserved_word_contexts = Vec::with_capacity(grammar.reserved_word_sets.len());
    for reserved_word_context in grammar.reserved_word_sets {
        // Size the output by the number of words in *this* context.
        let mut reserved_words = Vec::with_capacity(reserved_word_context.reserved_words.len());
        for reserved_rule in reserved_word_context.reserved_words {
            if let Rule::Symbol(symbol) = reserved_rule {
                reserved_words.push(symbol_replacer.replace_symbol(symbol));
            } else if let Some(index) = lexical_variables
                .iter()
                .position(|v| v.rule == reserved_rule)
            {
                reserved_words.push(Symbol::terminal(index));
            } else {
                let token_name = match &reserved_rule {
                    Rule::String(s) => s.clone(),
                    Rule::Pattern(p, _) => p.clone(),
                    _ => "unknown".to_string(),
                };
                return Err(ExtractTokensError::NonTokenReservedWord(token_name));
            }
        }
        reserved_word_contexts.push(ReservedWordContext {
            name: reserved_word_context.name,
            reserved_words,
        });
    }

    Ok((
        ExtractedSyntaxGrammar {
            variables,
            expected_conflicts,
            extra_symbols,
            variables_to_inline,
            supertype_symbols,
            external_tokens,
            word_token,
            precedence_orderings: grammar.precedence_orderings,
            reserved_word_sets: reserved_word_contexts,
        },
        ExtractedLexicalGrammar {
            variables: lexical_variables,
            separators,
        },
    ))
}

/// Accumulates the tokens extracted from a grammar's rules, one variable at a time.
struct TokenExtractor {
    // Name of the variable currently being processed; used to name auxiliary
    // tokens and to report empty-string errors.
    current_variable_name: String,
    // Number of auxiliary (unnamed) tokens created so far for the current variable.
    current_variable_token_count: usize,
    // Whether the variable currently being processed was passed in as the first rule.
    is_first_rule: bool,
    // Every distinct token extracted so far; a token's position here is its
    // terminal symbol index.
    extracted_variables: Vec<Variable>,
    // For each entry of `extracted_variables`, how many times that token was
    // encountered during extraction.
    extracted_usage_counts: Vec<usize>,
}

/// Rewrites non-terminal symbols after some syntax variables have been replaced
/// by lexical variables.
struct SymbolReplacer {
    // Maps the index of a replaced non-terminal to the index of the terminal
    // that took its place.
    replacements: HashMap<usize, usize>,
}

impl TokenExtractor {
    /// Extracts all tokens from `variable`'s rule, replacing the rule in place with a
    /// version in which each token is a terminal symbol.
    ///
    /// `is_first` marks the variable as the first rule, which is the only rule
    /// allowed to contain an empty string.
    fn extract_tokens_in_variable(
        &mut self,
        is_first: bool,
        variable: &mut Variable,
    ) -> ExtractTokensResult<()> {
        // Reset the per-variable state; the auxiliary token counter restarts at
        // zero for every variable.
        self.current_variable_name.clear();
        self.current_variable_name.push_str(&variable.name);
        self.current_variable_token_count = 0;
        self.is_first_rule = is_first;
        variable.rule = self.extract_tokens_in_rule(&variable.rule)?;
        Ok(())
    }

    /// Returns a copy of `input` in which every string, pattern, and `token`-marked
    /// metadata rule has been replaced by a terminal symbol.
    ///
    /// Recurses into metadata, repeat, sequence, choice, and reserved rules; any other
    /// kind of rule is returned unchanged.
    fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult<Rule> {
        match input {
            Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
            Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),
            Rule::Metadata { params, rule } => {
                if params.is_token {
                    let mut params = params.clone();
                    params.is_token = false;

                    let string_value = match rule.as_ref() {
                        Rule::String(value) => Some(value),
                        _ => None,
                    };

                    // When `is_token` was the only metadata, extract the bare inner
                    // rule. Otherwise extract the original metadata rule so that
                    // the remaining metadata stays attached to the token.
                    let rule_to_extract = if params == MetadataParams::default() {
                        rule.as_ref()
                    } else {
                        input
                    };

                    Ok(self.extract_token(rule_to_extract, string_value)?.into())
                } else {
                    Ok(Rule::Metadata {
                        params: params.clone(),
                        rule: Box::new(self.extract_tokens_in_rule(rule)?),
                    })
                }
            }
            Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(
                self.extract_tokens_in_rule(content)?,
            ))),
            Rule::Seq(elements) => Ok(Rule::Seq(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
                    .collect::<ExtractTokensResult<Vec<_>>>()?,
            )),
            Rule::Choice(elements) => Ok(Rule::Choice(
                elements
                    .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
                    .collect::<ExtractTokensResult<Vec<_>>>()?,
            )),
            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
                rule: Box::new(self.extract_tokens_in_rule(rule)?),
                context_name: context_name.clone(),
            }),
            _ => Ok(input.clone()),
        }
    }

    /// Registers `rule` as a token and returns the terminal symbol that refers to it.
    ///
    /// If an identical rule was already extracted, its usage count is incremented and
    /// its existing symbol is returned. Otherwise a new lexical variable is created:
    /// an anonymous one named after `string_value` when it is given, or an auxiliary
    /// one named `<current variable>_token<N>` when it is not.
    ///
    /// # Errors
    ///
    /// Returns [`ExtractTokensError::EmptyString`] when `string_value` is empty and the
    /// current variable is not the first rule.
    fn extract_token(
        &mut self,
        rule: &Rule,
        string_value: Option<&String>,
    ) -> ExtractTokensResult<Symbol> {
        // Validate before deduplicating. Otherwise an empty string already
        // extracted from the first rule would let every later rule reuse that
        // token and bypass this check.
        if !self.is_first_rule && matches!(string_value, Some(value) if value.is_empty()) {
            return Err(ExtractTokensError::EmptyString(
                self.current_variable_name.clone(),
            ));
        }

        if let Some(i) = self
            .extracted_variables
            .iter()
            .position(|variable| variable.rule == *rule)
        {
            self.extracted_usage_counts[i] += 1;
            return Ok(Symbol::terminal(i));
        }

        let index = self.extracted_variables.len();
        let variable = if let Some(string_value) = string_value {
            Variable {
                name: string_value.clone(),
                kind: VariableType::Anonymous,
                rule: rule.clone(),
            }
        } else {
            self.current_variable_token_count += 1;
            Variable {
                name: format!(
                    "{}_token{}",
                    self.current_variable_name, self.current_variable_token_count
                ),
                kind: VariableType::Auxiliary,
                rule: rule.clone(),
            }
        };

        // The two vectors are kept in lockstep: one usage count per variable.
        self.extracted_variables.push(variable);
        self.extracted_usage_counts.push(1);
        Ok(Symbol::terminal(index))
    }
}

impl SymbolReplacer {
    /// Returns a copy of `rule` in which every symbol has been passed through
    /// [`Self::replace_symbol`].
    ///
    /// Descends into choice, sequence, repeat, metadata, and reserved rules; any
    /// other kind of rule is cloned unchanged.
    fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule {
        match rule {
            Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(),
            Rule::Choice(elements) => {
                let mut members = Vec::with_capacity(elements.len());
                for element in elements {
                    members.push(self.replace_symbols_in_rule(element));
                }
                Rule::Choice(members)
            }
            Rule::Seq(elements) => {
                let mut members = Vec::with_capacity(elements.len());
                for element in elements {
                    members.push(self.replace_symbols_in_rule(element));
                }
                Rule::Seq(members)
            }
            Rule::Repeat(content) => {
                let inner = self.replace_symbols_in_rule(content);
                Rule::Repeat(Box::new(inner))
            }
            Rule::Metadata { rule, params } => {
                let inner = self.replace_symbols_in_rule(rule);
                Rule::Metadata {
                    params: params.clone(),
                    rule: Box::new(inner),
                }
            }
            Rule::Reserved { rule, context_name } => {
                let inner = self.replace_symbols_in_rule(rule);
                Rule::Reserved {
                    rule: Box::new(inner),
                    context_name: context_name.clone(),
                }
            }
            _ => rule.clone(),
        }
    }

    /// Maps a symbol to its counterpart after variable removal.
    ///
    /// Symbols that are not non-terminals are returned as they are. A non-terminal
    /// that was replaced becomes the terminal that replaced it. Any other
    /// non-terminal has its index lowered by the number of replaced variables that
    /// came before it.
    fn replace_symbol(&self, symbol: Symbol) -> Symbol {
        if !symbol.is_non_terminal() {
            return symbol;
        }

        match self.replacements.get(&symbol.index) {
            Some(&terminal_index) => Symbol::terminal(terminal_index),
            None => {
                let removed_before = self
                    .replacements
                    .keys()
                    .filter(|&&replaced_index| replaced_index < symbol.index)
                    .count();
                Symbol::non_terminal(symbol.index - removed_before)
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    // Strings, patterns and `token` rules are replaced by terminal symbols, and a
    // variable that consists solely of an otherwise-unused token is moved into the
    // lexical grammar.
    #[test]
    fn test_extraction() {
        let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::repeat(Rule::seq(vec![
                    Rule::string("a"),
                    Rule::pattern("b", ""),
                    Rule::choice(vec![
                        Rule::non_terminal(1),
                        Rule::non_terminal(2),
                        Rule::token(Rule::repeat(Rule::choice(vec![
                            Rule::string("c"),
                            Rule::string("d"),
                        ]))),
                    ]),
                ])),
            ),
            Variable::named("rule_1", Rule::pattern("e", "")),
            Variable::named("rule_2", Rule::pattern("b", "")),
            Variable::named(
                "rule_3",
                Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
            ),
        ]))
        .unwrap();

        assert_eq!(
            syntax_grammar.variables,
            vec![
                Variable::named(
                    "rule_0",
                    Rule::repeat(Rule::seq(vec![
                        // The string "a" was replaced by a symbol referencing the lexical grammar
                        Rule::terminal(0),
                        // The pattern "b" was replaced by a symbol referencing the lexical grammar
                        Rule::terminal(1),
                        Rule::choice(vec![
                            // The symbol referencing `rule_1` was replaced by a symbol referencing
                            // the lexical grammar.
                            Rule::terminal(3),
                            // The symbol referencing `rule_2` had its index decremented because
                            // `rule_1` was moved to the lexical grammar.
                            Rule::non_terminal(1),
                            // The rule wrapped in `token` was replaced by a symbol referencing
                            // the lexical grammar.
                            Rule::terminal(2),
                        ])
                    ]))
                ),
                // The pattern "e" was only used in one place: as the definition of `rule_1`,
                // so that rule was moved to the lexical grammar. The pattern "b" appeared in
                // two places, so it was not moved into the lexical grammar.
                Variable::named("rule_2", Rule::terminal(1)),
                Variable::named(
                    "rule_3",
                    Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,])
                ),
            ]
        );

        assert_eq!(
            lexical_grammar.variables,
            vec![
                Variable::anonymous("a", Rule::string("a")),
                Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
                Variable::auxiliary(
                    "rule_0_token2",
                    Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
                ),
                Variable::named("rule_1", Rule::pattern("e", "")),
            ]
        );
    }

    // The first rule stays in the syntax grammar even when it is just a token.
    #[test]
    fn test_start_rule_is_token() {
        let (syntax_grammar, lexical_grammar) =
            extract_tokens(build_grammar(vec![Variable::named(
                "rule_0",
                Rule::string("hello"),
            )]))
            .unwrap();

        assert_eq!(
            syntax_grammar.variables,
            vec![Variable::named("rule_0", Rule::terminal(0)),]
        );
        assert_eq!(
            lexical_grammar.variables,
            vec![Variable::anonymous("hello", Rule::string("hello")),]
        );
    }

    // An extra that is not an extracted token becomes a separator, while an extra
    // that refers to a rule moved into the lexical grammar becomes a terminal symbol.
    #[test]
    fn test_extracting_extra_symbols() {
        let mut grammar = build_grammar(vec![
            Variable::named("rule_0", Rule::string("x")),
            Variable::named("comment", Rule::pattern("//.*", "")),
        ]);
        grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];

        let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
        assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
        assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
    }

    // External tokens either stay purely external or are linked to the internal
    // token they correspond to.
    #[test]
    fn test_extract_externals() {
        let mut grammar = build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::seq(vec![
                    Rule::external(0),
                    Rule::string("a"),
                    Rule::non_terminal(1),
                    Rule::non_terminal(2),
                ]),
            ),
            Variable::named("rule_1", Rule::string("b")),
            Variable::named("rule_2", Rule::string("c")),
        ]);
        grammar.external_tokens = vec![
            Variable::named("external_0", Rule::external(0)),
            Variable::anonymous("a", Rule::string("a")),
            Variable::named("rule_2", Rule::non_terminal(2)),
        ];

        let (syntax_grammar, _) = extract_tokens(grammar).unwrap();

        assert_eq!(
            syntax_grammar.external_tokens,
            vec![
                ExternalToken {
                    name: "external_0".to_string(),
                    kind: VariableType::Named,
                    corresponding_internal_token: None,
                },
                ExternalToken {
                    name: "a".to_string(),
                    kind: VariableType::Anonymous,
                    corresponding_internal_token: Some(Symbol::terminal(0)),
                },
                ExternalToken {
                    name: "rule_2".to_string(),
                    kind: VariableType::Named,
                    corresponding_internal_token: Some(Symbol::terminal(2)),
                },
            ]
        );
    }

    // An external token that refers to a rule which remains a non-terminal is rejected.
    #[test]
    fn test_error_on_external_with_same_name_as_non_terminal() {
        let mut grammar = build_grammar(vec![
            Variable::named(
                "rule_0",
                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
            Variable::named(
                "rule_1",
                Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2)]),
            ),
            Variable::named("rule_2", Rule::string("a")),
        ]);
        grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))];

        match extract_tokens(grammar) {
            Err(e) => {
                assert_eq!(e.to_string(), "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule");
            }
            _ => {
                panic!("Expected an error but got no error");
            }
        }
    }

    #[test]
    fn test_extraction_on_hidden_terminal() {
        let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![
            Variable::named("rule_0", Rule::non_terminal(1)),
            Variable::hidden("_rule_1", Rule::string("a")),
        ]))
        .unwrap();

        // The rule `_rule_1` should not "absorb" the
        // terminal "a", since it is hidden,
        // so we expect two variables still
        assert_eq!(
            syntax_grammar.variables,
            vec![
                Variable::named("rule_0", Rule::non_terminal(1)),
                Variable::hidden("_rule_1", Rule::terminal(0)),
            ]
        );

        // We should not have a hidden rule in our lexical grammar, only the terminal "a"
        assert_eq!(
            lexical_grammar.variables,
            vec![Variable::anonymous("a", Rule::string("a"))]
        );
    }

    // An empty string in a rule other than the first one is an error.
    #[test]
    fn test_extraction_with_empty_string() {
        assert!(extract_tokens(build_grammar(vec![
            Variable::named("rule_0", Rule::non_terminal(1)),
            Variable::hidden("_rule_1", Rule::string("")),
        ]))
        .is_err());
    }

    // Builds an `InternedGrammar` with the given variables and defaults for
    // every other field.
    fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
        InternedGrammar {
            variables,
            ..Default::default()
        }
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/flatten_grammar.rs
================================================
use std::collections::HashMap;

use anyhow::Result;
use serde::Serialize;
use thiserror::Error;

use super::ExtractedSyntaxGrammar;
use crate::{
    grammars::{
        Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable,
    },
    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
};

/// Result alias used by the flattening routines in this module; the error
/// type is always [`FlattenGrammarError`].
pub type FlattenGrammarResult<T> = Result<T, FlattenGrammarError>;

/// Errors that can be reported while flattening a grammar's rules into
/// productions.
#[derive(Debug, Error, Serialize)]
pub enum FlattenGrammarError {
    /// A `Rule::Reserved` wrapper named a reserved-word context that is not
    /// among the grammar's reserved word sets. The payload is that name.
    #[error("No such reserved word set: {0}")]
    NoReservedWordSet(String),
    /// A variable that is referenced by some production has a production
    /// with no steps. The payload is the variable's name.
    #[error(
        "The rule `{0}` matches the empty string.

Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"
    )]
    EmptyString(String),
    /// A variable listed in `variables_to_inline` has a production step that
    /// refers to the variable itself. The payload is the variable's name.
    #[error("Rule `{0}` cannot be inlined because it contains a reference to itself")]
    RecursiveInline(String),
}

/// Working state for turning one choice-free rule tree into a flat
/// `Production`.
///
/// The `*_stack` fields track the metadata wrappers that enclose the part of
/// the rule currently being visited: a value is pushed when a wrapper is
/// entered and popped when it is left, and the top of each stack is copied
/// onto every step that is emitted.
struct RuleFlattener {
    // The production being accumulated for the rule currently being flattened.
    production: Production,
    // Lookup from a reserved-word context name to the id recorded on steps.
    reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
    // Enclosing precedence values, innermost last.
    precedence_stack: Vec<Precedence>,
    // Enclosing associativity values, innermost last.
    associativity_stack: Vec<Associativity>,
    // Enclosing reserved-word set ids, innermost last.
    reserved_word_stack: Vec<ReservedWordSetId>,
    // Enclosing aliases, innermost last.
    alias_stack: Vec<Alias>,
    // Enclosing field names, innermost last.
    field_name_stack: Vec<String>,
}

impl RuleFlattener {
    /// Creates a flattener with an empty production and empty metadata stacks.
    ///
    /// `reserved_word_set_ids` maps each reserved-word context name to the id
    /// that is stored on steps wrapped in a `Rule::Reserved` with that name.
    const fn new(reserved_word_set_ids: HashMap<String, ReservedWordSetId>) -> Self {
        Self {
            production: Production {
                steps: Vec::new(),
                dynamic_precedence: 0,
            },
            reserved_word_set_ids,
            precedence_stack: Vec::new(),
            associativity_stack: Vec::new(),
            reserved_word_stack: Vec::new(),
            alias_stack: Vec::new(),
            field_name_stack: Vec::new(),
        }
    }

    /// Expands `variable`'s rule into its choice-free alternatives and
    /// flattens each alternative into a production.
    ///
    /// Alternatives that flatten to an identical production are kept only
    /// once, in order of first appearance.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while flattening an alternative.
    fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult<SyntaxVariable> {
        let choices = extract_choices(variable.rule);
        let mut productions = Vec::with_capacity(choices.len());
        for rule in choices {
            let production = self.flatten_rule(rule)?;
            if !productions.contains(&production) {
                productions.push(production);
            }
        }
        Ok(SyntaxVariable {
            name: variable.name,
            kind: variable.kind,
            productions,
        })
    }

    /// Flattens a single choice-free rule into a production.
    ///
    /// All working state is reset first, so the flattener can be reused for
    /// any number of rules.
    ///
    /// # Errors
    ///
    /// Propagates any error produced by [`Self::apply`].
    fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult<Production> {
        self.production = Production::default();
        self.alias_stack.clear();
        self.reserved_word_stack.clear();
        self.precedence_stack.clear();
        self.associativity_stack.clear();
        self.field_name_stack.clear();
        self.apply(rule, true)?;
        // Move the finished production out instead of cloning it; the slot is
        // reset at the start of the next call anyway.
        Ok(std::mem::take(&mut self.production))
    }

    /// Recursively appends the steps of `rule` to the current production.
    ///
    /// `at_end` is true when `rule` is the final part of the production being
    /// built. Returns whether at least one step was pushed.
    ///
    /// # Errors
    ///
    /// Returns [`FlattenGrammarError::NoReservedWordSet`] when a
    /// `Rule::Reserved` names an unknown reserved-word context.
    fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult<bool> {
        match rule {
            Rule::Seq(members) => {
                let mut result = false;
                // Compare against the length rather than `len() - 1`, which
                // would underflow for an empty sequence.
                let member_count = members.len();
                for (i, member) in members.into_iter().enumerate() {
                    result |= self.apply(member, i + 1 == member_count && at_end)?;
                }
                Ok(result)
            }
            Rule::Metadata { rule, params } => {
                let mut has_precedence = false;
                if !params.precedence.is_none() {
                    has_precedence = true;
                    self.precedence_stack.push(params.precedence);
                }

                let mut has_associativity = false;
                if let Some(associativity) = params.associativity {
                    has_associativity = true;
                    self.associativity_stack.push(associativity);
                }

                let mut has_alias = false;
                if let Some(alias) = params.alias {
                    has_alias = true;
                    self.alias_stack.push(alias);
                }

                let mut has_field_name = false;
                if let Some(field_name) = params.field_name {
                    has_field_name = true;
                    self.field_name_stack.push(field_name);
                }

                // The production keeps the dynamic precedence of largest
                // magnitude. `unsigned_abs` avoids the overflow that `abs`
                // has on the minimum integer value.
                if params.dynamic_precedence.unsigned_abs()
                    > self.production.dynamic_precedence.unsigned_abs()
                {
                    self.production.dynamic_precedence = params.dynamic_precedence;
                }

                let did_push = self.apply(*rule, at_end)?;

                if has_precedence {
                    self.precedence_stack.pop();
                    // Unless this wrapper ends the production, the last step
                    // pushed inside it takes the enclosing precedence instead
                    // of the wrapper's own.
                    if did_push && !at_end {
                        self.production.steps.last_mut().unwrap().precedence = self
                            .precedence_stack
                            .last()
                            .cloned()
                            .unwrap_or(Precedence::None);
                    }
                }

                if has_associativity {
                    self.associativity_stack.pop();
                    // Same adjustment as for precedence above.
                    if did_push && !at_end {
                        self.production.steps.last_mut().unwrap().associativity =
                            self.associativity_stack.last().copied();
                    }
                }

                if has_alias {
                    self.alias_stack.pop();
                }

                if has_field_name {
                    self.field_name_stack.pop();
                }

                Ok(did_push)
            }
            Rule::Reserved { rule, context_name } => {
                // Resolve the id up front so the name can be moved into the
                // error without cloning it.
                let set_id = match self.reserved_word_set_ids.get(&context_name) {
                    Some(id) => *id,
                    None => return Err(FlattenGrammarError::NoReservedWordSet(context_name)),
                };
                self.reserved_word_stack.push(set_id);
                let did_push = self.apply(*rule, at_end)?;
                self.reserved_word_stack.pop();
                Ok(did_push)
            }
            Rule::Symbol(symbol) => {
                // Each step is stamped with the innermost enclosing metadata.
                self.production.steps.push(ProductionStep {
                    symbol,
                    precedence: self
                        .precedence_stack
                        .last()
                        .cloned()
                        .unwrap_or(Precedence::None),
                    associativity: self.associativity_stack.last().copied(),
                    reserved_word_set_id: self
                        .reserved_word_stack
                        .last()
                        .copied()
                        .unwrap_or_default(),
                    alias: self.alias_stack.last().cloned(),
                    field_name: self.field_name_stack.last().cloned(),
                });
                Ok(true)
            }
            // Every other rule kind contributes no step.
            _ => Ok(false),
        }
    }
}

/// Expands `rule` into the list of choice-free rules it can stand for.
///
/// * A sequence yields every combination of its members' alternatives, built
///   up as nested two-element `Rule::Seq`s starting from `Rule::Blank`.
/// * A choice yields the concatenated alternatives of all of its members.
/// * `Metadata` and `Reserved` wrappers are re-applied around each
///   alternative of the rule they wrap.
/// * Any other rule is returned unchanged as the only alternative.
fn extract_choices(rule: Rule) -> Vec<Rule> {
    match rule {
        Rule::Seq(elements) => {
            elements
                .into_iter()
                .fold(vec![Rule::Blank], |prefixes, element| {
                    let alternatives = extract_choices(element);
                    prefixes
                        .iter()
                        .flat_map(|prefix| {
                            alternatives.iter().map(move |alternative| {
                                Rule::Seq(vec![prefix.clone(), alternative.clone()])
                            })
                        })
                        .collect()
                })
        }
        Rule::Choice(elements) => elements.into_iter().flat_map(extract_choices).collect(),
        Rule::Metadata { rule, params } => {
            let mut wrapped = Vec::new();
            for inner in extract_choices(*rule) {
                wrapped.push(Rule::Metadata {
                    rule: Box::new(inner),
                    params: params.clone(),
                });
            }
            wrapped
        }
        Rule::Reserved { rule, context_name } => {
            let mut wrapped = Vec::new();
            for inner in extract_choices(*rule) {
                wrapped.push(Rule::Reserved {
                    rule: Box::new(inner),
                    context_name: context_name.clone(),
                });
            }
            wrapped
        }
        other => vec![other],
    }
}

/// Returns `true` when any step of any production of any variable in
/// `variables` refers to `symbol`.
fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    variables
        .iter()
        .flat_map(|variable| variable.productions.iter())
        .flat_map(|production| production.steps.iter())
        .any(|step| step.symbol == symbol)
}

pub(super) fn flatten_grammar(
    grammar: ExtractedSyntaxGrammar,
) -> FlattenGrammarResult<SyntaxGrammar> {
    let mut reserved_word_set_ids_by_name = HashMap::new();
    for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
        reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
    }

    let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name);
    let variables = grammar
        .variables
        .into_iter()
        .map(|variable| flattener.flatten_variable(variable))
        .collect::<FlattenGrammarResult<Vec<_>>>()?;

    for (i, variable) in variables.iter().enumerate() {
        let symbol = Symbol::non_terminal(i);
        let used = symbol_is_used(&variables, symbol);

        for production in &variable.productions {
            if used && production.steps.is_empty() {
                Err(FlattenGrammarError::EmptyString(variable.name.clone()))?;
            }

            if grammar.variables_to_inline.contains(&symbol)
                && production.steps.iter().any(|step| step.symbol == symbol)
            {
                Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?;
            }
        }
    }
    let mut reserved_word_sets = grammar
        .reserved_word_sets
        .into_iter()
        .map(|set| set.reserved_words.into_iter().collect())
        .collect::<Vec<_>>();

    // If no default reserved word set is specified, there are no reserved words.
    if reserved_word_sets.is_empty() {
        reserved_word_sets.push(TokenSet::default());
    }

    Ok(SyntaxGrammar {
        extra_symbols: grammar.extra_symbols,
        expected_conflicts: grammar.expected_conflicts,
        variables_to_inline: grammar.variables_to_inline,
        precedence_orderings: grammar.precedence_orderings,
        external_tokens: grammar.external_tokens,
        supertype_symbols: grammar.supertype_symbols,
        word_token: grammar.word_token,
        reserved_word_sets,
        variables,
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammars::VariableType;

    /// Nested `prec_left` / `prec_right` wrappers around a sequence with a
    /// choice: one production per alternative, each step carrying the
    /// expected precedence and associativity.
    #[test]
    fn test_flatten_grammar() {
        let inner_choice = Rule::choice(vec![
            Rule::prec_right(
                Precedence::Integer(102),
                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
            ),
            Rule::non_terminal(5),
        ]);
        let variable = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_left(
                    Precedence::Integer(101),
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        inner_choice,
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        };

        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener.flatten_variable(variable).unwrap();

        // Shorthand constructors for the three kinds of expected step.
        let plain = |index: usize| ProductionStep::new(Symbol::non_terminal(index));
        let left_101 = |index: usize| {
            plain(index).with_prec(Precedence::Integer(101), Some(Associativity::Left))
        };
        let right_102 = |index: usize| {
            plain(index).with_prec(Precedence::Integer(102), Some(Associativity::Right))
        };

        let expected = vec![
            Production {
                dynamic_precedence: 0,
                steps: vec![
                    plain(1),
                    left_101(2),
                    right_102(3),
                    left_101(4),
                    plain(6),
                    plain(7),
                ],
            },
            Production {
                dynamic_precedence: 0,
                steps: vec![plain(1), left_101(2), left_101(5), plain(6), plain(7)],
            },
        ];
        assert_eq!(result.productions, expected);
    }

    /// With nested `prec_dynamic` wrappers, each production reports the
    /// largest dynamic precedence found along its own alternative.
    #[test]
    fn test_flatten_grammar_with_maximum_dynamic_precedence() {
        let inner_choice = Rule::choice(vec![
            Rule::prec_dynamic(
                102,
                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
            ),
            Rule::non_terminal(5),
        ]);
        let variable = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_dynamic(
                    101,
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        inner_choice,
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        };

        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener.flatten_variable(variable).unwrap();

        let plain = |index: usize| ProductionStep::new(Symbol::non_terminal(index));
        let expected = vec![
            Production {
                dynamic_precedence: 102,
                steps: vec![plain(1), plain(2), plain(3), plain(4), plain(6), plain(7)],
            },
            Production {
                dynamic_precedence: 101,
                steps: vec![plain(1), plain(2), plain(5), plain(6), plain(7)],
            },
        ];
        assert_eq!(result.productions, expected);
    }

    /// When a precedence wrapper spans the end of the rule, the final step
    /// keeps the wrapper's precedence and associativity.
    #[test]
    fn test_flatten_grammar_with_final_precedence() {
        let mut flattener = RuleFlattener::new(HashMap::default());
        let left_101 = |index: usize| {
            ProductionStep::new(Symbol::non_terminal(index))
                .with_prec(Precedence::Integer(101), Some(Associativity::Left))
        };

        // Case 1: the wrapper encloses a two-element sequence.
        let two_step_variable = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(
                Precedence::Integer(101),
                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
        };
        let result = flattener.flatten_variable(two_step_variable).unwrap();
        let expected = vec![Production {
            dynamic_precedence: 0,
            steps: vec![left_101(1), left_101(2)],
        }];
        assert_eq!(result.productions, expected);

        // Case 2: the wrapper encloses a single-element sequence.
        let one_step_variable = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(
                Precedence::Integer(101),
                Rule::seq(vec![Rule::non_terminal(1)]),
            ),
        };
        let result = flattener.flatten_variable(one_step_variable).unwrap();
        let expected = vec![Production {
            dynamic_precedence: 0,
            steps: vec![left_101(1)],
        }];
        assert_eq!(result.productions, expected);
    }

    /// Field names attach only to the steps they wrap, and an optional
    /// trailing field yields one production without it and one with it.
    #[test]
    fn test_flatten_grammar_with_field_names() {
        let optional_second = Rule::choice(vec![
            Rule::Blank,
            Rule::field("second-thing".to_string(), Rule::terminal(3)),
        ]);
        let variable = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::field("first-thing".to_string(), Rule::terminal(1)),
                Rule::terminal(2),
                optional_second,
            ]),
        };

        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener.flatten_variable(variable).unwrap();

        let first = || ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing");
        let middle = || ProductionStep::new(Symbol::terminal(2));
        let second = || ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing");

        let expected = vec![
            Production {
                dynamic_precedence: 0,
                steps: vec![first(), middle()],
            },
            Production {
                dynamic_precedence: 0,
                steps: vec![first(), middle(), second()],
            },
        ];
        assert_eq!(result.productions, expected);
    }

    /// A variable that is marked for inlining and refers to itself is
    /// rejected with the `RecursiveInline` error message.
    #[test]
    fn test_flatten_grammar_with_recursive_inline_variable() {
        let self_referential = Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(0),
                Rule::non_terminal(1),
                Rule::non_terminal(2),
            ]),
        };
        let grammar = ExtractedSyntaxGrammar {
            variables: vec![self_referential],
            variables_to_inline: vec![Symbol::non_terminal(0)],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            precedence_orderings: Vec::new(),
            external_tokens: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,
            reserved_word_sets: Vec::new(),
        };

        let error = flatten_grammar(grammar).unwrap_err();

        assert_eq!(
            error.to_string(),
            "Rule `test` cannot be inlined because it contains a reference to itself",
        );
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/intern_symbols.rs
================================================
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;

use super::InternedGrammar;
use crate::{
    grammars::{InputGrammar, ReservedWordContext, Variable, VariableType},
    rules::{Rule, Symbol},
};

/// Result alias used by the symbol-interning routines in this module; the
/// error type is always [`InternSymbolsError`].
pub type InternSymbolsResult<T> = Result<T, InternSymbolsError>;

/// Errors that can be reported while replacing rule names with symbols.
#[derive(Debug, Error, Serialize)]
pub enum InternSymbolsError {
    /// The grammar's first rule has a name that marks it as hidden (a
    /// leading underscore).
    #[error("A grammar's start rule must be visible.")]
    HiddenStartRule,
    /// A rule refers to a name that is neither a grammar rule nor a named
    /// external token. The payload is that name.
    #[error("Undefined symbol `{0}`")]
    Undefined(String),
    /// A name in the supertypes list could not be resolved.
    #[error("Undefined symbol `{0}` in grammar's supertypes array")]
    UndefinedSupertype(String),
    /// A name in one of the expected-conflict lists could not be resolved.
    #[error("Undefined symbol `{0}` in grammar's conflicts array")]
    UndefinedConflict(String),
    /// The name given as the grammar's word token could not be resolved.
    #[error("Undefined symbol `{0}` as grammar's word token")]
    UndefinedWordToken(String),
}

/// Replaces every by-name rule reference in `grammar` with a `Symbol` and
/// returns the resulting `InternedGrammar`.
///
/// A name resolves to a non-terminal when it matches a grammar rule and
/// otherwise to an external symbol when it matches a named external token.
/// Rules listed as supertypes are marked hidden in the result. Names in the
/// inline list that cannot be resolved are skipped without an error.
///
/// A grammar with no rules has no start rule to validate, so the start-rule
/// check is skipped for it instead of panicking on an out-of-bounds index.
///
/// # Errors
///
/// * [`InternSymbolsError::HiddenStartRule`] when the first rule's name
///   starts with an underscore.
/// * [`InternSymbolsError::Undefined`] when a rule, external token, extra or
///   reserved word refers to an unknown name.
/// * [`InternSymbolsError::UndefinedSupertype`],
///   [`InternSymbolsError::UndefinedConflict`] and
///   [`InternSymbolsError::UndefinedWordToken`] when the corresponding list
///   or setting refers to an unknown name.
pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
    let interner = Interner { grammar };

    // `first()` rather than `[0]`, so that an empty rule list does not panic.
    if let Some(start_rule) = grammar.variables.first() {
        if variable_type_for_name(&start_rule.name) == VariableType::Hidden {
            return Err(InternSymbolsError::HiddenStartRule);
        }
    }

    let mut variables = Vec::with_capacity(grammar.variables.len());
    for variable in &grammar.variables {
        variables.push(Variable {
            name: variable.name.clone(),
            kind: variable_type_for_name(&variable.name),
            rule: interner.intern_rule(&variable.rule, Some(&variable.name))?,
        });
    }

    let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
    for external_token in &grammar.external_tokens {
        let rule = interner.intern_rule(external_token, None)?;
        // Only external tokens given as a bare name carry a name and a
        // name-derived kind; any other rule is recorded as anonymous.
        let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
            (name.clone(), variable_type_for_name(name))
        } else {
            (String::new(), VariableType::Anonymous)
        };
        external_tokens.push(Variable { name, kind, rule });
    }

    let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
    for extra_token in &grammar.extra_symbols {
        extra_symbols.push(interner.intern_rule(extra_token, None)?);
    }

    let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
    for supertype_symbol_name in &grammar.supertype_symbols {
        supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| {
            InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone())
        })?);
    }

    let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len());
    for reserved_word_set in &grammar.reserved_words {
        let mut interned_set = Vec::with_capacity(reserved_word_set.reserved_words.len());
        for rule in &reserved_word_set.reserved_words {
            interned_set.push(interner.intern_rule(rule, None)?);
        }
        reserved_words.push(ReservedWordContext {
            name: reserved_word_set.name.clone(),
            reserved_words: interned_set,
        });
    }

    let mut expected_conflicts = Vec::with_capacity(grammar.expected_conflicts.len());
    for conflict in &grammar.expected_conflicts {
        let mut interned_conflict = Vec::with_capacity(conflict.len());
        for name in conflict {
            interned_conflict.push(
                interner
                    .intern_name(name)
                    .ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?,
            );
        }
        expected_conflicts.push(interned_conflict);
    }

    // Unlike the lists above, unknown names in the inline list are ignored.
    let variables_to_inline = grammar
        .variables_to_inline
        .iter()
        .filter_map(|name| interner.intern_name(name))
        .collect::<Vec<_>>();

    let word_token = if let Some(name) = grammar.word_token.as_ref() {
        Some(
            interner
                .intern_name(name)
                .ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?,
        )
    } else {
        None
    };

    // Supertype rules are always hidden, whatever their name says.
    for (i, variable) in variables.iter_mut().enumerate() {
        if supertype_symbols.contains(&Symbol::non_terminal(i)) {
            variable.kind = VariableType::Hidden;
        }
    }

    Ok(InternedGrammar {
        variables,
        external_tokens,
        extra_symbols,
        expected_conflicts,
        variables_to_inline,
        supertype_symbols,
        word_token,
        precedence_orderings: grammar.precedence_orderings.clone(),
        reserved_word_sets: reserved_words,
    })
}

/// Resolves rule names against the rules and external tokens of a borrowed
/// input grammar.
struct Interner<'a> {
    // The grammar whose `variables` and `external_tokens` names are searched.
    grammar: &'a InputGrammar,
}

impl Interner<'_> {
    fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
        match rule {
            Rule::Choice(elements) => {
                self.check_single(elements, name);
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
                    result.push(self.intern_rule(element, name)?);
                }
                Ok(Rule::Choice(result))
            }
            Rule::Seq(elements) => {
                self.check_single(elements, name);
                let mut result = Vec::with_capacity(elements.len());
                for element in elements {
                    result.push(self.intern_rule(element, name)?);
                }
                Ok(Rule::Seq(result))
            }
            Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))),
            Rule::Metadata { rule, params } => Ok(Rule::Metadata {
                rule: Box::new(self.intern_rule(rule, name)?),
                params: params.clone(),
            }),
            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
                rule: Box::new(self.intern_rule(rule, name)?),
                context_name: context_name.clone(),
            }),
            Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
                || Err(InternSymbolsError::Undefined(name.clone())),
                |symbol| Ok(Rule::Symbol(symbol)),
            ),
            _ => Ok(rule.clone()),
        }
    }

    fn intern_name(&self, symbol: &str) -> Option<Symbol> {
        for (i, variable) in self.grammar.variables.iter().enumerate() {
            if variable.name == symbol {
                return Some(Symbol::non_terminal(i));
            }
        }

        for (i, external_token) in self.grammar.external_tokens.iter().enumerate() {
            if let Rule::NamedSymbol(name) = external_token {
                if name == symbol {
                    return Some(Symbol::external(i));
                }
            }
        }

        None
    }

    // In the case of a seq or choice rule of 1 element in a hidden rule, weird
    // inconsistent behavior with queries can occur. So we should warn the user about it.
    fn check_single(&self, elements: &[Rule], name: Option<&str>) {
        if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
            eprintln!(
                "Warning: rule {} contains a `seq` or `choice` rule with a single element. This is unnecessary.",
                name.unwrap_or_default()
            );
        }
    }
}

/// Derives a rule's kind from its name: a leading underscore marks the rule
/// as hidden, anything else (including an empty name) as named.
fn variable_type_for_name(name: &str) -> VariableType {
    match name.chars().next() {
        Some('_') => VariableType::Hidden,
        _ => VariableType::Named,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Named references become non-terminal symbols, and a rule whose name
    /// starts with an underscore is marked hidden.
    #[test]
    fn test_basic_repeat_expansion() {
        let input = build_grammar(vec![
            Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])),
            Variable::named("y", Rule::named("_z")),
            Variable::named("_z", Rule::string("a")),
        ]);

        let grammar = intern_symbols(&input).unwrap();

        let expected = vec![
            Variable::named(
                "x",
                Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
            Variable::named("y", Rule::non_terminal(2)),
            Variable::hidden("_z", Rule::string("a")),
        ];
        assert_eq!(grammar.variables, expected);
    }

    /// A name that is both a grammar rule and an external token resolves to
    /// the rule; a name that is only an external token resolves to an
    /// external symbol.
    #[test]
    fn test_interning_external_token_names() {
        // `y` is both an internal rule and an external token, whereas `z`
        // exists only as an external token.
        let mut input_grammar = build_grammar(vec![
            Variable::named(
                "w",
                Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]),
            ),
            Variable::named("x", Rule::string("a")),
            Variable::named("y", Rule::string("b")),
        ]);
        let externals = vec![Rule::named("y"), Rule::named("z")];
        input_grammar.external_tokens.extend(externals);

        let grammar = intern_symbols(&input_grammar).unwrap();

        // Inside the rules, `y` uses its internal index and `z` its external
        // index.
        let expected_variables = vec![
            Variable::named(
                "w",
                Rule::choice(vec![
                    Rule::non_terminal(1),
                    Rule::non_terminal(2),
                    Rule::external(1),
                ]),
            ),
            Variable::named("x", Rule::string("a")),
            Variable::named("y", Rule::string("b")),
        ];
        assert_eq!(grammar.variables, expected_variables);

        // The external token entry for `y` points back at its internal index.
        let expected_external_tokens = vec![
            Variable::named("y", Rule::non_terminal(2)),
            Variable::named("z", Rule::external(1)),
        ];
        assert_eq!(grammar.external_tokens, expected_external_tokens);
    }

    /// A reference to a name that is defined nowhere is reported as an
    /// undefined symbol.
    #[test]
    fn test_grammar_with_undefined_symbols() {
        let input = build_grammar(vec![Variable::named("x", Rule::named("y"))]);

        if let Err(error) = intern_symbols(&input) {
            assert_eq!(error.to_string(), "Undefined symbol `y`");
        } else {
            panic!("Expected an error but got none");
        }
    }

    fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
        InputGrammar {
            variables,
            name: "the_language".to_string(),
            ..Default::default()
        }
    }
}



================================================
FILE: crates/generate/src/prepare_grammar/process_inlines.rs
================================================
use std::collections::HashMap;

use anyhow::Result;
use serde::Serialize;
use thiserror::Error;

use crate::{
    grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
    rules::SymbolType,
};

/// Identifies one step of one production, either a production owned by a
/// grammar variable or one created by inlining.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
    // A `None` value here means that the production itself was produced via inlining,
    // and is stored in the builder's `productions` vector, as opposed to being
    // stored in one of the grammar's variables.
    variable_index: Option<usize>,
    // Index of the production: within the variable's `productions` when
    // `variable_index` is `Some`, otherwise within the builder's `productions`.
    production_index: usize,
    // Position of the step within that production.
    step_index: usize,
}

/// Accumulates the productions created by inlining while an
/// `InlinedProductionMap` is being built.
struct InlinedProductionMapBuilder {
    // NOTE(review): presumably maps a step to the indices (into `productions`)
    // of the productions created by inlining at that step; confirm against
    // `inline_production_at_step`.
    production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>,
    // Productions that were produced via inlining rather than taken directly
    // from one of the grammar's variables.
    productions: Vec<Production>,
}

impl InlinedProductionMapBuilder {
    /// Consumes the builder and computes the inlined productions for every step of
    /// every production in `grammar` whose symbol is listed in
    /// `grammar.variables_to_inline`.
    ///
    /// Each production is walked step by step using a worklist. When a step's
    /// symbol must be inlined, the worklist entry is replaced by one entry for each
    /// production that the inlining produced, so inlined productions are themselves
    /// walked (and further inlined) from the same step index onwards.
    ///
    /// The returned map is keyed by `(pointer to production, step index)`, where the
    /// pointer refers either to a production owned by `grammar` or to an element of
    /// the returned `productions` vector.
    fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap {
        let mut step_ids_to_process = Vec::new();
        for (variable_index, variable) in grammar.variables.iter().enumerate() {
            for production_index in 0..variable.productions.len() {
                // Seed the worklist with the first step of this production.
                step_ids_to_process.push(ProductionStepId {
                    variable_index: Some(variable_index),
                    production_index,
                    step_index: 0,
                });
                // Keep sweeping the worklist until every entry has walked past the
                // last step of its production.
                while !step_ids_to_process.is_empty() {
                    let mut i = 0;
                    while i < step_ids_to_process.len() {
                        let step_id = step_ids_to_process[i];
                        if let Some(step) = self.production_step_for_id(step_id, grammar) {
                            if grammar.variables_to_inline.contains(&step.symbol) {
                                // Swap this entry for one entry per inlined
                                // production, all at the same step index. `i` is
                                // left unchanged so the replacements are examined
                                // next.
                                let inlined_step_ids = self
                                    .inline_production_at_step(step_id, grammar)
                                    .iter()
                                    .copied()
                                    .map(|production_index| ProductionStepId {
                                        variable_index: None,
                                        production_index,
                                        step_index: step_id.step_index,
                                    });
                                step_ids_to_process.splice(i..=i, inlined_step_ids);
                            } else {
                                // Nothing to inline here: advance to the next step.
                                step_ids_to_process[i] = ProductionStepId {
                                    variable_index: step_id.variable_index,
                                    production_index: step_id.production_index,
                                    step_index: step_id.step_index + 1,
                                };
                                i += 1;
                            }
                        } else {
                            // The step index is past the end of the production.
                            step_ids_to_process.remove(i);
                        }
                    }
                }
            }
        }

        // Re-key the cache from step ids to `(production pointer, step index)`.
        let productions = self.productions;
        let production_indices_by_step_id = self.production_indices_by_step_id;
        let production_map = production_indices_by_step_id
            .into_iter()
            .map(|(step_id, production_indices)| {
                let production = step_id.variable_index.map_or_else(
                    || &productions[step_id.production_index],
                    |variable_index| {
                        &grammar.variables[variable_index].productions[step_id.production_index]
                    },
                ) as *const Production;
                ((production, step_id.step_index as u32), production_indices)
            })
            .collect();

        InlinedProductionMap {
            productions,
            production_map,
        }
    }

    /// Expands the production identified by `step_id` at `step_id.step_index`,
    /// repeatedly substituting inlined variables at that position until the step
    /// there no longer refers to a variable in `grammar.variables_to_inline`.
    ///
    /// The resulting productions are stored (deduplicated) in `self.productions`,
    /// and their indices are cached under `step_id`. Returns those indices.
    fn inline_production_at_step<'a>(
        &'a mut self,
        step_id: ProductionStepId,
        grammar: &'a SyntaxGrammar,
    ) -> &'a [usize] {
        // The expansion depends only on `step_id` and `grammar`, and stored
        // productions are never modified, so an existing cache entry can be
        // returned directly instead of being recomputed and then discarded.
        if self.production_indices_by_step_id.contains_key(&step_id) {
            return &self.production_indices_by_step_id[&step_id];
        }

        // Build a list of productions produced by inlining rules.
        let mut i = 0;
        let step_index = step_id.step_index;
        let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
        while i < productions_to_add.len() {
            if let Some(step) = productions_to_add[i].steps.get(step_index) {
                let symbol = step.symbol;
                if grammar.variables_to_inline.contains(&symbol) {
                    // Move the production out of the vector, leaving a default
                    // placeholder in its slot.
                    let production = std::mem::take(&mut productions_to_add[i]);

                    // Replace the placeholder with the inlined productions, one per
                    // production of the inlined variable.
                    productions_to_add.splice(
                        i..=i,
                        grammar.variables[symbol.index].productions.iter().map(|p| {
                            let mut production = production.clone();
                            // Substitute the inlined variable's steps for the step
                            // that referred to it.
                            let removed_step = production
                                .steps
                                .splice(step_index..=step_index, p.steps.iter().cloned())
                                .next()
                                .unwrap();
                            let inserted_steps =
                                &mut production.steps[step_index..(step_index + p.steps.len())];
                            // An alias or field name on the removed step overrides
                            // that of every inserted step.
                            if let Some(alias) = removed_step.alias {
                                for inserted_step in inserted_steps.iter_mut() {
                                    inserted_step.alias = Some(alias.clone());
                                }
                            }
                            if let Some(field_name) = removed_step.field_name {
                                for inserted_step in inserted_steps.iter_mut() {
                                    inserted_step.field_name = Some(field_name.clone());
                                }
                            }
                            // Only the last inserted step inherits precedence and
                            // associativity, and only where it has none of its own.
                            if let Some(last_inserted_step) = inserted_steps.last_mut() {
                                if last_inserted_step.precedence.is_none() {
                                    last_inserted_step.precedence = removed_step.precedence;
                                }
                                if last_inserted_step.associativity.is_none() {
                                    last_inserted_step.associativity = removed_step.associativity;
                                }
                            }
                            // Keep whichever dynamic precedence has the larger
                            // magnitude.
                            if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
                                production.dynamic_precedence = p.dynamic_precedence;
                            }
                            production
                        }),
                    );

                    // Re-examine index `i`: the production now there may begin with
                    // another variable that needs inlining.
                    continue;
                }
            }
            i += 1;
        }

        // Store all the computed productions, reusing the index of an equal
        // production when one is already stored.
        let result = productions_to_add
            .into_iter()
            .map(|production| {
                self.productions
                    .iter()
                    .position(|p| *p == production)
                    .unwrap_or_else(|| {
                        self.productions.push(production);
                        self.productions.len() - 1
                    })
            })
            .collect();

        // Cache these productions based on the original production step.
        self.production_indices_by_step_id
            .entry(step_id)
            .or_insert(result)
    }

    /// Resolves the production that `id` refers to: one of the builder's inlined
    /// productions when `id.variable_index` is `None`, otherwise a production of
    /// the corresponding grammar variable.
    fn production_for_id<'a>(
        &'a self,
        id: ProductionStepId,
        grammar: &'a SyntaxGrammar,
    ) -> &'a Production {
        id.variable_index.map_or_else(
            || &self.productions[id.production_index],
            |variable_index| &grammar.variables[variable_index].productions[id.production_index],
        )
    }

    /// Returns the step that `id` refers to, or `None` when `id.step_index` is past
    /// the end of the production.
    fn production_step_for_id<'a>(
        &'a self,
        id: ProductionStepId,
        grammar: &'a SyntaxGrammar,
    ) -> Option<&'a ProductionStep> {
        self.production_for_id(id, grammar).steps.get(id.step_index)
    }
}

/// Result alias whose error type is `ProcessInlinesError`.
pub type ProcessInlinesResult<T> = Result<T, ProcessInlinesError>;

/// Reasons why `process_inlines` rejects a symbol listed in `variables_to_inline`.
/// Each variant carries the name of the offending symbol.
#[derive(Debug, Error, Serialize)]
pub enum ProcessInlinesError {
    /// The symbol is an external token.
    #[error("External token `{0}` cannot be inlined")]
    ExternalToken(String),
    /// The symbol is a terminal (a token of the lexical grammar).
    #[error("Token `{0}` cannot be inlined")]
    Token(String),
    /// The symbol is the non-terminal at index 0, i.e. the grammar's first rule.
    #[error("Rule `{0}` cannot be inlined because it is the first rule")]
    FirstRule(String),
}

/// Validates `grammar.variables_to_inline` and, if every entry is acceptable,
/// builds the map of inlined productions for `grammar`.
///
/// # Errors
///
/// Returns an error for the first entry that is an external token, a terminal, or
/// the non-terminal at index 0. Names for the error message are looked up in
/// `grammar.external_tokens`, `lexical_grammar.variables` and `grammar.variables`
/// respectively.
pub(super) fn process_inlines(
    grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> ProcessInlinesResult<InlinedProductionMap> {
    for symbol in &grammar.variables_to_inline {
        let index = symbol.index;
        match symbol.kind {
            SymbolType::External => {
                let name = grammar.external_tokens[index].name.clone();
                return Err(ProcessInlinesError::ExternalToken(name));
            }
            SymbolType::Terminal => {
                let name = lexical_grammar.variables[index].name.clone();
                return Err(ProcessInlinesError::Token(name));
            }
            SymbolType::NonTerminal if index == 0 => {
                let name = grammar.variables[index].name.clone();
                return Err(ProcessInlinesError::FirstRule(name));
            }
            _ => {}
        }
    }

    // Every requested symbol is inlinable; start from an empty builder.
    let builder = InlinedProductionMapBuilder {
        production_indices_by_step_id: HashMap::new(),
        productions: Vec::new(),
    };
    Ok(builder.build(grammar))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        grammars::{LexicalVariable, SyntaxVariable, VariableType},
        rules::{Associativity, Precedence, Symbol},
    };

    /// A single inlined variable with two productions expands the referencing
    /// production into two productions, one per alternative.
    #[test]
    fn test_basic_inlining() {
        let grammar = SyntaxGrammar {
            variables_to_inline: vec![Symbol::non_terminal(1)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(12)),
                                ProductionStep::new(Symbol::terminal(13)),
                            ],
                        },
                        Production {
                            dynamic_precedence: -2,
                            steps: vec![ProductionStep::new(Symbol::terminal(14))],
                        },
                    ],
                },
            ],
            ..Default::default()
        };

        let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();

        // Nothing to inline at step 0.
        assert!(inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .is_none());

        // Inlining variable 1 yields two productions.
        // The second one takes over the inlined production's dynamic precedence.
        assert_eq!(
            inline_map
                .inlined_productions(&grammar.variables[0].productions[0], 1)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(12)),
                        ProductionStep::new(Symbol::terminal(13)),
                        ProductionStep::new(Symbol::terminal(11)),
                    ],
                },
                Production {
                    dynamic_precedence: -2,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(14)),
                        ProductionStep::new(Symbol::terminal(11)),
                    ],
                },
            ]
        );
    }

    /// An inlined variable whose own production starts with another inlined
    /// variable is expanded fully, and a later inlined step is looked up on the
    /// already-inlined production rather than on the original one.
    #[test]
    fn test_nested_inlining() {
        let grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(10)),
                            ProductionStep::new(Symbol::non_terminal(1)), // inlined
                            ProductionStep::new(Symbol::terminal(11)),
                            ProductionStep::new(Symbol::non_terminal(2)), // inlined
                            ProductionStep::new(Symbol::terminal(12)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(13))],
                        },
                        Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::non_terminal(3)), // inlined
                                ProductionStep::new(Symbol::terminal(14)),
                            ],
                        },
                    ],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(15))],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-3".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(16))],
                    }],
                },
            ],
            variables_to_inline: vec![
                Symbol::non_terminal(1),
                Symbol::non_terminal(2),
                Symbol::non_terminal(3),
            ],
            ..Default::default()
        };

        let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();

        let productions = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 1)
            .unwrap()
            .collect::<Vec<_>>();

        // Inlining at step 1 expands variable 1 (and, nested inside it, variable 3).
        // The later reference to variable 2 is still present in these productions.
        assert_eq!(
            productions.iter().copied().cloned().collect::<Vec<_>>(),
            vec![
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(13)),
                        ProductionStep::new(Symbol::terminal(11)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::terminal(12)),
                    ],
                },
                Production {
                    dynamic_precedence: 0,
                    steps: vec![
                        ProductionStep::new(Symbol::terminal(10)),
                        ProductionStep::new(Symbol::terminal(16)),
                        ProductionStep::new(Symbol::terminal(14)),
                        ProductionStep::new(Symbol::terminal(11)),
                        ProductionStep::new(Symbol::non_terminal(2)),
                        ProductionStep::new(Symbol::terminal(12)),
                    ],
                },
            ]
        );

        // Variable 2 is inlined when the first resulting production is queried at
        // step 3.
        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::terminal(13)),
                    ProductionStep::new(Symbol::terminal(11)),
                    ProductionStep::new(Symbol::terminal(15)),
                    ProductionStep::new(Symbol::terminal(12)),
                ],
            },]
        );
    }

    /// Checks how precedence, associativity and aliases on the inlined step are
    /// transferred onto the steps that replace it.
    #[test]
    fn test_inlining_with_precedence_and_alias() {
        let grammar = SyntaxGrammar {
            variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)],
            variables: vec![
                SyntaxVariable {
                    name: "non-terminal-0".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // inlined
                            ProductionStep::new(Symbol::non_terminal(1))
                                .with_prec(Precedence::Integer(1), Some(Associativity::Left)),
                            ProductionStep::new(Symbol::terminal(10)),
                            // inlined
                            ProductionStep::new(Symbol::non_terminal(2))
                                .with_alias("outer_alias", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-1".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(11))
                                .with_prec(Precedence::Integer(2), None)
                                .with_alias("inner_alias", true),
                            ProductionStep::new(Symbol::terminal(12)),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "non-terminal-2".to_string(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![ProductionStep::new(Symbol::terminal(13))],
                    }],
                },
            ],
            ..Default::default()
        };

        let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap();

        let productions = inline_map
            .inlined_productions(&grammar.variables[0].productions[0], 0)
            .unwrap()
            .collect::<Vec<_>>();

        assert_eq!(
            productions.iter().copied().cloned().collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    // The first step in the inlined production retains its precedence
                    // and alias.
                    ProductionStep::new(Symbol::terminal(11))
                        .with_prec(Precedence::Integer(2), None)
                        .with_alias("inner_alias", true),
                    // The final step of the inlined production inherits the precedence of
                    // the inlined step.
                    ProductionStep::new(Symbol::terminal(12))
                        .with_prec(Precedence::Integer(1), Some(Associativity::Left)),
                    ProductionStep::new(Symbol::terminal(10)),
                    ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true),
                ]
            }],
        );

        assert_eq!(
            inline_map
                .inlined_productions(productions[0], 3)
                .unwrap()
                .cloned()
                .collect::<Vec<_>>(),
            vec![Production {
                dynamic_precedence: 0,
                steps: vec![
                    ProductionStep::new(Symbol::terminal(11))
                        .with_prec(Precedence::Integer(2), None)
                        .with_alias("inner_alias", true),
                    ProductionStep::new(Symbol::terminal(12))
                        .with_prec(Precedence::Integer(1), Some(Associativity::Left)),
                    ProductionStep::new(Symbol::terminal(10)),
                    // All steps of the inlined production inherit their alias from the
                    // inlined step.
                    ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true),
                ]
            }],
        );
    }

    /// Listing a terminal in `variables_to_inline` is rejected with an error that
    /// names the token as it appears in the lexical grammar.
    #[test]
    fn test_error_when_inlining_tokens() {
        let lexical_grammar = LexicalGrammar {
            variables: vec![LexicalVariable {
                name: "something".to_string(),
                kind: VariableType::Named,
                implicit_precedence: 0,
                start_state: 0,
            }],
            ..Default::default()
        };

        let grammar = SyntaxGrammar {
            variables_to_inline: vec![Symbol::terminal(0)],
            variables: vec![SyntaxVariable {
                name: "non-terminal-0".to_string(),
                kind: VariableType::Named,
                productions: vec![Production {
                    dynamic_precedence: 0,
                    steps: vec![ProductionStep::new(Symbol::terminal(0))],
                }],
            }],
            ..Default::default()
        };

        if let Err(error) = process_inlines(&grammar, &lexical_grammar) {
            assert_eq!(error.to_string(), "Token `something` cannot be inlined");
        } else {
            panic!("expected an error, but got none");
        }
    }
}



================================================
FILE: crates/generate/src/templates/alloc.h
================================================
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allocation entry points: `ts_malloc`, `ts_calloc`, `ts_realloc`, `ts_free`.
//
// A client may pre-define any of these four macros; a pre-defined macro is
// left untouched. Otherwise each one maps to the matching `ts_current_*`
// function pointer when TREE_SITTER_REUSE_ALLOCATOR is defined, and to the
// C standard library function when it is not.

#if defined(TREE_SITTER_REUSE_ALLOCATOR)
extern void *(*ts_current_malloc)(size_t size);
extern void *(*ts_current_calloc)(size_t count, size_t size);
extern void *(*ts_current_realloc)(void *ptr, size_t size);
extern void (*ts_current_free)(void *ptr);
#endif

#if !defined(ts_malloc)
#  if defined(TREE_SITTER_REUSE_ALLOCATOR)
#    define ts_malloc ts_current_malloc
#  else
#    define ts_malloc malloc
#  endif
#endif

#if !defined(ts_calloc)
#  if defined(TREE_SITTER_REUSE_ALLOCATOR)
#    define ts_calloc ts_current_calloc
#  else
#    define ts_calloc calloc
#  endif
#endif

#if !defined(ts_realloc)
#  if defined(TREE_SITTER_REUSE_ALLOCATOR)
#    define ts_realloc ts_current_realloc
#  else
#    define ts_realloc realloc
#  endif
#endif

#if !defined(ts_free)
#  if defined(TREE_SITTER_REUSE_ALLOCATOR)
#    define ts_free ts_current_free
#  else
#    define ts_free free
#  endif
#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_



================================================
FILE: crates/generate/src/templates/array.h
================================================
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Suppress unused-variable diagnostics (MSVC C4101, GCC/Clang -Wunused-variable)
// for the code in this header. The previous diagnostic state is saved here and
// restored by the matching pop at the end of the file.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

/// Declare an anonymous struct type for a growable array of `T`: a pointer to
/// the elements plus the number of elements in use (`size`) and the number of
/// elements the buffer can hold (`capacity`).
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array. This expands to a brace initializer, so it can only
/// be used where an initializer is allowed.
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
/// Asserts that `index` is less than the array's size.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// not greater than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free the buffer that holds this array's elements and reset the array to the
/// empty state. Note that this does not free any memory that the individual
/// elements may themselves refer to.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
#define array_push(self, element)                            \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from one array to the end of another.
#define array_push_all(self, other)                                       \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents)                    \
  _array__splice(                                               \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count,  contents                                        \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents)  \
  _array__splice(                                                       \
    (Array *)(self), array_elem_size(self), _index,                \
    old_count, new_count, new_contents                                 \
  )

/// Insert one `element` into the array at the given `index`.
/// The address of `element` is taken, so it must be an lvalue.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
/// No check is made that the array is non-empty.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order. Nothing is inserted if an element that
/// compares equal to `value` is already present.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
/// Nothing is inserted if an element with an equal field value is already
/// present.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

/// Untyped form of `Array(T)`. The public macros cast their array argument to
/// this type and pass the element size separately, so the helper functions
/// below can work on arrays of any element type.
typedef Array(void) Array;

/// Release the array's element buffer, if it has one, and reset the array to
/// the empty state. An array without a buffer is left untouched.
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;

  ts_free(self->contents);
  self->contents = NULL;
  self->size = 0;
  self->capacity = 0;
}

/// Remove the element at `index` by shifting every later element down by one
/// slot, then shrink the size by one. Asserts that `index` is in bounds.
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
                                uint32_t index) {
  assert(index < self->size);

  char *bytes = (char *)self->contents;
  char *dst = bytes + index * element_size;
  char *src = bytes + (index + 1) * element_size;
  size_t tail_bytes = (self->size - index - 1) * element_size;

  // Source and destination overlap, hence memmove rather than memcpy.
  memmove(dst, src, tail_bytes);
  self->size--;
}

/// Ensure the buffer can hold at least `new_capacity` elements of
/// `element_size` bytes each. Does nothing when the current capacity is
/// already sufficient; the buffer is never shrunk.
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;

  size_t byte_count = new_capacity * element_size;
  // Resize an existing buffer; allocate a fresh one when there is none yet.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}

/// Make `self` an element-wise copy of `other`: grow `self`'s buffer if it is
/// too small, set its size to `other`'s size, and copy the element bytes.
/// `element_size` is the size in bytes of one element.
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty, either buffer may still be NULL, and passing a null
  // pointer to memcpy is undefined behavior even for a zero-byte copy.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}

/// Exchange the buffer pointer, size and capacity of two arrays. No elements
/// are copied.
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
  Array saved_self = *self;
  *self = *other;
  *other = saved_self;
}

/// Make room for `count` more elements beyond the current size. When the
/// buffer is too small, the new capacity is the largest of: twice the current
/// capacity, 8, and the required size.
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) return;

  uint32_t target = self->capacity * 2;
  if (target < 8) target = 8;
  if (target < required) target = required;
  _array__reserve(self, element_size, target);
}

/// Replace `old_count` elements starting at `index` with `new_count` elements.
/// The new elements are copied from `elements`, or zero-filled when `elements`
/// is NULL. Asserts that the removed range lies within the array.
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
                                 uint32_t index, uint32_t old_count,
                                 uint32_t new_count, const void *elements) {
  uint32_t removed_end = index + old_count;
  uint32_t inserted_end = index + new_count;
  uint32_t resulting_size = self->size + new_count - old_count;
  assert(removed_end <= self->size);

  _array__reserve(self, element_size, resulting_size);

  char *bytes = (char *)self->contents;

  // Shift the elements that follow the removed range to their new position.
  if (self->size > removed_end) {
    size_t tail_bytes = (self->size - removed_end) * element_size;
    memmove(bytes + inserted_end * element_size,
            bytes + removed_end * element_size,
            tail_bytes);
  }

  // Fill the opened slot with the caller's elements, or with zeros.
  if (new_count > 0) {
    char *slot = bytes + index * element_size;
    size_t slot_bytes = new_count * element_size;
    if (elements) {
      memcpy(slot, elements, slot_bytes);
    } else {
      memset(slot, 0, slot_bytes);
    }
  }

  self->size = resulting_size;
}

/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from index `start` up to `self->size`.
/// Each probe evaluates `compare(&self->contents[i] suffix, needle)`, where
/// `suffix` is appended verbatim after the element expression; a result
/// `<= 0` moves the search window to the right.
///
/// On completion `*_exists` is true when the final probe compared equal, and
/// `*_index` is the index of that element; otherwise `*_index` is the position
/// at which `needle` would have to be inserted to keep the order. If the
/// searched range is empty, `*_index` is `start` and `*_exists` is false.
///
/// The expansion declares locals named `size`, `comparison`, `half_size` and
/// `mid_index`, so argument expressions must not rely on outer variables with
/// those names.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
///
/// Both operands are converted to `int` and subtracted, so the result is
/// negative, zero or positive as `*a` is less than, equal to or greater than `b`.
/// NOTE(review): the subtraction can overflow for operands that are very far
/// apart or do not fit in `int` — confirm callers only pass small values.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_ARRAY_H_



================================================
FILE: crates/highlight/README.md
================================================
# Tree-sitter Highlight

[![crates.io badge]][crates.io]

[crates.io]: https://crates.io/crates/tree-sitter-highlight
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723

## Usage

Add this crate, and the language-specific crates for whichever languages you want
to parse, to your `Cargo.toml`:

```toml
[dependencies]
tree-sitter-highlight = "0.25.4"
tree-sitter-javascript = "0.23.1"
```

Define the list of highlight names that you will recognize:

```rust
let highlight_names = [
    "attribute",
    "comment",
    "constant",
    "constant.builtin",
    "constructor",
    "embedded",
    "function",
    "function.builtin",
    "keyword",
    "module",
    "number",
    "operator",
    "property",
    "property.builtin",
    "punctuation",
    "punctuation.bracket",
    "punctuation.delimiter",
    "punctuation.special",
    "string",
    "string.special",
    "tag",
    "type",
    "type.builtin",
    "variable",
    "variable.builtin",
    "variable.parameter",
];
```

Create a highlighter. You need one of these for each thread that you're using for
syntax highlighting:

```rust
use tree_sitter_highlight::Highlighter;

let mut highlighter = Highlighter::new();
```

Load some highlighting queries from the `queries` directory of the language repository:

```rust
use tree_sitter_highlight::HighlightConfiguration;

let javascript_language = tree_sitter_javascript::LANGUAGE.into();

let mut javascript_config = HighlightConfiguration::new(
    javascript_language,
    "javascript",
    tree_sitter_javascript::HIGHLIGHT_QUERY,
    tree_sitter_javascript::INJECTIONS_QUERY,
    tree_sitter_javascript::LOCALS_QUERY,
).unwrap();
```

Configure the recognized names:

```rust
javascript_config.configure(&highlight_names);
```

Highlight some code:

```rust
use tree_sitter_highlight::HighlightEvent;

let highlights = highlighter.highlight(
    &javascript_config,
    b"const x = new Y();",
    None,
    |_| None
).unwrap();

for event in highlights {
    match event.unwrap() {
        HighlightEvent::Source {start, end} => {
            eprintln!("source: {start}-{end}");
        },
        HighlightEvent::HighlightStart(s) => {
            eprintln!("highlight style started: {s:?}");
        },
        HighlightEvent::HighlightEnd => {
            eprintln!("highlight style ended");
        },
    }
}
```

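The last parameter to `highlight` is a _language injection_ callback. This allows
other languages to be retrieved when Tree-sitter detects an embedded document
(for example, a piece of JavaScript code inside a `script` tag within HTML).
The callback receives a string identifying the embedded language and returns the
`HighlightConfiguration` to use for it; returning `None`, as in the example above,
means no configuration is supplied for embedded documents.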



================================================
FILE: crates/highlight/Cargo.toml
================================================
[package]
name = "tree-sitter-highlight"
version.workspace = true
description = "Library for performing syntax highlighting with Tree-sitter"
authors = [
  "Max Brunsfeld <maxbrunsfeld@gmail.com>",
  "Tim Clem <timothy.clem@gmail.com>",
]
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-highlight"
license.workspace = true
keywords = ["incremental", "parsing", "syntax", "highlighting"]
categories = ["parsing", "text-editors"]

[lints]
workspace = true

[lib]
path = "src/highlight.rs"
crate-type = ["lib", "staticlib"]

[dependencies]
regex.workspace = true
thiserror.workspace = true
streaming-iterator.workspace = true

tree-sitter.workspace = true



================================================
FILE: crates/highlight/include/tree_sitter/highlight.h
================================================
#ifndef TREE_SITTER_HIGHLIGHT_H_
#define TREE_SITTER_HIGHLIGHT_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stddef.h>
#include <stdint.h>

// Result codes returned by the highlighting functions. The numeric values
// must stay in step with the `ErrorCode` enum on the Rust side, so new values
// may only be appended at the end.
typedef enum {
  TSHighlightOk,
  TSHighlightUnknownScope,
  TSHighlightTimeout,
  TSHighlightInvalidLanguage,
  TSHighlightInvalidUtf8,
  TSHighlightInvalidRegex,
  TSHighlightInvalidQuery,
  // Returned by `ts_highlighter_add_language` when the language name is not
  // valid UTF-8.
  TSHighlightInvalidLanguageName,
} TSHighlightError;

typedef struct TSHighlighter TSHighlighter;
typedef struct TSHighlightBuffer TSHighlightBuffer;

// Construct a `TSHighlighter` by providing a list of strings containing
// the HTML attributes that should be applied for each highlight value.
//
// `highlight_names` and `attribute_strings` are parallel arrays, each holding
// `highlight_count` NUL-terminated strings. The attribute strings are borrowed
// rather than copied, so they must remain valid for as long as the returned
// highlighter is in use.
TSHighlighter *ts_highlighter_new(
  const char **highlight_names,
  const char **attribute_strings,
  uint32_t highlight_count
);

// Delete a syntax highlighter created by `ts_highlighter_new`. The pointer
// must not be used after this call.
void ts_highlighter_delete(TSHighlighter *);

// Add a `TSLanguage` to a highlighter. The language is associated with a
// scope name, which can be used later to select a language for syntax
// highlighting. Along with the language, you must provide the source of the
// highlight query, and optionally the sources of the injection and locals
// queries. Each query is passed as a pointer plus a length in bytes; a length
// of zero for the injection or locals query means that no such query is used.
// You can also optionally provide an 'injection regex' (pass NULL for none),
// which is used to detect when this language has been embedded in a document
// written in a different language.
//
// NOTE(review): `TSLanguage` is not declared by this header; presumably
// `tree_sitter/api.h` has to be included first — confirm.
TSHighlightError ts_highlighter_add_language(
  TSHighlighter *self,
  const char *language_name,
  const char *scope_name,
  const char *injection_regex,
  const TSLanguage *language,
  const char *highlight_query,
  const char *injection_query,
  const char *locals_query,
  uint32_t highlight_query_len,
  uint32_t injection_query_len,
  uint32_t locals_query_len
);

// Compute syntax highlighting for a given document. You must first
// create a `TSHighlightBuffer` to hold the output.
//
// `scope_name` selects a language previously registered with
// `ts_highlighter_add_language`; `TSHighlightUnknownScope` is returned when no
// language is registered under that name. `source_code` is `source_code_len`
// bytes long. `cancellation_flag` may be NULL.
TSHighlightError ts_highlighter_highlight(
  const TSHighlighter *self,
  const char *scope_name,
  const char *source_code,
  uint32_t source_code_len,
  TSHighlightBuffer *output,
  const size_t *cancellation_flag
);

// TSHighlightBuffer: This struct stores the HTML output of syntax
// highlighting. It can be reused for multiple highlighting calls.
//
// Create a new buffer. Release it with `ts_highlight_buffer_delete`.
// Declared with `(void)` so that this is a real prototype in C as well as C++.
TSHighlightBuffer *ts_highlight_buffer_new(void);

// Delete a highlight buffer created by `ts_highlight_buffer_new`. The pointer,
// and any content pointers obtained from it, must not be used after this call.
void ts_highlight_buffer_delete(TSHighlightBuffer *);

// Access the HTML content of a highlight buffer. The content is
// `ts_highlight_buffer_len` bytes long and the line-offset array holds
// `ts_highlight_buffer_line_count` entries. The returned pointers refer to
// memory owned by the buffer and must not outlive it.
const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *);
const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *);
uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *);
uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_HIGHLIGHT_H_



================================================
FILE: crates/highlight/src/c_lib.rs
================================================
use std::{
    collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str,
    sync::atomic::AtomicUsize,
};

use regex::Regex;
use tree_sitter::Language;

use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer};

/// State behind the C `TSHighlighter` handle: the registered languages plus the
/// highlight names and attribute strings supplied to [`ts_highlighter_new`].
pub struct TSHighlighter {
    /// Registered languages keyed by scope name. Each entry holds the optional
    /// injection regex and the highlight configuration for that language.
    pub languages: HashMap<String, (Option<Regex>, HighlightConfiguration)>,
    /// Attribute bytes written for each highlight, indexed by highlight number.
    /// These borrow the caller's C strings; they are not copied.
    pub attribute_strings: Vec<&'static [u8]>,
    /// Highlight names in the order they were passed to [`ts_highlighter_new`].
    pub highlight_names: Vec<String>,
    /// Position of the `"carriage-return"` entry in `highlight_names`, if any.
    pub carriage_return_index: Option<usize>,
}

/// State behind the C `TSHighlightBuffer` handle: a [`Highlighter`] paired with
/// the [`HtmlRenderer`] whose `html` and `line_offsets` hold the rendered output.
pub struct TSHighlightBuffer {
    // Produces the highlight events for a document.
    highlighter: Highlighter,
    // Turns those events into HTML; the accessor functions read from it.
    renderer: HtmlRenderer,
}

/// Result code returned across the C boundary by the functions in this module.
#[repr(C)]
pub enum ErrorCode {
    /// The operation succeeded.
    Ok,
    /// No language is registered under the requested scope name.
    UnknownScope,
    /// Highlighting was cancelled or failed for an unspecified reason.
    Timeout,
    /// Rendering reported an invalid language.
    InvalidLanguage,
    /// A scope name, injection regex or query was not valid UTF-8.
    InvalidUtf8,
    /// The injection regex could not be compiled.
    InvalidRegex,
    /// The combined queries could not be compiled for the language.
    InvalidQuery,
    /// The language name was not valid UTF-8.
    InvalidLanguageName,
}

/// Create a new [`TSHighlighter`] instance.
///
/// The highlight names are copied (lossily converted to UTF-8), while the
/// attribute strings are borrowed from the caller as raw bytes. The index of the
/// `"carriage-return"` name, if present, is remembered for rendering.
///
/// # Safety
///
/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for
/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null.
#[no_mangle]
pub unsafe extern "C" fn ts_highlighter_new(
    highlight_names: *const *const c_char,
    attribute_strings: *const *const c_char,
    highlight_count: u32,
) -> *mut TSHighlighter {
    let count = highlight_count as usize;

    let mut names = Vec::with_capacity(count);
    for name in slice::from_raw_parts(highlight_names, count) {
        names.push(CStr::from_ptr(*name).to_string_lossy().to_string());
    }

    let mut attributes = Vec::with_capacity(count);
    for attribute in slice::from_raw_parts(attribute_strings, count) {
        attributes.push(CStr::from_ptr(*attribute).to_bytes());
    }

    let carriage_return_index = names.iter().position(|name| name == "carriage-return");

    let highlighter = TSHighlighter {
        languages: HashMap::new(),
        attribute_strings: attributes,
        highlight_names: names,
        carriage_return_index,
    };
    Box::into_raw(Box::new(highlighter))
}

/// Add a language to a [`TSHighlighter`] instance.
///
/// The language is registered under `scope_name`, replacing any language that
/// was previously registered under the same scope. The three query sources are
/// each given as a pointer plus a byte length; a query whose length is zero (or
/// whose pointer is null) is treated as empty.
///
/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not:
/// [`ErrorCode::InvalidUtf8`] if the scope name, regex or a query is not UTF-8,
/// [`ErrorCode::InvalidRegex`] if the injection regex does not compile,
/// [`ErrorCode::InvalidLanguageName`] if the language name is not UTF-8, and
/// [`ErrorCode::InvalidQuery`] if the queries cannot be compiled.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
/// created by [`ts_highlighter_new`].
///
/// `language_name` and `scope_name` must be valid, non-null C-style strings.
/// `injection_regex` must be either null or a valid C-style string. Each query
/// pointer must be valid for reads of its associated length.
#[no_mangle]
pub unsafe extern "C" fn ts_highlighter_add_language(
    this: *mut TSHighlighter,
    language_name: *const c_char,
    scope_name: *const c_char,
    injection_regex: *const c_char,
    language: Language,
    highlight_query: *const c_char,
    injection_query: *const c_char,
    locals_query: *const c_char,
    highlight_query_len: u32,
    injection_query_len: u32,
    locals_query_len: u32,
) -> ErrorCode {
    /// Borrows a (pointer, length) pair as UTF-8 text.
    ///
    /// An empty or null query yields `""` without ever building a slice from
    /// the pointer, because `slice::from_raw_parts` requires a non-null pointer
    /// even for a length of zero.
    unsafe fn query_text<'a>(query: *const c_char, len: u32) -> Result<&'a str, ErrorCode> {
        if len == 0 || query.is_null() {
            return Ok("");
        }
        let bytes = slice::from_raw_parts(query.cast::<u8>(), len as usize);
        str::from_utf8(bytes).or(Err(ErrorCode::InvalidUtf8))
    }

    let f = move || {
        let this = unwrap_mut_ptr(this);
        let scope_name = CStr::from_ptr(scope_name);
        let scope_name = scope_name
            .to_str()
            .or(Err(ErrorCode::InvalidUtf8))?
            .to_string();
        let injection_regex = if injection_regex.is_null() {
            None
        } else {
            let pattern = CStr::from_ptr(injection_regex);
            let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
            Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
        };

        // All three queries go through the same guarded conversion.
        let highlight_query = query_text(highlight_query, highlight_query_len)?;
        let injection_query = query_text(injection_query, injection_query_len)?;
        let locals_query = query_text(locals_query, locals_query_len)?;

        let lang = CStr::from_ptr(language_name)
            .to_str()
            .or(Err(ErrorCode::InvalidLanguageName))?;

        let mut config = HighlightConfiguration::new(
            language,
            lang,
            highlight_query,
            injection_query,
            locals_query,
        )
        .or(Err(ErrorCode::InvalidQuery))?;
        config.configure(this.highlight_names.as_slice());
        this.languages.insert(scope_name, (injection_regex, config));

        Ok(())
    };

    match f() {
        Ok(()) => ErrorCode::Ok,
        Err(e) => e,
    }
}

/// Create a new [`TSHighlightBuffer`] holding a fresh [`Highlighter`] and
/// [`HtmlRenderer`].
///
/// The returned pointer owns the buffer; release it with
/// [`ts_highlight_buffer_delete`].
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
    let buffer = TSHighlightBuffer {
        highlighter: Highlighter::new(),
        renderer: HtmlRenderer::new(),
    };
    Box::into_raw(Box::new(buffer))
}

/// Deletes a [`TSHighlighter`] instance.
///
/// Passing a null pointer is a no-op.
///
/// # Safety
///
/// `this` must be null or a valid pointer to a [`TSHighlighter`] instance
/// created by [`ts_highlighter_new`].
///
/// It cannot be used after this function is called.
#[no_mangle]
pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
    // `Box::from_raw` on a null pointer is undefined behavior, so guard it.
    if !this.is_null() {
        drop(Box::from_raw(this));
    }
}

/// Deletes a [`TSHighlightBuffer`] instance.
///
/// Passing a null pointer is a no-op.
///
/// # Safety
///
/// `this` must be null or a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`]
///
/// It cannot be used after this function is called.
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
    // `Box::from_raw` on a null pointer is undefined behavior, so guard it.
    if !this.is_null() {
        drop(Box::from_raw(this));
    }
}

/// Returns a raw pointer to the HTML bytes stored in a [`TSHighlightBuffer`].
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
///
/// The returned pointer must not outlive the [`TSHighlightBuffer`] instance,
/// else the data will point to garbage.
///
/// To get the length of the HTML content, use [`ts_highlight_buffer_len`].
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
    let html = &unwrap_ptr(this).renderer.html;
    html.as_slice().as_ptr()
}

/// Returns a raw pointer to the line-offset array of a [`TSHighlightBuffer`].
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
///
/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`]
/// instance, else the data will point to garbage.
///
/// To get the length of the array, use [`ts_highlight_buffer_line_count`].
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_line_offsets(
    this: *const TSHighlightBuffer,
) -> *const u32 {
    let offsets = &unwrap_ptr(this).renderer.line_offsets;
    offsets.as_slice().as_ptr()
}

/// Returns the number of HTML bytes stored in a [`TSHighlightBuffer`],
/// converted to `u32`.
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
    let html = &unwrap_ptr(this).renderer.html;
    html.len() as u32
}

/// Returns the number of line offsets stored in a [`TSHighlightBuffer`],
/// converted to `u32`.
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
/// created by [`ts_highlight_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
    let offsets = &unwrap_ptr(this).renderer.line_offsets;
    offsets.len() as u32
}

/// Highlight a string of source code, rendering the result into `output`.
///
/// The process is aborted if `this` or `output` is null, or if `scope_name` is
/// not valid UTF-8. All other failures are reported through the returned
/// [`ErrorCode`].
///
/// # Safety
///
/// `scope_name` must be a valid, non-null C-style string, `source_code` must be
/// non-null and valid for reads of `source_code_len` bytes, and `output` must be
/// a valid pointer to a [`TSHighlightBuffer`] created by
/// [`ts_highlight_buffer_new`]. `cancellation_flag` may be null; otherwise it
/// must point to a valid [`AtomicUsize`].
///
/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by
/// [`ts_highlighter_new`]
#[no_mangle]
pub unsafe extern "C" fn ts_highlighter_highlight(
    this: *const TSHighlighter,
    scope_name: *const c_char,
    source_code: *const c_char,
    source_code_len: u32,
    output: *mut TSHighlightBuffer,
    cancellation_flag: *const AtomicUsize,
) -> ErrorCode {
    let highlighter = unwrap_ptr(this);
    let buffer = unwrap_mut_ptr(output);
    let scope = unwrap(CStr::from_ptr(scope_name).to_str());
    let source = slice::from_raw_parts(source_code.cast::<u8>(), source_code_len as usize);
    highlighter.highlight(source, scope, buffer, cancellation_flag.as_ref())
}

impl TSHighlighter {
    /// Highlights `source_code` with the language registered under `scope_name`
    /// and renders the result as HTML into `output`.
    ///
    /// Embedded languages are resolved by testing the injection string against
    /// the injection regex of every registered language; the first match wins.
    ///
    /// Returns [`ErrorCode::UnknownScope`] when no language is registered under
    /// `scope_name`. A failure while starting the highlighter is mapped the same
    /// way as a failure during rendering: [`Error::InvalidLanguage`] becomes
    /// [`ErrorCode::InvalidLanguage`], anything else becomes
    /// [`ErrorCode::Timeout`].
    fn highlight(
        &self,
        source_code: &[u8],
        scope_name: &str,
        output: &mut TSHighlightBuffer,
        cancellation_flag: Option<&AtomicUsize>,
    ) -> ErrorCode {
        let Some((_, configuration)) = self.languages.get(scope_name) else {
            return ErrorCode::UnknownScope;
        };
        let languages = &self.languages;

        let highlights = output.highlighter.highlight(
            configuration,
            source_code,
            cancellation_flag,
            move |injection_string| {
                languages.values().find_map(|(injection_regex, config)| {
                    injection_regex.as_ref().and_then(|regex| {
                        if regex.is_match(injection_string) {
                            Some(config)
                        } else {
                            None
                        }
                    })
                })
            },
        );

        let result = match highlights {
            Ok(highlights) => {
                output.renderer.reset();
                output
                    .renderer
                    .set_carriage_return_highlight(self.carriage_return_index.map(Highlight));
                // Each highlight's attributes are looked up by its index.
                output.renderer.render(highlights, source_code, &|s, out| {
                    out.extend(self.attribute_strings[s.0]);
                })
            }
            // Keep the original error so it is not misreported as a timeout.
            Err(error) => Err(error),
        };

        match result {
            Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout,
            Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage,
            Ok(()) => ErrorCode::Ok,
        }
    }
}

/// Converts a raw pointer into a shared reference, printing a message to stderr
/// and aborting the process if the pointer is null.
unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
    let Some(reference) = result.as_ref() else {
        eprintln!("{}:{} - pointer must not be null", file!(), line!());
        abort();
    };
    reference
}

/// Converts a raw pointer into a mutable reference, printing a message to stderr
/// and aborting the process if the pointer is null.
unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
    let Some(reference) = result.as_mut() else {
        eprintln!("{}:{} - pointer must not be null", file!(), line!());
        abort();
    };
    reference
}

/// Returns the success value of `result`; on error, prints the error to stderr
/// and aborts the process.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter highlight error: {error}");
            abort();
        }
    }
}



================================================
FILE: crates/highlight/src/highlight.rs
================================================
#![doc = include_str!("../README.md")]

pub mod c_lib;
use core::slice;
use std::{
    collections::HashSet,
    iter,
    marker::PhantomData,
    mem::{self, MaybeUninit},
    ops, str,
    sync::{
        atomic::{AtomicUsize, Ordering},
        LazyLock,
    },
};

pub use c_lib as c;
use streaming_iterator::StreamingIterator;
use thiserror::Error;
use tree_sitter::{
    ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture,
    QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree,
};

// NOTE(review): presumably the number of iterations between checks of the
// cancellation flag — the use site is outside this view, confirm.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
// NOTE(review): presumably the initial capacity (10 KiB) reserved for the
// renderer's HTML buffer — confirm against `HtmlRenderer`.
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
// NOTE(review): presumably the initial capacity reserved for the renderer's
// line-offset list — confirm against `HtmlRenderer`.
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;

/// The set of capture names treated as standard by this crate, built lazily on
/// first access.
static STANDARD_CAPTURE_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "comment.documentation",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "error",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "markup",
        "markup.bold",
        "markup.heading",
        "markup.italic",
        "markup.link",
        "markup.link.url",
        "markup.list",
        "markup.list.checked",
        "markup.list.numbered",
        "markup.list.unchecked",
        "markup.list.unnumbered",
        "markup.quote",
        "markup.raw",
        "markup.raw.block",
        "markup.raw.inline",
        "markup.strikethrough",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.escape",
        "string.regexp",
        "string.special",
        "string.special.symbol",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.member",
        "variable.parameter",
    ])
});

/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is an index; the C bindings use it to look up the
/// attribute string belonging to the highlight.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);

/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// Highlighting was cancelled. The C bindings report this as a timeout.
    #[error("Cancelled")]
    Cancelled,
    /// The language could not be used for highlighting.
    #[error("Invalid language")]
    InvalidLanguage,
    /// Any other failure. The C bindings report this as a timeout as well.
    #[error("Unknown error")]
    Unknown,
}

/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A span of the source text, given by its `start` and `end` positions.
    /// NOTE(review): presumably byte offsets into the source — confirm.
    Source { start: usize, end: usize },
    /// The given highlight starts applying here.
    HighlightStart(Highlight),
    /// A highlight stops applying here.
    /// NOTE(review): presumably the most recently started one — confirm.
    HighlightEnd,
}

/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// The Tree-sitter language used for parsing.
    pub language: Language,
    /// The name given to the configuration when it was created.
    pub language_name: String,
    /// A single query built from the injection, locals and highlights query
    /// sources, concatenated in that order.
    pub query: Query,
    // NOTE(review): the remaining fields are populated by code outside this
    // view. The two `*_pattern_index` fields presumably mark where the locals
    // and highlights patterns begin inside `query`, and the `*_capture_index`
    // fields presumably cache the indices of well-known captures — confirm.
    combined_injections_query: Option<Query>,
    locals_pattern_index: usize,
    highlights_pattern_index: usize,
    highlight_indices: Vec<Option<Highlight>>,
    non_local_variable_patterns: Vec<bool>,
    injection_content_capture_index: Option<u32>,
    injection_language_capture_index: Option<u32>,
    local_scope_capture_index: Option<u32>,
    local_def_capture_index: Option<u32>,
    local_def_value_capture_index: Option<u32>,
    local_ref_capture_index: Option<u32>,
}

/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// The parser owned by this highlighter; also reachable through
    /// [`Highlighter::parser`].
    pub parser: Parser,
    // Starts out empty. NOTE(review): presumably a pool of query cursors kept
    // for reuse between highlighting calls — confirm.
    cursors: Vec<QueryCursor>,
}

/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
pub struct HtmlRenderer {
    /// The rendered HTML bytes.
    pub html: Vec<u8>,
    /// One entry per rendered line.
    /// NOTE(review): presumably the offset in `html` where each line starts — confirm.
    pub line_offsets: Vec<u32>,
    // The highlight to use for carriage returns, if one was configured.
    carriage_return_highlight: Option<Highlight>,
    // The offset in `self.html` of the last carriage return.
    last_carriage_return: Option<usize>,
}

// A local definition recorded inside a `LocalScope`.
// NOTE(review): field meanings below are inferred from names and types; the
// code that fills them in is outside this view — confirm.
#[derive(Debug)]
struct LocalDef<'a> {
    // The defined name, borrowed for `'a`.
    name: &'a str,
    // Presumably the range of the definition's value in the source.
    value_range: ops::Range<usize>,
    // The highlight associated with the definition, if any.
    highlight: Option<Highlight>,
}

// A scope holding the local definitions made within a range of the source.
// NOTE(review): field meanings below are inferred from names and types; the
// code that uses them is outside this view — confirm.
#[derive(Debug)]
struct LocalScope<'a> {
    // Presumably whether lookups may continue into enclosing scopes.
    inherits: bool,
    // The range of the source covered by this scope.
    range: ops::Range<usize>,
    // The definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}

// The iterator state created by `Highlighter::highlight`. `F` is the injection
// callback, which maps a string to the configuration to use for an embedded
// language.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    // The source being highlighted.
    source: &'a [u8],
    // The language name of the configuration highlighting was started with.
    language_name: &'a str,
    // Starts at 0. NOTE(review): presumably the current position in `source`.
    byte_offset: usize,
    highlighter: &'a mut Highlighter,
    injection_callback: F,
    // Optional flag supplied by the caller of `Highlighter::highlight`.
    cancellation_flag: Option<&'a AtomicUsize>,
    // Never empty when the iterator is created; sorted right after creation.
    layers: Vec<HighlightIterLayer<'a>>,
    // Starts at 0.
    iter_count: usize,
    // Starts as `None`.
    next_event: Option<HighlightEvent>,
    // Starts as `None`.
    last_highlight_range: Option<(usize, usize, usize)>,
}

// One layer of a `HighlightIter`. `HighlightIterLayer::new` produces the
// collection of layers that highlighting starts from.
// NOTE(review): presumably one layer per parsed language region (the root
// document or an injection) — the constructor is outside this view, confirm.
struct HighlightIterLayer<'a> {
    _tree: Tree,
    cursor: QueryCursor,
    // Peekable stream of query captures for this layer.
    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    // The configuration this layer is highlighted with.
    config: &'a HighlightConfiguration,
    highlight_end_stack: Vec<usize>,
    scope_stack: Vec<LocalScope<'a>>,
    ranges: Vec<Range>,
    // The root layer is created with depth 0.
    depth: usize,
}

/// An iterator over query captures that drives a raw `ffi::TSQueryCursor`
/// directly; see its `Iterator` implementation.
///
/// NOTE(review): this looks like a local mirror of `tree_sitter::QueryCaptures`,
/// presumably so that it can be used as an ordinary `Iterator` — confirm.
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
    // The raw cursor that captures are pulled from.
    ptr: *mut ffi::TSQueryCursor,
    // The query whose text predicates each match is checked against.
    query: &'query Query,
    // Supplies node text when evaluating text predicates.
    text_provider: T,
    // Scratch buffers handed to the text-predicate check.
    buffer1: Vec<u8>,
    buffer2: Vec<u8>,
    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
    _options: Option<*mut ffi::TSQueryCursorOptions>,
    _phantom: PhantomData<(&'tree (), I)>,
}

// A locally constructible stand-in for `tree_sitter::QueryMatch`: values of
// this type are transmuted into `QueryMatch` by the `_QueryCaptures` iterator.
// NOTE(review): that transmute relies on both types having the same layout —
// confirm whenever `tree_sitter::QueryMatch` changes.
struct _QueryMatch<'cursor, 'tree> {
    pub _pattern_index: usize,
    pub _captures: &'cursor [QueryCapture<'tree>],
    _id: u32,
    _cursor: *mut ffi::TSQueryCursor,
}

impl<'tree> _QueryMatch<'_, 'tree> {
    /// Builds a `_QueryMatch` from a raw FFI match and the cursor it came from.
    ///
    /// The captures slice borrows the memory behind `m.captures`; when the match
    /// has no captures an empty slice is used and the pointer is never read.
    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
        let captures: &[QueryCapture<'tree>] = if m.capture_count > 0 {
            unsafe {
                slice::from_raw_parts(
                    m.captures.cast::<QueryCapture<'tree>>(),
                    m.capture_count as usize,
                )
            }
        } else {
            &[]
        };

        _QueryMatch {
            _pattern_index: m.pattern_index as usize,
            _captures: captures,
            _id: m.id,
            _cursor: cursor,
        }
    }
}

impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
    for _QueryCaptures<'query, 'tree, T, I>
{
    /// A match together with the index of the capture within it.
    type Item = (QueryMatch<'query, 'tree>, usize);

    /// Pulls captures from the raw cursor until one belongs to a match that
    /// satisfies the query's text predicates, and returns it. Matches that fail
    /// the predicates are removed and skipped. Returns `None` once the cursor
    /// reports no further captures.
    fn next(&mut self) -> Option<Self::Item> {
        unsafe {
            loop {
                let mut capture_index = 0u32;
                // Filled in by the FFI call below when it returns true.
                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
                if ffi::ts_query_cursor_next_capture(
                    self.ptr,
                    m.as_mut_ptr(),
                    core::ptr::addr_of_mut!(capture_index),
                ) {
                    // NOTE(review): relies on `_QueryMatch` having the same
                    // layout as `tree_sitter::QueryMatch` — confirm.
                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
                        &m.assume_init(),
                        self.ptr,
                    ));
                    if result.satisfies_text_predicates(
                        self.query,
                        &mut self.buffer1,
                        &mut self.buffer2,
                        &mut self.text_provider,
                    ) {
                        return Some((result, capture_index as usize));
                    }
                    // The predicates failed: discard this match and keep looking.
                    result.remove();
                } else {
                    return None;
                }
            }
        }
    }
}

impl Default for Highlighter {
    /// Equivalent to [`Highlighter::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Highlighter {
    /// Creates a highlighter with a fresh parser and no cached cursors.
    #[must_use]
    pub fn new() -> Self {
        let parser = Parser::new();
        Self {
            parser,
            cursors: Vec::new(),
        }
    }

    /// Gives mutable access to the underlying parser.
    pub fn parser(&mut self) -> &mut Parser {
        &mut self.parser
    }

    /// Iterate over the highlighted regions for a given slice of source code.
    ///
    /// `injection_callback` maps a string to the configuration to use for an
    /// embedded language, or `None`. An error is returned if the initial layers
    /// cannot be created.
    pub fn highlight<'a>(
        &'a mut self,
        config: &'a HighlightConfiguration,
        source: &'a [u8],
        cancellation_flag: Option<&'a AtomicUsize>,
        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
        // The root layer is given an unbounded range.
        let unbounded_range = Range {
            start_byte: 0,
            end_byte: usize::MAX,
            start_point: Point::new(0, 0),
            end_point: Point::new(usize::MAX, usize::MAX),
        };
        let layers = HighlightIterLayer::new(
            source,
            None,
            self,
            cancellation_flag,
            &mut injection_callback,
            config,
            0,
            vec![unbounded_range],
        )?;
        assert_ne!(layers.len(), 0);

        let mut iter = HighlightIter {
            source,
            language_name: &config.language_name,
            byte_offset: 0,
            highlighter: self,
            injection_callback,
            cancellation_flag,
            layers,
            iter_count: 0,
            next_event: None,
            last_highlight_range: None,
        };
        iter.sort_layers();
        Ok(iter)
    }
}

impl HighlightConfiguration {
    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
    /// queries.
    ///
    /// # Parameters
    ///
    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
    /// * `name` - The language name, stored as this configuration's `language_name`.
    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
    ///   should be non-empty, otherwise no syntax highlights will be added.
    /// * `injection_query` -  A string containing tree patterns for injecting other languages into
    ///   the document. This can be empty if no injections are desired.
    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
    ///   and references. This can be empty if local variable tracking is not needed.
    ///
    /// # Errors
    ///
    /// Returns a `QueryError` if `Query::new` rejects either the concatenated query source or
    /// the injection query on its own.
    ///
    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
    pub fn new(
        language: Language,
        name: impl Into<String>,
        highlights_query: &str,
        injection_query: &str,
        locals_query: &str,
    ) -> Result<Self, QueryError> {
        // Concatenate the query strings, keeping track of the start offset of each section.
        // The order is: injections, then locals, then highlights.
        let mut query_source = String::new();
        query_source.push_str(injection_query);
        let locals_query_offset = query_source.len();
        query_source.push_str(locals_query);
        let highlights_query_offset = query_source.len();
        query_source.push_str(highlights_query);

        // Construct a single query by concatenating the three query strings, but record the
        // range of pattern indices that belong to each individual string:
        // * `0..locals_pattern_index` are injection patterns,
        // * `locals_pattern_index..highlights_pattern_index` are locals patterns,
        // * everything from `highlights_pattern_index` on is a highlight pattern.
        let mut query = Query::new(&language, &query_source)?;
        let mut locals_pattern_index = 0;
        let mut highlights_pattern_index = 0;
        for i in 0..query.pattern_count() {
            let pattern_offset = query.start_byte_for_pattern(i);
            if pattern_offset < highlights_query_offset {
                highlights_pattern_index += 1;
                // `locals_query_offset <= highlights_query_offset`, so only patterns that
                // precede the highlights section can also precede the locals section.
                if pattern_offset < locals_query_offset {
                    locals_pattern_index += 1;
                }
            }
        }

        // Construct a separate query just for dealing with the 'combined injections'.
        // Disable the combined injection patterns in the main query, and disable every
        // other injection pattern in the separate query. The injection query is the first
        // section of the concatenated source, so the same pattern index is used for both.
        let mut combined_injections_query = Query::new(&language, injection_query)?;
        let mut has_combined_queries = false;
        for pattern_index in 0..locals_pattern_index {
            let settings = query.property_settings(pattern_index);
            if settings.iter().any(|s| &*s.key == "injection.combined") {
                has_combined_queries = true;
                query.disable_pattern(pattern_index);
            } else {
                combined_injections_query.disable_pattern(pattern_index);
            }
        }
        // Only keep the separate query when at least one pattern actually uses it.
        let combined_injections_query = if has_combined_queries {
            Some(combined_injections_query)
        } else {
            None
        };

        // Find all of the highlighting patterns that are disabled for nodes that
        // have been identified as local variables, i.e. patterns carrying a negated
        // `local` property predicate.
        let non_local_variable_patterns = (0..query.pattern_count())
            .map(|i| {
                query
                    .property_predicates(i)
                    .iter()
                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
            })
            .collect();

        // Store the numeric ids for all of the special captures.
        let mut injection_content_capture_index = None;
        let mut injection_language_capture_index = None;
        let mut local_def_capture_index = None;
        let mut local_def_value_capture_index = None;
        let mut local_ref_capture_index = None;
        let mut local_scope_capture_index = None;
        for (i, name) in query.capture_names().iter().enumerate() {
            let i = Some(i as u32);
            match *name {
                "injection.content" => injection_content_capture_index = i,
                "injection.language" => injection_language_capture_index = i,
                "local.definition" => local_def_capture_index = i,
                "local.definition-value" => local_def_value_capture_index = i,
                "local.reference" => local_ref_capture_index = i,
                "local.scope" => local_scope_capture_index = i,
                _ => {}
            }
        }

        // No capture maps to a highlight until `configure` is called.
        let highlight_indices = vec![None; query.capture_names().len()];
        Ok(Self {
            language,
            language_name: name.into(),
            query,
            combined_injections_query,
            locals_pattern_index,
            highlights_pattern_index,
            highlight_indices,
            non_local_variable_patterns,
            injection_content_capture_index,
            injection_language_capture_index,
            local_def_capture_index,
            local_def_value_capture_index,
            local_ref_capture_index,
            local_scope_capture_index,
        })
    }

    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the configuration's combined query, in the order the
    /// query reports them.
    #[must_use]
    pub const fn names(&self) -> &[&str] {
        self.query.capture_names()
    }

    /// Set the list of recognized highlight names.
    ///
    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
    /// these queries can choose to recognize highlights with different levels of specificity.
    /// For example, the string `function.builtin` will match against `function.method.builtin`
    /// and `function.builtin.constructor`, but will not match `function.method`.
    ///
    /// When highlighting, results are returned as `Highlight` values, which contain the index
    /// of the matched highlight this list of highlight names.
    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
        let mut capture_parts = Vec::new();
        self.highlight_indices.clear();
        self.highlight_indices
            .extend(self.query.capture_names().iter().map(move |capture_name| {
                capture_parts.clear();
                capture_parts.extend(capture_name.split('.'));

                let mut best_index = None;
                let mut best_match_len = 0;
                for (i, recognized_name) in recognized_names.iter().enumerate() {
                    let mut len = 0;
                    let mut matches = true;
                    for part in recognized_name.as_ref().split('.') {
                        len += 1;
                        if !capture_parts.contains(&part) {
                            matches = false;
                            break;
                        }
                    }
                    if matches && len > best_match_len {
                        best_index = Some(i);
                        best_match_len = len;
                    }
                }
                best_index.map(Highlight)
            }));
    }

    /// Return the list of this configuration's capture names that are neither present in the
    /// list of predefined 'canonical' names nor start with an underscore (denoting 'private'
    /// captures used as part of capture internals).
    ///
    /// When `capture_names` is empty, `STANDARD_CAPTURE_NAMES` is used as the canonical set.
    #[must_use]
    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
        let canonical = if capture_names.is_empty() {
            &*STANDARD_CAPTURE_NAMES
        } else {
            capture_names
        };

        let mut nonconformant = Vec::new();
        for &name in self.names() {
            let is_private = name.starts_with('_');
            if is_private || canonical.contains(name) {
                continue;
            }
            nonconformant.push(name);
        }
        nonconformant
    }
}

impl<'a> HighlightIterLayer<'a> {
    /// Create a new 'layer' of highlighting for this document.
    ///
    /// In the event that the new layer contains "combined injections" (injections where multiple
    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
    /// added to the returned vector.
    ///
    /// The function works through a queue: it starts with the given `config`, `depth` and
    /// `ranges`, and every combined injection it discovers is queued as a further
    /// `(config, depth + 1, ranges)` entry to be parsed in first-in, first-out order.
    ///
    /// # Errors
    ///
    /// * `Error::InvalidLanguage` if the parser rejects a configuration's language.
    /// * `Error::Cancelled` if parsing does not produce a tree.
    ///
    /// An entry whose ranges are rejected by `set_included_ranges` is skipped without
    /// producing a layer or an error.
    #[allow(clippy::too_many_arguments)]
    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
        source: &'a [u8],
        parent_name: Option<&str>,
        highlighter: &mut Highlighter,
        cancellation_flag: Option<&'a AtomicUsize>,
        injection_callback: &mut F,
        mut config: &'a HighlightConfiguration,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<Vec<Self>, Error> {
        let mut result = Vec::with_capacity(1);
        // Combined injections discovered while building layers, still waiting to be parsed.
        let mut queue = Vec::new();
        loop {
            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                highlighter
                    .parser
                    .set_language(&config.language)
                    .map_err(|_| Error::InvalidLanguage)?;

                let tree = highlighter
                    .parser
                    .parse_with_options(
                        // Hand the parser the remainder of the source starting at byte `i`,
                        // or an empty slice once `i` is past the end.
                        &mut |i, _| {
                            if i < source.len() {
                                &source[i..]
                            } else {
                                &[]
                            }
                        },
                        None,
                        // The progress callback reports `true` once the cancellation flag is
                        // non-zero (presumably asking the parser to stop; confirm against
                        // the `ParseOptions` documentation).
                        Some(ParseOptions::new().progress_callback(&mut |_| {
                            if let Some(cancellation_flag) = cancellation_flag {
                                cancellation_flag.load(Ordering::SeqCst) != 0
                            } else {
                                false
                            }
                        })),
                    )
                    .ok_or(Error::Cancelled)?;
                // Reuse a pooled cursor when one is available, otherwise create a new one.
                let mut cursor = highlighter.cursors.pop().unwrap_or_default();

                // Process combined injections.
                if let Some(combined_injections_query) = &config.combined_injections_query {
                    // One `(language name, content nodes, include children)` entry per
                    // pattern of the combined-injections query.
                    let mut injections_by_pattern_index =
                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
                    let mut matches =
                        cursor.matches(combined_injections_query, tree.root_node(), source);
                    while let Some(mat) = matches.next() {
                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
                        let (language_name, content_node, include_children) = injection_for_match(
                            config,
                            parent_name,
                            combined_injections_query,
                            mat,
                            source,
                        );
                        // A later match only replaces the language name when it provides one.
                        if language_name.is_some() {
                            entry.0 = language_name;
                        }
                        if let Some(content_node) = content_node {
                            entry.1.push(content_node);
                        }
                        // The value from the most recent match of this pattern wins.
                        entry.2 = include_children;
                    }
                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
                    {
                        // Only patterns that produced both a language name and at least one
                        // content node can be injected.
                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
                            if let Some(next_config) = (injection_callback)(lang_name) {
                                let ranges = Self::intersect_ranges(
                                    &ranges,
                                    &content_nodes,
                                    includes_children,
                                );
                                if !ranges.is_empty() {
                                    queue.push((next_config, depth + 1, ranges));
                                }
                            }
                        }
                    }
                }

                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
                // prevents them from being moved. But both of these values are really just
                // pointers, so it's actually ok to move them.
                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
                let cursor_ref = unsafe {
                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
                };
                let captures = unsafe {
                    std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
                    )
                }
                .peekable();

                result.push(HighlightIterLayer {
                    highlight_end_stack: Vec::new(),
                    // Every layer starts with a single root scope spanning the whole
                    // document that does not inherit from anything.
                    scope_stack: vec![LocalScope {
                        inherits: false,
                        range: 0..usize::MAX,
                        local_defs: Vec::new(),
                    }],
                    cursor,
                    depth,
                    _tree: tree,
                    captures,
                    config,
                    ranges,
                });
            }

            if queue.is_empty() {
                break;
            }

            // Take the oldest queued injection and build a layer for it on the next pass.
            let (next_config, next_depth, next_ranges) = queue.remove(0);
            config = next_config;
            depth = next_depth;
            ranges = next_ranges;
        }

        Ok(result)
    }

    // Compute the ranges that should be included when parsing an injection.
    // This takes into account three things:
    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
    //   ranges of those nodes.
    // * `includes_children` - For some injections, the content nodes' children should be excluded
    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
    //   of their children.
    //
    // Panics if `nodes` is empty (it indexes `nodes[0]`) or if `parent_ranges` is empty.
    //
    // The parent-range iterator only ever moves forward and is shared across all nodes, so the
    // result is only complete when `nodes` and `parent_ranges` are both in ascending order
    // (NOTE(review): ordering is assumed, not checked here; confirm against the callers).
    fn intersect_ranges(
        parent_ranges: &[Range],
        nodes: &[Node],
        includes_children: bool,
    ) -> Vec<Range> {
        let mut cursor = nodes[0].walk();
        let mut result = Vec::new();
        let mut parent_range_iter = parent_ranges.iter();
        let mut parent_range = parent_range_iter
            .next()
            .expect("Layers should only be constructed with non-empty ranges vectors");
        for node in nodes {
            // Sentinel "excluded" ranges on either side of the node: everything before the
            // node's start and everything after the node's end.
            let mut preceding_range = Range {
                start_byte: 0,
                start_point: Point::new(0, 0),
                end_byte: node.start_byte(),
                end_point: node.start_position(),
            };
            let following_range = Range {
                start_byte: node.end_byte(),
                start_point: node.end_position(),
                end_byte: usize::MAX,
                end_point: Point::new(usize::MAX, usize::MAX),
            };

            // Walk the excluded ranges in order: the node's children (only when children are
            // not included) followed by the trailing sentinel.
            for excluded_range in node
                .children(&mut cursor)
                .filter_map(|child| {
                    if includes_children {
                        None
                    } else {
                        Some(child.range())
                    }
                })
                .chain(std::iter::once(following_range))
            {
                // `range` is the gap between the previous excluded range and this one; it is
                // the candidate piece of the node to include.
                let mut range = Range {
                    start_byte: preceding_range.end_byte,
                    start_point: preceding_range.end_point,
                    end_byte: excluded_range.start_byte,
                    end_point: excluded_range.start_point,
                };
                preceding_range = excluded_range;

                // The gap ends before the current parent range begins: nothing to include.
                if range.end_byte < parent_range.start_byte {
                    continue;
                }

                // Clip the gap against each parent range that starts at or before its end.
                while parent_range.start_byte <= range.end_byte {
                    if parent_range.end_byte > range.start_byte {
                        // Trim the front of the gap to the start of the parent range.
                        if range.start_byte < parent_range.start_byte {
                            range.start_byte = parent_range.start_byte;
                            range.start_point = parent_range.start_point;
                        }

                        if parent_range.end_byte < range.end_byte {
                            // The parent range ends inside the gap: emit the covered part
                            // and continue with the remainder against the next parent range.
                            if range.start_byte < parent_range.end_byte {
                                result.push(Range {
                                    start_byte: range.start_byte,
                                    start_point: range.start_point,
                                    end_byte: parent_range.end_byte,
                                    end_point: parent_range.end_point,
                                });
                            }
                            range.start_byte = parent_range.end_byte;
                            range.start_point = parent_range.end_point;
                        } else {
                            // The rest of the gap fits in this parent range; emit it unless
                            // it is empty, and keep this parent range for the next gap.
                            if range.start_byte < range.end_byte {
                                result.push(range);
                            }
                            break;
                        }
                    }

                    // Advance to the next parent range; once they are exhausted no further
                    // gap can be included.
                    if let Some(next_range) = parent_range_iter.next() {
                        parent_range = next_range;
                    } else {
                        return result;
                    }
                }
            }
        }
        result
    }

    // Ordering key for this layer's next scope boundary, or `None` when the layer has no
    // boundary left. Keys compare first by byte offset in the document, then with scope
    // endings (`false`) before scope beginnings (`true`) at the same offset, and finally
    // with deeper layers first (the depth is stored negated).
    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
        let negated_depth = -(self.depth as isize);
        let upcoming_start = self
            .captures
            .peek()
            .map(|(mat, capture_ix)| mat.captures[*capture_ix].node.start_byte());
        let pending_end = self.highlight_end_stack.last().copied();

        let (offset, is_start) = match (upcoming_start, pending_end) {
            // Both exist: the start only wins when it lies strictly before the end.
            (Some(start), Some(end)) if start < end => (start, true),
            (Some(_), Some(end)) => (end, false),
            (Some(start), None) => (start, true),
            (None, Some(end)) => (end, false),
            (None, None) => return None,
        };
        Some((offset, is_start, negated_depth))
    }
}

impl<'a, F> HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    /// Returns the next event to hand to the caller for a boundary at `offset`.
    ///
    /// If source text between the current position and `offset` has not been emitted yet,
    /// a `Source` event for it is returned first and `event` is parked in `next_event`.
    /// Otherwise `event` itself is returned. The layers are re-sorted in either case.
    fn emit_event(
        &mut self,
        offset: usize,
        event: Option<HighlightEvent>,
    ) -> Option<Result<HighlightEvent, Error>> {
        let emitted = if self.byte_offset < offset {
            let start = self.byte_offset;
            self.byte_offset = offset;
            self.next_event = event;
            Some(Ok(HighlightEvent::Source { start, end: offset }))
        } else {
            event.map(Ok)
        };
        self.sort_layers();
        emitted
    }

    /// Restores the ordering of `self.layers` after the front layer's sort key may have
    /// changed.
    ///
    /// Front layers without a sort key are removed and their cursors are returned to the
    /// highlighter's pool. The first front layer that still has a key is moved behind the
    /// run of layers directly after it whose keys are smaller.
    fn sort_layers(&mut self) {
        while !self.layers.is_empty() {
            let Some(front_key) = self.layers[0].sort_key() else {
                // The front layer has nothing left to emit: retire it and look at the
                // layer that takes its place.
                let exhausted = self.layers.remove(0);
                self.highlighter.cursors.push(exhausted.cursor);
                continue;
            };

            // Index of the last consecutive layer whose key sorts before the front layer's.
            let mut last_smaller = 0;
            while last_smaller + 1 < self.layers.len() {
                match self.layers[last_smaller + 1].sort_key() {
                    Some(key) if key < front_key => last_smaller += 1,
                    _ => break,
                }
            }
            if last_smaller > 0 {
                self.layers[0..=last_smaller].rotate_left(1);
            }
            break;
        }
    }

    /// Inserts a freshly created layer into `self.layers` according to its sort key.
    ///
    /// The layer is placed before the first existing layer, starting at index 1, whose key
    /// is greater than its own, or appended when there is none. The search starts at 1, so
    /// the layer at index 0 is never displaced here.
    ///
    /// Layers that have nothing left to emit are discarded: existing layers met during the
    /// search, and the new layer itself if it has no sort key. Their query cursors are
    /// returned to the highlighter's pool, as `sort_layers` does, so later layers can reuse
    /// them instead of allocating new ones.
    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
        let Some(sort_key) = layer.sort_key() else {
            // The new layer has no captures at all; keep its cursor for reuse.
            self.highlighter.cursors.push(layer.cursor);
            return;
        };

        let mut i = 1;
        while i < self.layers.len() {
            match self.layers[i].sort_key() {
                Some(sort_key_i) if sort_key_i > sort_key => {
                    self.layers.insert(i, layer);
                    return;
                }
                Some(_) => i += 1,
                None => {
                    // Exhausted layer: drop it, but keep its cursor for reuse. `i` is not
                    // advanced because the next layer has shifted into this slot.
                    let exhausted = self.layers.remove(i);
                    self.highlighter.cursors.push(exhausted.cursor);
                }
            }
        }
        self.layers.push(layer);
    }
}

impl<'a, F> Iterator for HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    /// Each item is either a highlight event or the error that stopped highlighting.
    type Item = Result<HighlightEvent, Error>;

    /// Produces the next highlight event.
    ///
    /// Every pass of the main loop inspects the front layer (`self.layers[0]`) and either
    /// returns an event or consumes one capture (an injection, local-variable bookkeeping,
    /// or a highlight that yields nothing) and loops again. Returns `None` once all layers
    /// are gone and the whole source has been emitted.
    fn next(&mut self) -> Option<Self::Item> {
        'main: loop {
            // If we've already determined the next highlight boundary, just return it.
            if let Some(e) = self.next_event.take() {
                return Some(Ok(e));
            }

            // Periodically check for cancellation, returning `Cancelled` error if the
            // cancellation flag was flipped.
            if let Some(cancellation_flag) = self.cancellation_flag {
                self.iter_count += 1;
                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
                    self.iter_count = 0;
                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
                        return Some(Err(Error::Cancelled));
                    }
                }
            }

            // If none of the layers have any more highlight boundaries, terminate.
            // Any source text that has not been emitted yet is flushed as one final
            // `Source` event first.
            if self.layers.is_empty() {
                return if self.byte_offset < self.source.len() {
                    let result = Some(Ok(HighlightEvent::Source {
                        start: self.byte_offset,
                        end: self.source.len(),
                    }));
                    self.byte_offset = self.source.len();
                    result
                } else {
                    None
                };
            }

            // Get the next capture from whichever layer has the earliest highlight boundary.
            let range;
            let layer = &mut self.layers[0];
            if let Some((next_match, capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*capture_index];
                range = next_capture.node.byte_range();

                // If any previous highlight ends before this node starts, then before
                // processing this capture, emit the source code up until the end of the
                // previous highlight, and an end event for that highlight.
                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
                    if end_byte <= range.start {
                        layer.highlight_end_stack.pop();
                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
                    }
                }
            }
            // If there are no more captures, then emit any remaining highlight end events.
            // And if there are none of those, then just advance to the end of the document.
            else {
                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
                    layer.highlight_end_stack.pop();
                    return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
                }
                return self.emit_event(self.source.len(), None);
            }

            // The peek above succeeded, so taking the capture cannot fail.
            let (mut match_, capture_index) = layer.captures.next().unwrap();
            let mut capture = match_.captures[capture_index];

            // If this capture represents an injection, then process the injection.
            // Injection patterns are the ones with indices below `locals_pattern_index`.
            if match_.pattern_index < layer.config.locals_pattern_index {
                let (language_name, content_node, include_children) = injection_for_match(
                    layer.config,
                    Some(self.language_name),
                    &layer.config.query,
                    &match_,
                    self.source,
                );

                // Explicitly remove this match so that none of its other captures will remain
                // in the stream of captures.
                match_.remove();

                // If a language is found with the given name, then add a new language layer
                // to the highlighted document.
                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
                    if let Some(config) = (self.injection_callback)(language_name) {
                        // The injected ranges are limited to the current layer's own ranges.
                        let ranges = HighlightIterLayer::intersect_ranges(
                            &self.layers[0].ranges,
                            &[content_node],
                            include_children,
                        );
                        if !ranges.is_empty() {
                            match HighlightIterLayer::new(
                                self.source,
                                Some(self.language_name),
                                self.highlighter,
                                self.cancellation_flag,
                                &mut self.injection_callback,
                                config,
                                self.layers[0].depth + 1,
                                ranges,
                            ) {
                                Ok(layers) => {
                                    for layer in layers {
                                        self.insert_layer(layer);
                                    }
                                }
                                Err(e) => return Some(Err(e)),
                            }
                        }
                    }
                }

                self.sort_layers();
                continue 'main;
            }

            // Remove from the local scope stack any local scopes that have already ended.
            // The root scope pushed at layer creation spans `0..usize::MAX`, so the
            // `unwrap` below always finds a scope.
            while range.start > layer.scope_stack.last().unwrap().range.end {
                layer.scope_stack.pop();
            }

            // If this capture is for tracking local variables, then process the
            // local variable info.
            let mut reference_highlight = None;
            let mut definition_highlight = None;
            // Locals patterns are the ones with indices below `highlights_pattern_index`
            // (injection patterns were already handled above).
            while match_.pattern_index < layer.config.highlights_pattern_index {
                // If the node represents a local scope, push a new local scope onto
                // the scope stack.
                if Some(capture.index) == layer.config.local_scope_capture_index {
                    definition_highlight = None;
                    let mut scope = LocalScope {
                        inherits: true,
                        range: range.clone(),
                        local_defs: Vec::new(),
                    };
                    // A `local.scope-inherits` property with a value other than "true"
                    // turns inheritance off; a property without a value keeps it on.
                    for prop in layer.config.query.property_settings(match_.pattern_index) {
                        if prop.key.as_ref() == "local.scope-inherits" {
                            scope.inherits =
                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
                        }
                    }
                    layer.scope_stack.push(scope);
                }
                // If the node represents a definition, add a new definition to the
                // local scope at the top of the scope stack.
                else if Some(capture.index) == layer.config.local_def_capture_index {
                    reference_highlight = None;
                    definition_highlight = None;
                    let scope = layer.scope_stack.last_mut().unwrap();

                    // Range of the definition's value, taken from the match's
                    // `local.definition-value` capture; stays `0..0` when there is none.
                    let mut value_range = 0..0;
                    for capture in match_.captures {
                        if Some(capture.index) == layer.config.local_def_value_capture_index {
                            value_range = capture.node.byte_range();
                        }
                    }

                    // Definitions whose text is not valid UTF-8 are not recorded.
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        scope.local_defs.push(LocalDef {
                            name,
                            value_range,
                            highlight: None,
                        });
                        // Remember where to store the highlight that this node ends up
                        // with, so later references can reuse it.
                        definition_highlight =
                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
                    }
                }
                // If the node represents a reference, then try to find the corresponding
                // definition in the scope stack.
                // A node already recorded as a definition during this loop is not also
                // treated as a reference.
                else if Some(capture.index) == layer.config.local_ref_capture_index
                    && definition_highlight.is_none()
                {
                    definition_highlight = None;
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        // Search from the innermost scope outwards, newest definition first.
                        for scope in layer.scope_stack.iter().rev() {
                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
                                // A definition only applies to references that start at or
                                // after the end of its value range.
                                if def.name == name && range.start >= def.value_range.end {
                                    Some(def.highlight)
                                } else {
                                    None
                                }
                            }) {
                                reference_highlight = highlight;
                                break;
                            }
                            // Do not look past a scope that does not inherit from its parent.
                            if !scope.inherits {
                                break;
                            }
                        }
                    }
                }

                // Continue processing any additional matches for the same node.
                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                    let next_capture = next_match.captures[*next_capture_index];
                    if next_capture.node == capture.node {
                        capture = next_capture;
                        match_ = layer.captures.next().unwrap().0;
                        continue;
                    }
                }

                // No further capture for this node: it produced no highlight event.
                self.sort_layers();
                continue 'main;
            }

            // Otherwise, this capture must represent a highlight.
            // If this exact range has already been highlighted by an earlier pattern, or by
            // a different layer, then skip over this one.
            // The skip only happens when the current layer's depth is lower than the depth
            // recorded for the previous highlight of the identical range.
            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
                    self.sort_layers();
                    continue 'main;
                }
            }

            // Once a highlighting pattern is found for the current node, keep iterating over
            // any later highlighting patterns that also match this node and set the match to it.
            // Captures for a given node are ordered by pattern index, so these subsequent
            // captures are guaranteed to be for highlighting, not injections or
            // local variables.
            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*next_capture_index];
                if next_capture.node == capture.node {
                    let following_match = layer.captures.next().unwrap().0;
                    // If the current node was found to be a local variable, then ignore
                    // the following match if it's a highlighting pattern that is disabled
                    // for local variables.
                    if (definition_highlight.is_some() || reference_highlight.is_some())
                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
                    {
                        continue;
                    }
                    // The later pattern replaces the earlier one for this node.
                    match_.remove();
                    capture = next_capture;
                    match_ = following_match;
                } else {
                    break;
                }
            }

            // `None` when the capture's name matched none of the recognized highlight names.
            let current_highlight = layer.config.highlight_indices[capture.index as usize];

            // If this node represents a local definition, then store the current
            // highlight value on the local scope entry representing this node.
            if let Some(definition_highlight) = definition_highlight {
                *definition_highlight = current_highlight;
            }

            // Emit a scope start event and push the node's end position to the stack.
            // A highlight inherited from the referenced definition takes precedence over
            // the capture's own highlight.
            if let Some(highlight) = reference_highlight.or(current_highlight) {
                self.last_highlight_range = Some((range.start, range.end, layer.depth));
                layer.highlight_end_stack.push(range.end);
                return self
                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
            }

            // Nothing to emit for this capture; re-sort and move on to the next one.
            self.sort_layers();
        }
    }
}

impl Default for HtmlRenderer {
    /// Builds a renderer exactly as [`HtmlRenderer::new`] does.
    fn default() -> Self {
        Self::new()
    }
}

impl HtmlRenderer {
    /// Creates a renderer with pre-allocated HTML and line-offset buffers.
    ///
    /// The line-offset table starts out holding a single `0`, the offset at
    /// which the first rendered line begins.
    #[must_use]
    pub fn new() -> Self {
        let mut result = Self {
            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
            carriage_return_highlight: None,
            last_carriage_return: None,
        };
        result.line_offsets.push(0);
        result
    }

    /// Sets the highlight used to mark lone carriage returns (those that are
    /// not followed by a line feed). With `None`, lone carriage returns are
    /// dropped from the output without leaving any markup behind.
    pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
        self.carriage_return_highlight = highlight;
    }

    /// Discards all rendered output so that the renderer can be reused.
    ///
    /// The configured carriage-return highlight is kept.
    pub fn reset(&mut self) {
        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
        self.line_offsets.push(0);
        // A pending carriage-return offset indexes into the buffer that was
        // just cleared. It can survive a `render` call that returned early
        // with an error; leaving it set would make the next render insert a
        // stray span, or panic in `Vec::split_off` with an out-of-range index.
        self.last_carriage_return = None;
    }

    /// Renders a stream of highlight events over `source` into HTML.
    ///
    /// `attribute_callback` is invoked for every opened `<span ` and is
    /// expected to append that span's attributes to the buffer it is given.
    ///
    /// The output is appended to the renderer's internal buffer and is
    /// guaranteed to end with a newline. Use [`Self::lines`] to read it back.
    ///
    /// # Errors
    ///
    /// Returns the first error yielded by `highlighter`. Output produced before
    /// the error stays in the buffer; call [`Self::reset`] before reusing the
    /// renderer.
    pub fn render<F>(
        &mut self,
        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
        source: &[u8],
        attribute_callback: &F,
    ) -> Result<(), Error>
    where
        F: Fn(Highlight, &mut Vec<u8>),
    {
        // Stack of currently open highlights, needed to close and re-open the
        // spans at every line boundary.
        let mut highlights = Vec::new();
        for event in highlighter {
            match event {
                Ok(HighlightEvent::HighlightStart(s)) => {
                    highlights.push(s);
                    self.start_highlight(s, attribute_callback);
                }
                Ok(HighlightEvent::HighlightEnd) => {
                    highlights.pop();
                    self.end_highlight();
                }
                Ok(HighlightEvent::Source { start, end }) => {
                    self.add_text(&source[start..end], &highlights, attribute_callback);
                }
                Err(a) => return Err(a),
            }
        }
        // A carriage return at the very end of the input is a lone one.
        if let Some(offset) = self.last_carriage_return.take() {
            self.add_carriage_return(offset, attribute_callback);
        }
        if self.html.last() != Some(&b'\n') {
            self.html.push(b'\n');
        }
        // Drop the trailing offset that would otherwise describe an empty line
        // after the final newline.
        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
            self.line_offsets.pop();
        }
        Ok(())
    }

    /// Iterates over the rendered HTML one line at a time. Each line includes
    /// its trailing newline.
    ///
    /// # Panics
    ///
    /// Panics if a line of the buffer is not valid UTF-8.
    pub fn lines(&self) -> impl Iterator<Item = &str> {
        self.line_offsets
            .iter()
            .enumerate()
            .map(move |(i, line_start)| {
                let line_start = *line_start as usize;
                let line_end = if i + 1 == self.line_offsets.len() {
                    self.html.len()
                } else {
                    self.line_offsets[i + 1] as usize
                };
                str::from_utf8(&self.html[line_start..line_end]).unwrap()
            })
    }

    /// Inserts an empty, attributed span at `offset` to mark a lone carriage
    /// return. Does nothing when no carriage-return highlight is configured.
    fn add_carriage_return<F>(&mut self, offset: usize, attribute_callback: &F)
    where
        F: Fn(Highlight, &mut Vec<u8>),
    {
        if let Some(highlight) = self.carriage_return_highlight {
            // If a CR is the last character in a `HighlightEvent::Source`
            // region, then we don't know until the next `Source` event or EOF
            // whether it is part of CRLF or on its own. To avoid unbounded
            // lookahead, save the offset of the CR and insert there now that we
            // know.
            let rest = self.html.split_off(offset);
            self.html.extend(b"<span ");
            (attribute_callback)(highlight, &mut self.html);
            self.html.extend(b"></span>");
            self.html.extend(rest);
        }
    }

    /// Opens a span for `h`, letting the callback write its attributes.
    fn start_highlight<F>(&mut self, h: Highlight, attribute_callback: &F)
    where
        F: Fn(Highlight, &mut Vec<u8>),
    {
        self.html.extend(b"<span ");
        (attribute_callback)(h, &mut self.html);
        self.html.extend(b">");
    }

    /// Closes the most recently opened span.
    fn end_highlight(&mut self) {
        self.html.extend(b"</span>");
    }

    /// Appends `src` as HTML-escaped text.
    ///
    /// `highlights` is the stack of currently open highlights; their spans are
    /// closed before each newline and re-opened after it so that every output
    /// line is self-contained markup.
    fn add_text<F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
    where
        F: Fn(Highlight, &mut Vec<u8>),
    {
        /// Returns the HTML entity for `c`, or `None` if `c` needs no escaping.
        const fn html_escape(c: u8) -> Option<&'static [u8]> {
            match c as char {
                '>' => Some(b"&gt;"),
                '<' => Some(b"&lt;"),
                '&' => Some(b"&amp;"),
                '\'' => Some(b"&#39;"),
                '"' => Some(b"&quot;"),
                _ => None,
            }
        }

        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
            // Don't render carriage return characters, but allow lone carriage returns (not
            // followed by line feeds) to be styled via the attribute callback.
            if c == b'\r' {
                self.last_carriage_return = Some(self.html.len());
                continue;
            }
            if let Some(offset) = self.last_carriage_return.take() {
                if c != b'\n' {
                    self.add_carriage_return(offset, attribute_callback);
                }
            }

            // At line boundaries, close and re-open all of the open tags.
            if c == b'\n' {
                highlights.iter().for_each(|_| self.end_highlight());
                self.html.push(c);
                self.line_offsets.push(self.html.len() as u32);
                highlights
                    .iter()
                    .for_each(|scope| self.start_highlight(*scope, attribute_callback));
            } else if let Some(escape) = html_escape(c) {
                self.html.extend_from_slice(escape);
            } else {
                self.html.push(c);
            }
        }
    }
}

/// Works out what a single match of the injections query asks for.
///
/// Returns, in order:
/// - the name of the language to inject, if one could be determined,
/// - the node captured as the injection's content, if any,
/// - whether the pattern sets `injection.include-children`.
///
/// A language name taken from a captured node's text wins over any name
/// supplied through a `#set!` property; among properties, the first one that
/// yields a name wins.
fn injection_for_match<'a>(
    config: &'a HighlightConfiguration,
    parent_name: Option<&'a str>,
    query: &'a Query,
    query_match: &QueryMatch<'a, 'a>,
    source: &'a [u8],
) -> (Option<&'a str>, Option<Node<'a>>, bool) {
    let mut language_name = None;
    let mut content_node = None;

    for capture in query_match.captures {
        let capture_index = Some(capture.index);
        if capture_index == config.injection_language_capture_index {
            language_name = capture.node.utf8_text(source).ok();
        } else if capture_index == config.injection_content_capture_index {
            content_node = Some(capture.node);
        }
    }

    let mut include_children = false;
    for setting in query.property_settings(query_match.pattern_index) {
        match setting.key.as_ref() {
            // By default an injection covers only the ranges that belong to
            // the `injection.content` node itself, not those of its children.
            // This key opts in to including the children as well.
            "injection.include-children" => include_children = true,

            // The language name may be hard-coded with a `#set!` predicate
            // instead of being read from the text of a captured node.
            "injection.language" if language_name.is_none() => {
                language_name = setting.value.as_ref().map(std::convert::AsRef::as_ref);
            }

            // Re-use the language of the current layer.
            "injection.self" if language_name.is_none() => {
                language_name = Some(config.language_name.as_str());
            }

            // Re-use the language of the parent layer.
            "injection.parent" if language_name.is_none() => {
                language_name = parent_name;
            }

            _ => {}
        }
    }

    (language_name, content_node, include_children)
}

/// Empties `vec`. If it currently holds more than `capacity` elements, its
/// allocation is first shrunk to fit `capacity` elements so that an unusually
/// large buffer is not kept alive between uses.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    let exceeds_reserve = vec.len() > capacity;
    if exceeds_reserve {
        // Cut the vector down to the reserve size before shrinking, so that
        // `shrink_to_fit` releases everything beyond it.
        vec.truncate(capacity);
        vec.shrink_to_fit();
    }
    vec.clear();
}



================================================
FILE: crates/language/README.md
================================================
# Tree-sitter Language

This crate provides a `LanguageFn` type for grammars to create `Language` instances from a parser,
without having to worry about the `tree-sitter` crate version not matching.



================================================
FILE: crates/language/Cargo.toml
================================================
[package]
name = "tree-sitter-language"
description = "The tree-sitter Language type, used by the library and by language implementations"
version = "0.1.4"
authors.workspace = true
edition.workspace = true
rust-version = "1.76"
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-language"
license.workspace = true
keywords.workspace = true
categories = ["api-bindings", "development-tools::ffi", "parsing"]

[lints]
workspace = true

[lib]
path = "src/language.rs"



================================================
FILE: crates/language/src/language.rs
================================================
#![no_std]
/// A copyable handle to the C entry point of a tree-sitter grammar: a function
/// taking no arguments and returning an untyped pointer to the grammar.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct LanguageFn(unsafe extern "C" fn() -> *const ());

impl LanguageFn {
    /// Wraps a raw language function in a [`LanguageFn`].
    ///
    /// # Safety
    ///
    /// `f` must be a language function that the Tree-sitter CLI generated
    /// from a grammar.
    pub const unsafe fn from_raw(f: unsafe extern "C" fn() -> *const ()) -> Self {
        Self(f)
    }

    /// Unwraps this [`LanguageFn`], giving back the raw function it holds.
    #[must_use]
    pub const fn into_raw(self) -> unsafe extern "C" fn() -> *const () {
        let Self(raw) = self;
        raw
    }
}



================================================
FILE: crates/loader/README.md
================================================
# Tree-sitter Loader

The `tree-sitter` command-line program will dynamically find and build grammars
at runtime, if you have cloned the grammars' repositories to your local
filesystem. This helper crate implements that logic, so that you can use it in
your own program analysis tools, as well.



================================================
FILE: crates/loader/build.rs
================================================
/// Build script: re-exports Cargo's `TARGET` and `HOST` build-script variables
/// to the crate as the compile-time environment variables `BUILD_TARGET` and
/// `BUILD_HOST`.
///
/// Panics if either variable is missing from the environment.
fn main() {
    for (exported, cargo_var) in [("BUILD_TARGET", "TARGET"), ("BUILD_HOST", "HOST")] {
        let value = std::env::var(cargo_var).unwrap();
        println!("cargo:rustc-env={exported}={value}");
    }
}



================================================
FILE: crates/loader/Cargo.toml
================================================
[package]
name = "tree-sitter-loader"
version.workspace = true
description = "Locates, builds, and loads tree-sitter grammars at runtime"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-loader"
license.workspace = true
keywords.workspace = true
categories.workspace = true

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

[lib]
path = "src/loader.rs"

[lints]
workspace = true

[features]
wasm = ["tree-sitter/wasm"]
default = ["tree-sitter-highlight", "tree-sitter-tags"]

[dependencies]
anyhow.workspace = true
cc.workspace = true
etcetera.workspace = true
flate2.workspace = true
fs4.workspace = true
indoc.workspace = true
libloading.workspace = true
once_cell.workspace = true
path-slash.workspace = true
regex.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
tar.workspace = true
tempfile.workspace = true
url.workspace = true
ureq = "3.0.12"

tree-sitter = { workspace = true }
tree-sitter-highlight = { workspace = true, optional = true }
tree-sitter-tags = { workspace = true, optional = true }



================================================
FILE: crates/loader/emscripten-version
================================================
4.0.5



================================================
FILE: crates/tags/README.md
================================================
# Tree-sitter Tags

[![crates.io badge]][crates.io]

[crates.io]: https://crates.io/crates/tree-sitter-tags
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-tags.svg?color=%23B48723

### Usage

Add this crate, and the language-specific crates for whichever languages you want to parse, to your `Cargo.toml`:

```toml
[dependencies]
tree-sitter-tags = "0.19"
tree-sitter-javascript = "0.19"
tree-sitter-python = "0.19"
```

Create a tag context. You need one of these for each thread that you're using for tag computation:

```rust
use tree_sitter_tags::TagsContext;

let context = TagsContext::new();
```

Load some tagging queries from the `queries` directory of some language repositories:

```rust
use tree_sitter_tags::TagsConfiguration;

let python_config = TagsConfiguration::new(
    tree_sitter_python::language(),
    tree_sitter_python::TAGGING_QUERY,
    "",
).unwrap();

let javascript_config = TagsConfiguration::new(
    tree_sitter_javascript::language(),
    tree_sitter_javascript::TAGGING_QUERY,
    tree_sitter_javascript::LOCALS_QUERY,
).unwrap();
```

Compute code navigation tags for some source code:

```rust
let tags = context.generate_tags(
    &javascript_config,
    b"class A { getB() { return c(); } }",
    None,
);

for tag in tags {
    println!("kind: {:?}", tag.kind);
    println!("range: {:?}", tag.range);
    println!("name_range: {:?}", tag.name_range);
    println!("docs: {:?}", tag.docs);
}
```



================================================
FILE: crates/tags/Cargo.toml
================================================
[package]
name = "tree-sitter-tags"
version.workspace = true
description = "Library for extracting tag information"
authors = [
  "Max Brunsfeld <maxbrunsfeld@gmail.com>",
  "Patrick Thomson <patrickt@github.com>",
]
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter-tags"
license.workspace = true
keywords = ["incremental", "parsing", "syntax", "tagging"]
categories = ["parsing", "text-editors"]

[lints]
workspace = true

[lib]
path = "src/tags.rs"
crate-type = ["lib", "staticlib"]

[dependencies]
memchr.workspace = true
regex.workspace = true
streaming-iterator.workspace = true
thiserror.workspace = true

tree-sitter.workspace = true



================================================
FILE: crates/tags/include/tree_sitter/tags.h
================================================
#ifndef TREE_SITTER_TAGS_H_
#define TREE_SITTER_TAGS_H_

#ifdef __cplusplus
extern "C" {
#endif

// `bool` and `size_t` are used below, so include their headers directly
// rather than relying on another header to pull them in.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "tree_sitter/api.h"

// Status codes returned by the tagging functions.
typedef enum {
  TSTagsOk,
  TSTagsUnknownScope,
  TSTagsTimeout,
  TSTagsInvalidLanguage,
  TSTagsInvalidUtf8,
  TSTagsInvalidRegex,
  TSTagsInvalidQuery,
  TSTagsInvalidCapture,
} TSTagsError;

// A single tag. The `docs_*_byte` fields are offsets into the buffer returned
// by `ts_tags_buffer_docs`; all other byte offsets refer to the source code
// that was tagged.
typedef struct {
  uint32_t start_byte;
  uint32_t end_byte;
  uint32_t name_start_byte;
  uint32_t name_end_byte;
  uint32_t line_start_byte;
  uint32_t line_end_byte;
  TSPoint start_point;
  TSPoint end_point;
  uint32_t utf16_start_column;
  uint32_t utf16_end_column;
  uint32_t docs_start_byte;
  uint32_t docs_end_byte;
  uint32_t syntax_type_id;
  bool is_definition;
} TSTag;

typedef struct TSTagger TSTagger;
typedef struct TSTagsBuffer TSTagsBuffer;

// Construct a tagger.
TSTagger *ts_tagger_new(void);

// Delete a tagger.
void ts_tagger_delete(TSTagger *);

// Add a `TSLanguage` to a tagger. The language is associated with a scope name,
// which can be used later to select a language for tagging. Along with the language,
// you must provide two tree query strings, one for matching tags themselves, and one
// specifying local variable definitions. `locals_query` may be NULL, in which case
// it is treated as an empty query.
TSTagsError ts_tagger_add_language(
  TSTagger *self,
  const char *scope_name,
  const TSLanguage *language,
  const char *tags_query,
  const char *locals_query,
  uint32_t tags_query_len,
  uint32_t locals_query_len
);

// Compute the tags for a given document. You must first create a
// `TSTagsBuffer` to hold the output. `cancellation_flag` may be NULL.
TSTagsError ts_tagger_tag(
  const TSTagger *self,
  const char *scope_name,
  const char *source_code,
  uint32_t source_code_len,
  TSTagsBuffer *output,
  const size_t *cancellation_flag
);

// A tags buffer stores the results produced by a tagging call. It can be reused
// for multiple calls.
TSTagsBuffer *ts_tags_buffer_new(void);

// Delete a tags buffer.
void ts_tags_buffer_delete(TSTagsBuffer *);

// Access the tags within a tag buffer.
const TSTag *ts_tags_buffer_tags(const TSTagsBuffer *);
uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *);

// Access the string containing all of the docs. Use `ts_tags_buffer_docs_len`
// to get its length.
const char *ts_tags_buffer_docs(const TSTagsBuffer *);
uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *);

// Get the syntax kinds for a scope. The number of entries is written to `len`.
const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len);

// Determine whether a parse error was encountered while tagging.
bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_TAGS_H_



================================================
FILE: crates/tags/src/c_lib.rs
================================================
use std::{
    collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str,
    sync::atomic::AtomicUsize,
};

use tree_sitter::Language;

use super::{Error, TagsConfiguration, TagsContext};

const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100;
const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024;

/// Status code returned across the C API by the tagging functions.
///
/// `#[repr(C)]` gives the enum a C-compatible layout, so the variant order
/// determines the numeric values seen by C callers.
#[repr(C)]
#[derive(Debug, PartialEq, Eq)]
pub enum TSTagsError {
    /// The operation succeeded.
    Ok,
    /// No language was registered under the requested scope name.
    UnknownScope,
    /// Tagging did not run to completion.
    Timeout,
    /// The language could not be used for tagging.
    InvalidLanguage,
    /// A query string was not valid UTF-8.
    InvalidUtf8,
    /// A regular expression in a query failed to compile.
    InvalidRegex,
    /// A query failed to compile.
    InvalidQuery,
    /// A query used a capture name that the tagger does not recognise.
    InvalidCapture,
    // NOTE(review): not returned by any function in this file.
    Unknown,
}

/// A row/column position, laid out for C.
#[repr(C)]
pub struct TSPoint {
    row: u32,
    column: u32,
}

/// C-compatible form of a [`super::Tag`], as written into a [`TSTagsBuffer`]
/// by [`ts_tagger_tag`]. All positions are narrowed to `u32`.
#[repr(C)]
pub struct TSTag {
    // Start and end of the tag's `range`.
    pub start_byte: u32,
    pub end_byte: u32,
    // Start and end of the tag's `name_range`.
    pub name_start_byte: u32,
    pub name_end_byte: u32,
    // Start and end of the tag's `line_range`.
    pub line_start_byte: u32,
    pub line_end_byte: u32,
    // Start and end of the tag's `span`.
    pub start_point: TSPoint,
    pub end_point: TSPoint,
    // Start and end of the tag's `utf16_column_range`.
    pub utf16_start_column: u32,
    pub utf16_end_column: u32,
    // Where this tag's docs sit inside the buffer's `docs` bytes. The two
    // values are equal when the tag has no docs.
    pub docs_start_byte: u32,
    pub docs_end_byte: u32,
    pub syntax_type_id: u32,
    pub is_definition: bool,
}

/// Tagger handle handed out to C. Holds one [`TagsConfiguration`] per
/// registered scope name.
pub struct TSTagger {
    languages: HashMap<String, TagsConfiguration>,
}

/// Reusable output buffer for [`ts_tagger_tag`].
pub struct TSTagsBuffer {
    // Context used to generate the tags.
    context: TagsContext,
    // Tags produced by the most recent tagging call.
    tags: Vec<TSTag>,
    // Doc strings of all tags, concatenated; each tag records its own byte
    // range within this buffer.
    docs: Vec<u8>,
    // The error flag reported by the most recent successful tagging call.
    errors_present: bool,
}

/// Allocates a [`TSTagger`] with no languages registered and returns an owning
/// pointer to it. Release it with [`ts_tagger_delete`].
#[no_mangle]
pub extern "C" fn ts_tagger_new() -> *mut TSTagger {
    let tagger = TSTagger {
        languages: HashMap::new(),
    };
    Box::into_raw(Box::new(tagger))
}

/// Delete a [`TSTagger`].
///
/// Passing a null pointer is a no-op, mirroring `free(NULL)`.
///
/// # Safety
///
/// `this` must be either null or a pointer obtained from [`ts_tagger_new`]
/// that has not been deleted yet.
#[no_mangle]
pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) {
    // `Box::from_raw` on a null pointer is undefined behaviour, so bail out
    // before reconstructing the box.
    if this.is_null() {
        return;
    }
    drop(Box::from_raw(this));
}

/// Add a language to a [`TSTagger`].
///
/// Returns a [`TSTagsError`] indicating whether the operation was successful or not.
///
/// # Safety
///
/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance.
/// `scope_name` must be non-null and a valid pointer to a null-terminated string.
/// `tags_query` and `locals_query` must be non-null and valid pointers to strings.
///
/// The caller must ensure that the lengths of `tags_query` and `locals_query` are correct.
#[no_mangle]
pub unsafe extern "C" fn ts_tagger_add_language(
    this: *mut TSTagger,
    scope_name: *const c_char,
    language: Language,
    tags_query: *const u8,
    locals_query: *const u8,
    tags_query_len: u32,
    locals_query_len: u32,
) -> TSTagsError {
    let tagger = unwrap_mut_ptr(this);
    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
    let tags_query = slice::from_raw_parts(tags_query, tags_query_len as usize);
    let locals_query = if !locals_query.is_null() {
        slice::from_raw_parts(locals_query, locals_query_len as usize)
    } else {
        &[]
    };
    let Ok(tags_query) = str::from_utf8(tags_query) else {
        return TSTagsError::InvalidUtf8;
    };
    let Ok(locals_query) = str::from_utf8(locals_query) else {
        return TSTagsError::InvalidUtf8;
    };

    match TagsConfiguration::new(language, tags_query, locals_query) {
        Ok(c) => {
            tagger.languages.insert(scope_name.to_string(), c);
            TSTagsError::Ok
        }
        Err(Error::Query(_)) => TSTagsError::InvalidQuery,
        Err(Error::Regex(_)) => TSTagsError::InvalidRegex,
        Err(Error::Cancelled) => TSTagsError::Timeout,
        Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage,
        Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture,
    }
}

/// Tags some source code.
///
/// Returns a [`TSTagsError`] indicating whether the operation was successful or not.
///
/// # Safety
///
/// `this` must be a non-null valid pointer to a [`TSTagger`] instance.
/// `scope_name` must be a non-null valid pointer to a null-terminated string.
/// `source_code` must be a non-null valid pointer to a slice of bytes.
/// `output` must be a non-null valid pointer to a [`TSTagsBuffer`] instance.
/// `cancellation_flag` must be a non-null valid pointer to an [`AtomicUsize`] instance.
#[no_mangle]
pub unsafe extern "C" fn ts_tagger_tag(
    this: *mut TSTagger,
    scope_name: *const c_char,
    source_code: *const u8,
    source_code_len: u32,
    output: *mut TSTagsBuffer,
    cancellation_flag: *const AtomicUsize,
) -> TSTagsError {
    let tagger = unwrap_mut_ptr(this);
    let buffer = unwrap_mut_ptr(output);
    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());

    if let Some(config) = tagger.languages.get(scope_name) {
        shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY);
        shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY);

        let source_code = slice::from_raw_parts(source_code, source_code_len as usize);
        let cancellation_flag = cancellation_flag.as_ref();

        let tags = match buffer
            .context
            .generate_tags(config, source_code, cancellation_flag)
        {
            Ok((tags, found_error)) => {
                buffer.errors_present = found_error;
                tags
            }
            Err(e) => {
                return match e {
                    Error::InvalidLanguage => TSTagsError::InvalidLanguage,
                    _ => TSTagsError::Timeout,
                }
            }
        };

        for tag in tags {
            let Ok(tag) = tag else {
                buffer.tags.clear();
                buffer.docs.clear();
                return TSTagsError::Timeout;
            };

            let prev_docs_len = buffer.docs.len();
            if let Some(docs) = tag.docs {
                buffer.docs.extend_from_slice(docs.as_bytes());
            }
            buffer.tags.push(TSTag {
                start_byte: tag.range.start as u32,
                end_byte: tag.range.end as u32,
                name_start_byte: tag.name_range.start as u32,
                name_end_byte: tag.name_range.end as u32,
                line_start_byte: tag.line_range.start as u32,
                line_end_byte: tag.line_range.end as u32,
                start_point: TSPoint {
                    row: tag.span.start.row as u32,
                    column: tag.span.start.column as u32,
                },
                end_point: TSPoint {
                    row: tag.span.end.row as u32,
                    column: tag.span.end.column as u32,
                },
                utf16_start_column: tag.utf16_column_range.start as u32,
                utf16_end_column: tag.utf16_column_range.end as u32,
                docs_start_byte: prev_docs_len as u32,
                docs_end_byte: buffer.docs.len() as u32,
                syntax_type_id: tag.syntax_type_id,
                is_definition: tag.is_definition,
            });
        }

        TSTagsError::Ok
    } else {
        TSTagsError::UnknownScope
    }
}

/// Allocates an empty [`TSTagsBuffer`] with pre-reserved tag and doc storage
/// and returns an owning pointer to it. Release it with
/// [`ts_tags_buffer_delete`].
#[no_mangle]
pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
    let buffer = TSTagsBuffer {
        context: TagsContext::new(),
        tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY),
        docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY),
        errors_present: false,
    };
    Box::into_raw(Box::new(buffer))
}

/// Delete a [`TSTagsBuffer`].
///
/// Passing a null pointer is a no-op, mirroring `free(NULL)`.
///
/// # Safety
///
/// `this` must be either null or a pointer obtained from
/// [`ts_tags_buffer_new`] that has not been deleted yet.
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) {
    // `Box::from_raw` on a null pointer is undefined behaviour, so bail out
    // before reconstructing the box.
    if this.is_null() {
        return;
    }
    drop(Box::from_raw(this));
}

/// Returns a pointer to the first tag stored in a [`TSTagsBuffer`]. The number
/// of tags is given by [`ts_tags_buffer_tags_len`].
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagsBuffer`] instance created by
/// [`ts_tags_buffer_new`].
///
/// The returned pointer must not be used after the [`TSTagsBuffer`] has been
/// deleted with [`ts_tags_buffer_delete`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag {
    let buffer = unwrap_ptr(this);
    buffer.tags.as_ptr()
}

/// Returns how many tags a [`TSTagsBuffer`] currently holds.
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagsBuffer`] instance.
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 {
    let buffer = unwrap_ptr(this);
    buffer.tags.len() as u32
}

/// Returns a pointer to the concatenated documentation bytes of a
/// [`TSTagsBuffer`]. Each tag's `docs_start_byte`/`docs_end_byte` select its
/// own slice of this data.
///
/// The length of the data is given by [`ts_tags_buffer_docs_len`]; do not rely
/// on a terminating NUL byte, as this function does not add one.
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagsBuffer`] instance created by
/// [`ts_tags_buffer_new`].
///
/// The returned pointer must not be used after the [`TSTagsBuffer`] has been
/// deleted with [`ts_tags_buffer_delete`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char {
    let buffer = unwrap_ptr(this);
    buffer.docs.as_ptr().cast::<c_char>()
}

/// Returns the length, in bytes, of the documentation data held by a
/// [`TSTagsBuffer`].
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagsBuffer`] instance created by
/// [`ts_tags_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 {
    let buffer = unwrap_ptr(this);
    buffer.docs.len() as u32
}

/// Reports the error flag recorded by the most recent successful tagging call
/// that wrote to this [`TSTagsBuffer`].
///
/// The process is aborted if `this` is null.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagsBuffer`] instance created by
/// [`ts_tags_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool {
    let buffer = unwrap_ptr(this);
    buffer.errors_present
}

/// Looks up the syntax kind names of the language registered under
/// `scope_name`.
///
/// On success, returns a pointer to an array of string pointers and writes the
/// number of entries to `len`. If the scope is unknown, returns null and
/// writes `0` to `len`.
///
/// NOTE(review): whether the array and its strings are null-terminated depends
/// on how the configuration builds them, which this function does not show;
/// rely on `len` for the array length.
///
/// The process is aborted if `this` or `len` is null, or if `scope_name` is
/// not valid UTF-8.
///
/// # Safety
///
/// `this` must be a valid pointer to a [`TSTagger`] instance created by
/// [`ts_tagger_new`].
/// `scope_name` must be non-null and a valid pointer to a null-terminated string.
/// `len` must be a valid pointer to a `u32`.
///
/// The returned pointer must not be used after the [`TSTagger`] has been
/// deleted with [`ts_tagger_delete`].
#[no_mangle]
pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name(
    this: *mut TSTagger,
    scope_name: *const c_char,
    len: *mut u32,
) -> *const *const c_char {
    let tagger = unwrap_mut_ptr(this);
    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
    let len = unwrap_mut_ptr(len);

    match tagger.languages.get(scope_name) {
        Some(config) => {
            let names = &config.c_syntax_type_names;
            *len = names.len() as u32;
            names.as_ptr().cast::<*const c_char>()
        }
        None => {
            *len = 0;
            std::ptr::null()
        }
    }
}

/// Turns a raw pointer into a shared reference, aborting the process with a
/// diagnostic on stderr if the pointer is null.
///
/// Thanks to `#[track_caller]`, the diagnostic names the location of the call
/// that passed the null pointer rather than a line inside this helper.
///
/// # Safety
///
/// If non-null, `result` must point to a valid `T` that lives for `'a`.
#[track_caller]
unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
    let Some(reference) = result.as_ref() else {
        // Must be read here rather than inside a closure: closures do not
        // inherit the caller location.
        let caller = std::panic::Location::caller();
        eprintln!(
            "{}:{} - pointer must not be null",
            caller.file(),
            caller.line()
        );
        abort();
    };
    reference
}

/// Turns a raw pointer into an exclusive reference, aborting the process with
/// a diagnostic on stderr if the pointer is null.
///
/// Thanks to `#[track_caller]`, the diagnostic names the location of the call
/// that passed the null pointer rather than a line inside this helper.
///
/// # Safety
///
/// If non-null, `result` must point to a valid `T` that lives for `'a` and is
/// not accessed through any other pointer or reference during that time.
#[track_caller]
unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
    let Some(reference) = result.as_mut() else {
        // Must be read here rather than inside a closure: closures do not
        // inherit the caller location.
        let caller = std::panic::Location::caller();
        eprintln!(
            "{}:{} - pointer must not be null",
            caller.file(),
            caller.line()
        );
        abort();
    };
    reference
}

/// Returns the success value of `result`. On error, prints the error to
/// stderr and aborts the process.
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
    match result {
        Ok(value) => value,
        Err(error) => {
            eprintln!("tree-sitter tag error: {error}");
            abort();
        }
    }
}

/// Removes every element from `vec`. A vector holding more than `capacity`
/// elements also has its allocation shrunk to fit `capacity` elements first,
/// so a single oversized result does not pin a large allocation for good.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    let oversized = vec.len() > capacity;
    if oversized {
        // Truncate first so that `shrink_to_fit` shrinks to the reserve size.
        vec.truncate(capacity);
        vec.shrink_to_fit();
    }
    vec.clear();
}



================================================
FILE: crates/tags/src/tags.rs
================================================
#![doc = include_str!("../README.md")]

pub mod c_lib;

use std::{
    char,
    collections::HashMap,
    ffi::{CStr, CString},
    mem,
    ops::Range,
    os::raw::c_char,
    str,
    sync::atomic::{AtomicUsize, Ordering},
};

use memchr::memchr;
use regex::Regex;
use streaming_iterator::StreamingIterator;
use thiserror::Error;
use tree_sitter::{
    Language, LossyUtf8, ParseOptions, Parser, Point, Query, QueryCursor, QueryError,
    QueryPredicateArg, Tree,
};

const MAX_LINE_LEN: usize = 180;
const CANCELLATION_CHECK_INTERVAL: usize = 100;

/// Contains the data needed to compute tags for code written in a
/// particular language.
///
/// Built by [`TagsConfiguration::new`] from a tags query and a locals query.
#[derive(Debug)]
pub struct TagsConfiguration {
    pub language: Language,
    /// A single query compiled from the locals query source followed by the
    /// tags query source.
    pub query: Query,
    syntax_type_names: Vec<Box<[u8]>>,
    // Raw string pointers handed to C callers by the C API.
    // NOTE(review): presumably these point into `syntax_type_names` — confirm.
    c_syntax_type_names: Vec<*const u8>,
    capture_map: HashMap<u32, NamedCapture>,
    // Indices of the `@doc`, `@name`, `@ignore`, `@local.scope` and
    // `@local.definition` captures, when the query declares them.
    doc_capture_index: Option<u32>,
    name_capture_index: Option<u32>,
    ignore_capture_index: Option<u32>,
    local_scope_capture_index: Option<u32>,
    local_definition_capture_index: Option<u32>,
    // Number of patterns that start inside the locals part of the combined
    // query, i.e. the index of the first pattern from the tags query.
    tags_pattern_index: usize,
    pattern_info: Vec<PatternInfo>,
}

// The raw pointers in `c_syntax_type_names` prevent `Send` and `Sync` from
// being derived automatically, so both are asserted by hand.
// NOTE(review): soundness relies on those pointers referring to data owned by
// the configuration itself that is never mutated after construction — confirm.
unsafe impl Send for TagsConfiguration {}
unsafe impl Sync for TagsConfiguration {}

/// What a `@definition.*` or `@reference.*` capture stands for.
#[derive(Debug)]
pub struct NamedCapture {
    // NOTE(review): presumably an index into the configuration's syntax type
    // names — confirm.
    pub syntax_type_id: u32,
    /// `true` for `@definition.*` captures, `false` for `@reference.*` ones.
    pub is_definition: bool,
}

/// Reusable state for computing tags: a parser plus a query cursor.
pub struct TagsContext {
    pub parser: Parser,
    cursor: QueryCursor,
}

/// A single tag found in a piece of source code.
#[derive(Debug, Clone)]
pub struct Tag {
    /// Byte range of the tag.
    pub range: Range<usize>,
    /// Byte range of the tag's name.
    pub name_range: Range<usize>,
    /// Byte range of a line associated with the tag.
    // NOTE(review): presumably the line containing the name — confirm.
    pub line_range: Range<usize>,
    /// Start and end positions as row/column points.
    pub span: Range<Point>,
    /// Column range measured in UTF-16 units.
    pub utf16_column_range: Range<usize>,
    /// Documentation attached to the tag, if any.
    pub docs: Option<String>,
    /// Whether the tag is a definition rather than a reference.
    pub is_definition: bool,
    pub syntax_type_id: u32,
}

/// Errors produced while building a tags configuration or computing tags.
#[derive(Debug, Error, PartialEq)]
pub enum Error {
    /// The query source failed to compile.
    #[error(transparent)]
    Query(#[from] QueryError),
    /// A regular expression failed to compile.
    #[error(transparent)]
    Regex(#[from] regex::Error),
    /// Tagging was cancelled.
    #[error("Cancelled")]
    Cancelled,
    /// The language could not be used.
    #[error("Invalid language")]
    InvalidLanguage,
    /// The query uses a capture name that is neither one of the recognised
    /// special names nor prefixed with `definition.` or `reference.`. Carries
    /// the offending capture name.
    #[error("Invalid capture @{0}. Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).")]
    InvalidCapture(String),
}

/// Per-pattern settings, stored in `TagsConfiguration::pattern_info`.
// NOTE(review): the code that fills in and reads these fields is outside this
// part of the file; the field names are the only description available here.
#[derive(Debug, Default)]
struct PatternInfo {
    docs_adjacent_capture: Option<u32>,
    local_scope_inherits: bool,
    name_must_be_non_local: bool,
    doc_strip_regex: Option<Regex>,
}

/// A local definition recorded in a [`LocalScope`], identified by the bytes of
/// its name.
#[derive(Debug)]
struct LocalDef<'a> {
    name: &'a [u8],
}

/// A scope for local definitions, covering a byte range of the source.
#[derive(Debug)]
struct LocalScope<'a> {
    // NOTE(review): presumably whether lookups continue into enclosing
    // scopes — confirm.
    inherits: bool,
    range: Range<usize>,
    local_defs: Vec<LocalDef<'a>>,
}

/// State for turning a stream of query matches over `source` into tags.
struct TagsIter<'a, I>
where
    I: StreamingIterator<Item = tree_sitter::QueryMatch<'a, 'a>>,
{
    matches: I,
    // NOTE(review): presumably held only to keep the syntax tree alive while
    // `matches` is in use — confirm.
    _tree: Tree,
    source: &'a [u8],
    prev_line_info: Option<LineInfo>,
    config: &'a TagsConfiguration,
    cancellation_flag: Option<&'a AtomicUsize>,
    iter_count: usize,
    tag_queue: Vec<(Tag, usize)>,
    scopes: Vec<LocalScope<'a>>,
}

/// Position data remembered from the previous tag so that UTF-16 columns and
/// the line range need not be recomputed for another tag on the same row.
struct LineInfo {
    /// End position of the previous tag's name node.
    utf8_position: Point,
    /// Byte offset of the end of the previous tag's name.
    utf8_byte: usize,
    /// UTF-16 column at the end of the previous tag's name.
    utf16_column: usize,
    /// Trimmed byte range of the line the previous tag was on.
    line_range: Range<usize>,
}

impl TagsConfiguration {
    pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result<Self, Error> {
        let query = Query::new(&language, &format!("{locals_query}{tags_query}"))?;

        let tags_query_offset = locals_query.len();
        let mut tags_pattern_index = 0;
        for i in 0..(query.pattern_count()) {
            let pattern_offset = query.start_byte_for_pattern(i);
            if pattern_offset < tags_query_offset {
                tags_pattern_index += 1;
            }
        }

        let mut capture_map = HashMap::new();
        let mut syntax_type_names = Vec::new();
        let mut doc_capture_index = None;
        let mut name_capture_index = None;
        let mut ignore_capture_index = None;
        let mut local_scope_capture_index = None;
        let mut local_definition_capture_index = None;
        for (i, name) in query.capture_names().iter().enumerate() {
            match *name {
                "name" => name_capture_index = Some(i as u32),
                "ignore" => ignore_capture_index = Some(i as u32),
                "doc" => doc_capture_index = Some(i as u32),
                "local.scope" => local_scope_capture_index = Some(i as u32),
                "local.definition" => local_definition_capture_index = Some(i as u32),
                "local.reference" | "" => {}
                _ => {
                    let mut is_definition = false;

                    let kind = if name.starts_with("definition.") {
                        is_definition = true;
                        name.trim_start_matches("definition.")
                    } else if name.starts_with("reference.") {
                        name.trim_start_matches("reference.")
                    } else {
                        return Err(Error::InvalidCapture((*name).to_string()));
                    };

                    if let Ok(cstr) = CString::new(kind) {
                        let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice();
                        let syntax_type_id = syntax_type_names
                            .iter()
                            .position(|n| n == &c_kind)
                            .unwrap_or_else(|| {
                                syntax_type_names.push(c_kind);
                                syntax_type_names.len() - 1
                            }) as u32;
                        capture_map.insert(
                            i as u32,
                            NamedCapture {
                                syntax_type_id,
                                is_definition,
                            },
                        );
                    }
                }
            }
        }

        let c_syntax_type_names = syntax_type_names.iter().map(|s| s.as_ptr()).collect();

        let pattern_info = (0..query.pattern_count())
            .map(|pattern_index| {
                let mut info = PatternInfo::default();
                for (property, is_positive) in query.property_predicates(pattern_index) {
                    if !is_positive && property.key.as_ref() == "local" {
                        info.name_must_be_non_local = true;
                    }
                }
                info.local_scope_inherits = true;
                for property in query.property_settings(pattern_index) {
                    if property.key.as_ref() == "local.scope-inherits"
                        && property
                            .value
                            .as_ref()
                            .is_some_and(|v| v.as_ref() == "false")
                    {
                        info.local_scope_inherits = false;
                    }
                }
                if let Some(doc_capture_index) = doc_capture_index {
                    for predicate in query.general_predicates(pattern_index) {
                        if predicate.args.first()
                            == Some(&QueryPredicateArg::Capture(doc_capture_index))
                        {
                            match (predicate.operator.as_ref(), predicate.args.get(1)) {
                                ("select-adjacent!", Some(QueryPredicateArg::Capture(index))) => {
                                    info.docs_adjacent_capture = Some(*index);
                                }
                                ("strip!", Some(QueryPredicateArg::String(pattern))) => {
                                    let regex = Regex::new(pattern.as_ref())?;
                                    info.doc_strip_regex = Some(regex);
                                }
                                _ => {}
                            }
                        }
                    }
                }
                Ok(info)
            })
            .collect::<Result<Vec<_>, Error>>()?;

        Ok(Self {
            language,
            query,
            syntax_type_names,
            c_syntax_type_names,
            capture_map,
            doc_capture_index,
            name_capture_index,
            ignore_capture_index,
            local_scope_capture_index,
            local_definition_capture_index,
            tags_pattern_index,
            pattern_info,
        })
    }

    #[must_use]
    pub fn syntax_type_name(&self, id: u32) -> &str {
        unsafe {
            let cstr = CStr::from_ptr(
                self.syntax_type_names[id as usize]
                    .as_ptr()
                    .cast::<c_char>(),
            )
            .to_bytes();
            str::from_utf8(cstr).expect("syntax type name was not valid utf-8")
        }
    }
}

impl Default for TagsContext {
    /// Equivalent to `TagsContext::new()`.
    fn default() -> Self {
        Self::new()
    }
}

impl TagsContext {
    /// Creates a context with a fresh parser and query cursor.
    #[must_use]
    pub fn new() -> Self {
        Self {
            parser: Parser::new(),
            cursor: QueryCursor::new(),
        }
    }

    /// Gives mutable access to the underlying parser.
    pub fn parser(&mut self) -> &mut Parser {
        &mut self.parser
    }

    /// Parses `source` with the language from `config` and returns an iterator
    /// over the resulting tags, paired with a flag that is `true` when the
    /// root node of the parsed tree reports an error.
    ///
    /// If `cancellation_flag` is provided, a non-zero value stops the parse and
    /// later makes the iterator yield `Error::Cancelled`.
    ///
    /// # Errors
    ///
    /// * `Error::InvalidLanguage` if the parser rejects `config.language`.
    /// * `Error::Cancelled` if parsing produced no tree.
    pub fn generate_tags<'a>(
        &'a mut self,
        config: &'a TagsConfiguration,
        source: &'a [u8],
        cancellation_flag: Option<&'a AtomicUsize>,
    ) -> Result<(impl Iterator<Item = Result<Tag, Error>> + 'a, bool), Error> {
        self.parser
            .set_language(&config.language)
            .map_err(|_| Error::InvalidLanguage)?;
        self.parser.reset();
        let tree = self
            .parser
            .parse_with_options(
                // Hand the parser the remainder of the source from offset `i`,
                // or an empty slice once past the end.
                &mut |i, _| {
                    if i < source.len() {
                        &source[i..]
                    } else {
                        &[]
                    }
                },
                None,
                // Returning `true` from the progress callback requests
                // cancellation of the parse.
                Some(ParseOptions::new().progress_callback(&mut |_| {
                    if let Some(cancellation_flag) = cancellation_flag {
                        cancellation_flag.load(Ordering::SeqCst) != 0
                    } else {
                        false
                    }
                })),
            )
            .ok_or(Error::Cancelled)?;

        // The `matches` iterator borrows the `Tree`, which prevents it from being
        // moved. But the tree is really just a pointer, so it's actually ok to
        // move it.
        let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
        let matches = self
            .cursor
            .matches(&config.query, tree_ref.root_node(), source);
        Ok((
            TagsIter {
                // Moved into the iterator so the tree lives as long as `matches`.
                _tree: tree,
                matches,
                source,
                config,
                cancellation_flag,
                prev_line_info: None,
                tag_queue: Vec::new(),
                iter_count: 0,
                // Implicit outermost scope covering the whole file.
                scopes: vec![LocalScope {
                    range: 0..source.len(),
                    inherits: false,
                    local_defs: Vec::new(),
                }],
            },
            tree_ref.root_node().has_error(),
        ))
    }
}

impl<'a, I> Iterator for TagsIter<'a, I>
where
    I: StreamingIterator<Item = tree_sitter::QueryMatch<'a, 'a>>,
{
    type Item = Result<Tag, Error>;

    /// Produces the next tag.
    ///
    /// Matches from the locals patterns only update the scope stack. Matches
    /// from the tags patterns are turned into tags and buffered in `tag_queue`,
    /// where a tag for the same name node from an earlier pattern wins. Tags
    /// are released once a later, non-overlapping tag has been queued, and the
    /// rest are drained when the matches run out. Placeholder tags created for
    /// `@ignore` captures are never yielded.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Periodically check for cancellation, returning `Cancelled` error if the
            // cancellation flag was flipped.
            if let Some(cancellation_flag) = self.cancellation_flag {
                self.iter_count += 1;
                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
                    self.iter_count = 0;
                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
                        return Some(Err(Error::Cancelled));
                    }
                }
            }

            // If there is a queued tag for an earlier node in the syntax tree, then pop
            // it off of the queue and return it.
            if let Some(last_entry) = self.tag_queue.last() {
                if self.tag_queue.len() > 1
                    && self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start
                {
                    let tag = self.tag_queue.remove(0).0;
                    if tag.is_ignored() {
                        continue;
                    }
                    return Some(Ok(tag));
                }
            }

            // If there is another match, then compute its tag and add it to the
            // tag queue.
            if let Some(mat) = self.matches.next() {
                let pattern_info = &self.config.pattern_info[mat.pattern_index];

                // Patterns before `tags_pattern_index` come from the locals query:
                // they only record scopes and local definitions.
                if mat.pattern_index < self.config.tags_pattern_index {
                    for capture in mat.captures {
                        let index = Some(capture.index);
                        let range = capture.node.byte_range();
                        if index == self.config.local_scope_capture_index {
                            self.scopes.push(LocalScope {
                                range,
                                inherits: pattern_info.local_scope_inherits,
                                local_defs: Vec::new(),
                            });
                        } else if index == self.config.local_definition_capture_index {
                            // Attach the definition to the most recently pushed
                            // scope that contains it.
                            if let Some(scope) = self.scopes.iter_mut().rev().find(|scope| {
                                scope.range.start <= range.start && scope.range.end >= range.end
                            }) {
                                scope.local_defs.push(LocalDef {
                                    name: &self.source[range.clone()],
                                });
                            }
                        }
                    }
                    continue;
                }

                let mut name_node = None;
                let mut doc_nodes = Vec::new();
                let mut tag_node = None;
                let mut syntax_type_id = 0;
                let mut is_definition = false;
                let mut docs_adjacent_node = None;
                let mut is_ignored = false;

                for capture in mat.captures {
                    let index = Some(capture.index);

                    if index == self.config.ignore_capture_index {
                        is_ignored = true;
                        name_node = Some(capture.node);
                    }

                    if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture {
                        docs_adjacent_node = Some(capture.node);
                    }

                    if index == self.config.name_capture_index {
                        name_node = Some(capture.node);
                    } else if index == self.config.doc_capture_index {
                        doc_nodes.push(capture.node);
                    }

                    if let Some(named_capture) = self.config.capture_map.get(&capture.index) {
                        tag_node = Some(capture.node);
                        syntax_type_id = named_capture.syntax_type_id;
                        is_definition = named_capture.is_definition;
                    }
                }

                if let Some(name_node) = name_node {
                    let name_range = name_node.byte_range();

                    let tag;
                    if let Some(tag_node) = tag_node {
                        if name_node.has_error() {
                            continue;
                        }

                        // Walk the scopes from the most recently pushed outwards;
                        // the name is local if a containing scope defines it
                        // before a non-inheriting scope ends the search.
                        if pattern_info.name_must_be_non_local {
                            let mut is_local = false;
                            for scope in self.scopes.iter().rev() {
                                if scope.range.start <= name_range.start
                                    && scope.range.end >= name_range.end
                                {
                                    if scope
                                        .local_defs
                                        .iter()
                                        .any(|d| d.name == &self.source[name_range.clone()])
                                    {
                                        is_local = true;
                                        break;
                                    }
                                    if !scope.inherits {
                                        break;
                                    }
                                }
                            }
                            if is_local {
                                continue;
                            }
                        }

                        // If needed, filter the doc nodes based on their ranges, selecting
                        // only the slice that are adjacent to some specified node.
                        let mut docs_start_index = 0;
                        if let (Some(docs_adjacent_node), false) =
                            (docs_adjacent_node, doc_nodes.is_empty())
                        {
                            docs_start_index = doc_nodes.len();
                            let mut start_row = docs_adjacent_node.start_position().row;
                            while docs_start_index > 0 {
                                let doc_node = &doc_nodes[docs_start_index - 1];
                                let prev_doc_end_row = doc_node.end_position().row;
                                if prev_doc_end_row + 1 >= start_row {
                                    docs_start_index -= 1;
                                    start_row = doc_node.start_position().row;
                                } else {
                                    break;
                                }
                            }
                        }

                        // Generate a doc string from all of the doc nodes, applying any strip
                        // regexes.
                        let mut docs = None;
                        for doc_node in &doc_nodes[docs_start_index..] {
                            if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()])
                            {
                                let content = pattern_info.doc_strip_regex.as_ref().map_or_else(
                                    || content.to_string(),
                                    |regex| regex.replace_all(content, "").to_string(),
                                );
                                match &mut docs {
                                    None => docs = Some(content),
                                    Some(d) => {
                                        d.push('\n');
                                        d.push_str(&content);
                                    }
                                }
                            }
                        }

                        let rng = tag_node.byte_range();
                        let range = rng.start.min(name_range.start)..rng.end.max(name_range.end);
                        let span = name_node.start_position()..name_node.end_position();

                        // Compute tag properties that depend on the text of the containing line. If
                        // the previous tag occurred on the same line, then
                        // reuse results from the previous tag.
                        let mut prev_utf16_column = 0;
                        let mut prev_utf8_byte = name_range.start - span.start.column;
                        let line_info = self.prev_line_info.as_ref().and_then(|info| {
                            if info.utf8_position.row == span.start.row {
                                Some(info)
                            } else {
                                None
                            }
                        });
                        let line_range = if let Some(line_info) = line_info {
                            // Only resume counting from the previous tag when it
                            // ends at or before this name's start column.
                            if line_info.utf8_position.column <= span.start.column {
                                prev_utf8_byte = line_info.utf8_byte;
                                prev_utf16_column = line_info.utf16_column;
                            }
                            line_info.line_range.clone()
                        } else {
                            self::line_range(
                                self.source,
                                name_range.start,
                                span.start,
                                MAX_LINE_LEN,
                            )
                        };

                        let utf16_start_column = prev_utf16_column
                            + utf16_len(&self.source[prev_utf8_byte..name_range.start]);
                        let utf16_end_column =
                            utf16_start_column + utf16_len(&self.source[name_range.clone()]);
                        let utf16_column_range = utf16_start_column..utf16_end_column;

                        self.prev_line_info = Some(LineInfo {
                            utf8_position: span.end,
                            utf8_byte: name_range.end,
                            utf16_column: utf16_end_column,
                            line_range: line_range.clone(),
                        });
                        tag = Tag {
                            range,
                            name_range,
                            line_range,
                            span,
                            utf16_column_range,
                            docs,
                            is_definition,
                            syntax_type_id,
                        };
                    } else if is_ignored {
                        // Placeholder that claims this name node so later patterns
                        // cannot tag it; filtered out before being yielded.
                        tag = Tag::ignored(name_range);
                    } else {
                        continue;
                    }

                    // Only create one tag per node. The tag queue is sorted by node position
                    // to allow for fast lookup.
                    match self.tag_queue.binary_search_by_key(
                        &(tag.name_range.end, tag.name_range.start),
                        |(tag, _)| (tag.name_range.end, tag.name_range.start),
                    ) {
                        Ok(i) => {
                            // The tag from the earliest pattern wins.
                            let (existing_tag, pattern_index) = &mut self.tag_queue[i];
                            if *pattern_index > mat.pattern_index {
                                *pattern_index = mat.pattern_index;
                                *existing_tag = tag;
                            }
                        }
                        Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)),
                    }
                }
            } else {
                // If there are no more matches, then drain the queue. Ignored
                // placeholders must be skipped here too, otherwise a trailing
                // `@ignore` match would surface as a bogus tag.
                while !self.tag_queue.is_empty() {
                    let tag = self.tag_queue.remove(0).0;
                    if !tag.is_ignored() {
                        return Some(Ok(tag));
                    }
                }
                return None;
            }
        }
    }
}

impl Tag {
    /// Builds a placeholder tag for a name node captured as `@ignore`. Only
    /// `name_range` carries real data; `range` is set to `usize::MAX..usize::MAX`
    /// as the marker that `is_ignored` looks for.
    #[must_use]
    const fn ignored(name_range: Range<usize>) -> Self {
        Self {
            range: usize::MAX..usize::MAX,
            name_range,
            line_range: 0..0,
            utf16_column_range: 0..0,
            span: Point::new(0, 0)..Point::new(0, 0),
            syntax_type_id: 0,
            is_definition: false,
            docs: None,
        }
    }

    /// Reports whether this tag is a placeholder created by `ignored`.
    #[must_use]
    const fn is_ignored(&self) -> bool {
        matches!(self.range.start, usize::MAX)
    }
}

/// Computes the byte range of the line that contains `start_byte`, with ASCII
/// whitespace trimmed from both ends.
///
/// `start_point.column` is subtracted from `start_byte` to locate the start of
/// the line. The line ends at the first newline, or after at most
/// `max_line_len` bytes; in the latter case it is cut back to the last valid
/// UTF-8 boundary if the cap split a character.
fn line_range(
    text: &[u8],
    start_byte: usize,
    start_point: Point,
    max_line_len: usize,
) -> Range<usize> {
    // Skip over the indentation at the beginning of the line.
    let mut first = start_byte - start_point.column;
    first += text
        .iter()
        .skip(first)
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();

    // Look at no more than `max_line_len` bytes, clamped to the end of the text.
    let window_len = max_line_len.min(text.len() - first);
    let window = &text[first..(first + window_len)];
    let content_len = memchr(b'\n', window).unwrap_or_else(|| match str::from_utf8(window) {
        Ok(_) => window_len,
        Err(e) => e.valid_up_to(),
    });

    // Drop whitespace at the end of the line.
    let trailing = text[first..(first + content_len)]
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();

    first..(first + content_len - trailing)
}

/// Counts the UTF-16 code units needed to encode `bytes`, decoding them via
/// `LossyUtf8`.
fn utf16_len(bytes: &[u8]) -> usize {
    let mut total = 0;
    for chunk in LossyUtf8::new(bytes) {
        for ch in chunk.chars() {
            total += ch.len_utf16();
        }
    }
    total
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `line_range` stops at newlines and at the length cap, backing off to a
    /// valid UTF-8 boundary when the cap lands inside a multi-byte character.
    #[test]
    fn test_get_line() {
        let text = "abc\ndefg❤hij\nklmno".as_bytes();
        // Whole second line, ended by the newline.
        assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14);
        // A cap of 6 cuts through the 3-byte heart, so the range ends before it.
        assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8);
        // Last line has no newline: it runs to the end of the text.
        assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20);
        // Last line truncated by the cap.
        assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19);
    }

    /// Leading and trailing ASCII whitespace is excluded from the range.
    #[test]
    fn test_get_line_trims() {
        let text = b"   foo\nbar\n";
        assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 3..6);

        let text = b"\t func foo \nbar\n";
        assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 2..10);

        let r = line_range(text, 0, Point::new(0, 0), 14);
        assert_eq!(r, 2..10);
        assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo");

        let r = line_range(text, 12, Point::new(1, 0), 14);
        assert_eq!(r, 12..15);
        assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar");
    }
}



================================================
FILE: crates/xtask/Cargo.toml
================================================
[package]
name = "xtask"
version = "0.1.0"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
repository.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
publish = false

[lints]
workspace = true

[dependencies]
anstyle.workspace = true
anyhow.workspace = true
bindgen = { version = "0.72.0" }
cc.workspace = true
clap.workspace = true
git2.workspace = true
indoc.workspace = true
toml.workspace = true
regex.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
ureq = "3.0.12"
notify = "8.1.0"
notify-debouncer-full = "0.5.0"



================================================
FILE: crates/xtask/src/benchmark.rs
================================================
use anyhow::Result;

use crate::{bail_on_err, Benchmark};

/// Runs the `benchmark` bench target of the `tree-sitter-cli` package.
///
/// The example filter, language filter and repetition count from `args` are
/// forwarded through `TREE_SITTER_BENCHMARK_*` environment variables. With
/// `args.debug` set, the benchmark is only built (`--no-run`) and the path of
/// the resulting executable is printed instead of running it.
///
/// # Errors
///
/// Fails if `cargo` cannot be started, exits unsuccessfully, or (in debug
/// mode) reports no executable for the `benchmark` target.
pub fn run(args: &Benchmark) -> Result<()> {
    if let Some(ref example) = args.example_file_name {
        std::env::set_var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER", example);
    }

    if let Some(ref language) = args.language {
        std::env::set_var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER", language);
    }

    // The variable is only exported when the count differs from 5.
    if args.repetition_count != 5 {
        std::env::set_var(
            "TREE_SITTER_BENCHMARK_REPETITION_COUNT",
            args.repetition_count.to_string(),
        );
    }

    if args.debug {
        // stdout must be piped explicitly: a spawned child inherits stdio by
        // default, in which case `wait_with_output` captures nothing. stderr
        // stays inherited so cargo's progress remains visible.
        let output = std::process::Command::new("cargo")
            .arg("bench")
            .arg("benchmark")
            .arg("-p")
            .arg("tree-sitter-cli")
            .arg("--no-run")
            .arg("--message-format=json")
            .stdout(std::process::Stdio::piped())
            .spawn()?
            .wait_with_output()?;

        bail_on_err(&output, "Failed to run `cargo bench`")?;

        // Cargo prints one JSON message per line rather than a single JSON
        // document, so each line is parsed separately. Lines that are not JSON
        // (including blank ones) are skipped.
        let test_binary = output
            .stdout
            .split(|byte| *byte == b'\n')
            .filter_map(|line| serde_json::from_slice::<serde_json::Value>(line).ok())
            .find_map(|message| {
                let is_benchmark = message
                    .get("target")
                    .and_then(|target| target.get("name"))
                    .and_then(|name| name.as_str())
                    .is_some_and(|name| name == "benchmark");
                if !is_benchmark {
                    return None;
                }
                message
                    .get("executable")
                    .and_then(|executable| executable.as_str())
                    .map(str::to_owned)
            })
            .ok_or_else(|| anyhow::anyhow!("Failed to find benchmark executable"))?;

        println!("{test_binary}");
    } else {
        let status = std::process::Command::new("cargo")
            .arg("bench")
            .arg("benchmark")
            .arg("-p")
            .arg("tree-sitter-cli")
            .status()?;

        if !status.success() {
            anyhow::bail!("Failed to run `cargo bench`");
        }
    }

    Ok(())
}



================================================
FILE: crates/xtask/src/build_wasm.rs
================================================
use std::{
    collections::HashSet,
    ffi::{OsStr, OsString},
    fmt::Write,
    fs,
    path::PathBuf,
    process::Command,
    time::Duration,
};

use anyhow::{anyhow, Result};
use notify::{
    event::{AccessKind, AccessMode},
    EventKind, RecursiveMode,
};
use notify_debouncer_full::new_debouncer;

use crate::{bail_on_err, watch_wasm, BuildWasm, EMSCRIPTEN_TAG};

/// Where the `emcc` compiler invocation is run.
#[derive(PartialEq, Eq)]
enum EmccSource {
    /// `emcc` found on the host.
    Native,
    /// `emcc` run inside a container started with `docker`.
    Docker,
    /// `emcc` run inside a container started with `podman`.
    Podman,
}

/// Names joined with commas into the `EXPORTED_RUNTIME_METHODS` setting that
/// `run_wasm` passes to `emcc`.
const EXPORTED_RUNTIME_METHODS: [&str; 8] = [
    "AsciiToString",
    "stringToUTF8",
    "UTF8ToString",
    "lengthBytesUTF8",
    "stringToUTF16",
    "loadWebAssemblyModule",
    "getValue",
    "setValue",
];

/// Builds the web binding by compiling `lib/src/lib.c` and
/// `lib/binding_web/lib/tree-sitter.c` with `emcc`.
///
/// `emcc` is run natively when available (and `args.docker` is not set),
/// otherwise inside a docker or podman container. Output goes to
/// `lib/binding_web/lib/web-tree-sitter.{mjs,cjs}` depending on `args.cjs`.
/// With `args.watch` the build is handed to `watch_wasm!` instead of being run
/// once.
///
/// # Errors
///
/// Fails if no compiler source is available, if the symbol list files cannot
/// be read, or if the compiler invocation fails.
///
/// # Panics
///
/// Panics if the current directory cannot be determined (container builds) or
/// if `target/scratch` cannot be created.
pub fn run_wasm(args: &BuildWasm) -> Result<()> {
    let mut emscripten_flags = vec!["-O3", "--minify", "0"];

    if args.debug {
        emscripten_flags.extend(["-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1", "-O0", "-g"]);
    }

    if args.verbose {
        emscripten_flags.extend(["-s", "VERBOSE=1", "-v"]);
    }

    let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" };

    // Order of preference: emscripten > docker > podman > error
    let source = if !args.docker && Command::new(emcc_name).output().is_ok() {
        EmccSource::Native
    } else if Command::new("docker")
        // `docker info` only succeeds when the daemon is reachable. A bare
        // `docker` would merely show that the binary exists, which would block
        // the podman fallback on hosts with a stopped daemon.
        .arg("info")
        .output()
        .is_ok_and(|out| out.status.success())
    {
        EmccSource::Docker
    } else if Command::new("podman")
        .arg("--version")
        .output()
        .is_ok_and(|out| out.status.success())
    {
        EmccSource::Podman
    } else {
        return Err(anyhow!(
            "You must have either emcc, docker, or podman on your PATH to run this command"
        ));
    };

    let mut command = match source {
        EmccSource::Native => Command::new(emcc_name),
        EmccSource::Docker | EmccSource::Podman => {
            let mut command = match source {
                EmccSource::Docker => Command::new("docker"),
                EmccSource::Podman => Command::new("podman"),
                _ => unreachable!(),
            };
            command.args(["run", "--rm"]);

            // Mount the root directory as a volume, which is the repo root
            let mut volume_string = OsString::from(std::env::current_dir().unwrap());
            volume_string.push(":/src:Z");
            command.args([OsStr::new("--volume"), &volume_string]);

            // In case `docker` is an alias to `podman`, ensure that podman
            // mounts the current directory as writable by the container
            // user which has the same uid as the host user. Setting the
            // podman-specific variable is more reliable than attempting to
            // detect whether `docker` is an alias for `podman`.
            // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode
            command.env("PODMAN_USERNS", "keep-id");

            // Get the current user id so that files created in the docker container will have
            // the same owner.
            #[cfg(unix)]
            {
                #[link(name = "c")]
                extern "C" {
                    fn getuid() -> u32;
                }
                // don't need to set user for podman since PODMAN_USERNS=keep-id is already set
                if source == EmccSource::Docker {
                    let user_id = unsafe { getuid() };
                    command.args(["--user", &user_id.to_string()]);
                }
            };

            // Run `emcc` in a container using the `emscripten-slim` image
            command.args([EMSCRIPTEN_TAG, "emcc"]);
            command
        }
    };

    fs::create_dir_all("target/scratch").unwrap();

    // Concatenate both symbol lists, drop the quotes and prefix every line with
    // `_`; a trailing comma left over from the last entry is removed.
    let exported_functions = format!(
        "{}{}",
        fs::read_to_string("lib/src/wasm/stdlib-symbols.txt")?,
        fs::read_to_string("lib/binding_web/lib/exports.txt")?
    )
    .replace('"', "")
    .lines()
    .fold(String::new(), |mut output, line| {
        let _ = write!(output, "_{line}");
        output
    })
    .trim_end_matches(',')
    .to_string();

    let exported_functions = format!("EXPORTED_FUNCTIONS={exported_functions}");
    let exported_runtime_methods = format!(
        "EXPORTED_RUNTIME_METHODS={}",
        EXPORTED_RUNTIME_METHODS.join(",")
    );

    // Clean up old files from prior runs
    for file in [
        "web-tree-sitter.mjs",
        "web-tree-sitter.cjs",
        "web-tree-sitter.wasm",
        "web-tree-sitter.wasm.map",
    ] {
        fs::remove_file(PathBuf::from("lib/binding_web/lib").join(file)).ok();
    }

    if !args.cjs {
        emscripten_flags.extend(["-s", "EXPORT_ES6=1"]);
    }

    #[rustfmt::skip]
    emscripten_flags.extend([
        "-gsource-map",
        "--source-map-base", ".",
        "-fno-exceptions",
        "-std=c11",
        "-s", "WASM=1",
        "-s", "MODULARIZE=1",
        "-s", "INITIAL_MEMORY=33554432",
        "-s", "ALLOW_MEMORY_GROWTH=1",
        "-s", "SUPPORT_BIG_ENDIAN=1",
        "-s", "MAIN_MODULE=2",
        "-s", "FILESYSTEM=0",
        "-s", "NODEJS_CATCH_EXIT=0",
        "-s", "NODEJS_CATCH_REJECTION=0",
        "-s", &exported_functions,
        "-s", &exported_runtime_methods,
        "-D", "fprintf(...)=",
        "-D", "NDEBUG=",
        "-D", "_POSIX_C_SOURCE=200112L",
        "-D", "_DEFAULT_SOURCE=",
        "-I", "lib/src",
        "-I", "lib/include",
        "--js-library", "lib/binding_web/lib/imports.js",
        "--pre-js",     "lib/binding_web/lib/prefix.js",
        "-o",           if args.cjs { "lib/binding_web/lib/web-tree-sitter.cjs" } else { "lib/binding_web/lib/web-tree-sitter.mjs" },
        "lib/src/lib.c",
        "lib/binding_web/lib/tree-sitter.c",
    ]);
    if args.emit_tsd {
        emscripten_flags.extend(["--emit-tsd", "web-tree-sitter.d.ts"]);
    }

    let command = command.args(&emscripten_flags);

    if args.watch {
        watch_wasm!(|| build_wasm(command));
    } else {
        build_wasm(command)?;
    }

    Ok(())
}

/// Spawns the prepared compiler command, waits for it to exit, and reports a
/// non-successful exit status as an error.
fn build_wasm(cmd: &mut Command) -> Result<()> {
    let child = cmd.spawn()?;
    let output = child.wait_with_output()?;
    bail_on_err(&output, "Failed to compile the Tree-sitter WASM library")
}

/// Compiles `lib/src/wasm/stdlib.c` with the wasi-sdk docker image, embeds the
/// resulting binary into `lib/src/wasm/wasm-stdlib.h` via `xxd`, and moves the
/// `.wasm` file into `target/`.
///
/// # Errors
///
/// Fails when the current directory cannot be determined, when `docker` or
/// `xxd` cannot be run or exit unsuccessfully, or when the output files cannot
/// be written.
pub fn run_wasm_stdlib() -> Result<()> {
    // Each line of the symbol list has the form `"symbol",`: drop the leading
    // quote and the trailing quote + comma. Lines too short to hold that
    // framing (e.g. a blank line) are skipped instead of panicking on an
    // out-of-range slice.
    let export_flags = include_str!("../../../lib/src/wasm/stdlib-symbols.txt")
        .lines()
        .filter_map(|line| line.get(1..line.len().checked_sub(2)?))
        .map(|symbol| format!("-Wl,--export={symbol}"))
        .collect::<Vec<String>>();

    // Mount the current directory as `/src` inside the container.
    let volume = format!("{}:/src", std::env::current_dir()?.display());

    let output = Command::new("docker")
        .args([
            "run",
            "--rm",
            "-v",
            volume.as_str(),
            "-w",
            "/src",
            "ghcr.io/webassembly/wasi-sdk",
            "/opt/wasi-sdk/bin/clang",
            "-o",
            "stdlib.wasm",
            "-Os",
            "-fPIC",
            "-Wl,--no-entry",
            "-Wl,--stack-first",
            "-Wl,-z",
            "-Wl,stack-size=65536",
            "-Wl,--import-undefined",
            "-Wl,--import-memory",
            "-Wl,--import-table",
            "-Wl,--strip-debug",
            "-Wl,--export=reset_heap",
            "-Wl,--export=__wasm_call_ctors",
            "-Wl,--export=__stack_pointer",
        ])
        .args(export_flags)
        .arg("lib/src/wasm/stdlib.c")
        .output()?;

    bail_on_err(&output, "Failed to compile the Tree-sitter WASM stdlib")?;

    let xxd = Command::new("xxd")
        .args(["-C", "-i", "stdlib.wasm"])
        .output()?;

    bail_on_err(
        &xxd,
        "Failed to run xxd on the compiled Tree-sitter WASM stdlib",
    )?;

    fs::write("lib/src/wasm/wasm-stdlib.h", xxd.stdout)?;

    fs::rename("stdlib.wasm", "target/stdlib.wasm")?;

    Ok(())
}



================================================
FILE: crates/xtask/src/bump.rs
================================================
use std::cmp::Ordering;

use anyhow::{anyhow, Result};
use git2::{DiffOptions, Repository};
use indoc::indoc;
use semver::{BuildMetadata, Prerelease, Version};
use toml::Value;

use crate::{create_commit, BumpVersion};

/// Returns the highest semver tag in `repo`, rendered without a `v` prefix.
///
/// Tags that do not parse as semver (after stripping an optional leading `v`)
/// are ignored. Any pre-release version ranks below every regular release;
/// within each group the normal semver ordering applies.
///
/// # Errors
///
/// Fails when the tag list cannot be read or no tag parses as a version.
pub fn get_latest_tag(repo: &Repository) -> Result<String> {
    let tag_names = repo.tag_names(None)?;
    let is_prerelease = |version: &Version| version.pre != Prerelease::EMPTY;

    let newest = tag_names
        .iter()
        .flatten()
        .filter_map(|name| Version::parse(name.strip_prefix('v').unwrap_or(name)).ok())
        .max_by(|lhs, rhs| match (is_prerelease(lhs), is_prerelease(rhs)) {
            (true, false) => Ordering::Less,
            (false, true) => Ordering::Greater,
            _ => lhs.cmp(rhs),
        });

    newest
        .map(|version| version.to_string())
        .ok_or_else(|| anyhow!("No tags found"))
}

/// Computes the next workspace version, rewrites every version-bearing file,
/// then commits and tags the result.
///
/// Unless `args.version` is given, the next version is derived from the
/// commits between the latest tag and `HEAD`:
///
/// * a commit touching a `.rs`, `.js` or `.c` file requests a patch bump;
/// * if such a commit also has a conventional-commit prefix of `feat`, or a
///   prefix ending in `!`, a minor bump is requested instead.
///
/// # Errors
///
/// Fails when the repository or the workspace manifest cannot be read, when
/// no source code changed since the latest tag, when the chosen version is
/// not greater than the current one, or when any of the update steps fails.
pub fn run(args: BumpVersion) -> Result<()> {
    let repo = Repository::open(".")?;
    let latest_tag = get_latest_tag(&repo)?;
    let current_version = Version::parse(&latest_tag)?;
    let latest_tag_sha = repo.revparse_single(&format!("v{latest_tag}"))?.id();

    let workspace_toml_version = Version::parse(&fetch_workspace_version()?)?;

    // NOTE(review): this only triggers when BOTH major and minor differ; a
    // mismatch in just one of them passes. Confirm whether `||` was intended.
    if current_version.major != workspace_toml_version.major
        && current_version.minor != workspace_toml_version.minor
    {
        eprintln!(
            indoc! {"
            Seems like the workspace Cargo.toml ({}) version does not match up with the latest git tag ({}).
            Please ensure you don't change that yourself, this subcommand will handle this for you.
        "},
            workspace_toml_version, latest_tag
        );
        return Ok(());
    }

    let mut revwalk = repo.revwalk()?;
    revwalk.push_range(format!("{latest_tag_sha}..HEAD").as_str())?;
    let mut diff_options = DiffOptions::new();

    let mut should_increment_patch = false;
    let mut should_increment_minor = false;

    for oid in revwalk {
        let commit = repo.find_commit(oid?)?;
        // A message that is not valid UTF-8 is treated as empty rather than
        // aborting the whole bump.
        let message = commit.message().unwrap_or_default().trim();

        // Diff against the first parent; a parentless commit is diffed
        // against the empty tree.
        let parent_tree = if commit.parent_count() == 0 {
            None
        } else {
            Some(commit.parent(0)?.tree()?)
        };
        let commit_tree = commit.tree()?;
        let diff = repo.diff_tree_to_tree(
            parent_tree.as_ref(),
            Some(&commit_tree),
            Some(&mut diff_options),
        )?;

        let mut source_code_changed = false;
        diff.foreach(
            &mut |delta, _| {
                // Compare the real file extension: a plain suffix test such as
                // `ends_with('c')` also matches unrelated names (`foo.inc`,
                // `misc`, ...). Paths that are missing or not valid UTF-8
                // count as non-source.
                let is_source = delta
                    .new_file()
                    .path()
                    .and_then(|path| path.extension())
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| matches!(ext, "rs" | "js" | "c"));
                if is_source {
                    source_code_changed = true;
                }
                true
            },
            None,
            None,
            None,
        )?;

        if source_code_changed {
            should_increment_patch = true;

            let Some((prefix, _)) = message.split_once(':') else {
                continue;
            };

            // Strip an optional `(scope)` from the conventional-commit prefix.
            let convention = prefix.split_once('(').map_or(prefix, |(kind, _)| kind);

            if ["feat", "feat!"].contains(&convention) || prefix.ends_with('!') {
                should_increment_minor = true;
            }
        }
    }

    let next_version = if let Some(version) = args.version {
        version
    } else {
        let mut next_version = current_version.clone();
        if should_increment_minor {
            next_version.minor += 1;
            next_version.patch = 0;
            next_version.pre = Prerelease::EMPTY;
            next_version.build = BuildMetadata::EMPTY;
        } else if should_increment_patch {
            next_version.patch += 1;
            next_version.pre = Prerelease::EMPTY;
            next_version.build = BuildMetadata::EMPTY;
        } else {
            return Err(anyhow!("No source code changed since {current_version}"));
        }
        next_version
    };
    if next_version <= current_version {
        return Err(anyhow!(
            "Next version {next_version} must be greater than current version {current_version}"
        ));
    }

    println!("Bumping from {current_version} to {next_version}");
    update_crates(&current_version, &next_version)?;
    update_makefile(&next_version)?;
    update_cmake(&next_version)?;
    update_npm(&next_version)?;
    update_zig(&next_version)?;
    tag_next_version(&repo, &next_version)?;

    Ok(())
}

/// Commits the version-bearing files with the new version as the commit
/// message, then creates an annotated `v<version>` tag on that commit.
fn tag_next_version(repo: &Repository, next_version: &Version) -> Result<()> {
    // Files rewritten by the bump that belong in the release commit.
    const RELEASE_FILES: &[&str] = &[
        "Cargo.lock",
        "Cargo.toml",
        "Makefile",
        "build.zig.zon",
        "crates/cli/Cargo.toml",
        "crates/cli/npm/package.json",
        "crates/config/Cargo.toml",
        "crates/highlight/Cargo.toml",
        "crates/loader/Cargo.toml",
        "crates/tags/Cargo.toml",
        "lib/CMakeLists.txt",
        "lib/Cargo.toml",
        "lib/binding_web/package.json",
    ];

    let signature = repo.signature()?;
    let commit_id = create_commit(repo, &format!("{next_version}"), RELEASE_FILES)?;

    // The tag name doubles as the tag message.
    let tag_name = format!("v{next_version}");
    let target = repo.find_object(commit_id, None)?;
    let tag = repo.tag(&tag_name, &target, &signature, &tag_name, false)?;

    println!("Tagged commit {commit_id} with tag {tag}");

    Ok(())
}

/// Rewrites every `Makefile` line that starts with `VERSION` to
/// `VERSION := <next_version>`, leaving all other lines untouched. The file is
/// written back with `\n` line endings and a trailing newline.
fn update_makefile(next_version: &Version) -> Result<()> {
    let original = std::fs::read_to_string("Makefile")?;

    let mut rewritten = Vec::new();
    for line in original.lines() {
        rewritten.push(if line.starts_with("VERSION") {
            format!("VERSION := {next_version}")
        } else {
            line.to_string()
        });
    }

    let mut contents = rewritten.join("\n");
    contents.push('\n');
    std::fs::write("Makefile", contents)?;

    Ok(())
}

/// Replaces the quoted value on every `lib/CMakeLists.txt` line containing
/// ` VERSION` with `next_version`. The file is written back with `\n` line
/// endings and a trailing newline.
///
/// # Panics
///
/// Panics if a line containing ` VERSION` has no double quote.
fn update_cmake(next_version: &Version) -> Result<()> {
    // Keeps everything up to and including the first quote and everything
    // from the last quote onwards, substituting the text in between.
    let bump_line = |line: &str| -> String {
        if !line.contains(" VERSION") {
            return line.to_string();
        }
        let open = line.find('"').unwrap();
        let close = line.rfind('"').unwrap();
        let (head, tail) = (&line[..=open], &line[close..]);
        format!("{head}{next_version}{tail}")
    };

    let original = std::fs::read_to_string("lib/CMakeLists.txt")?;
    let mut updated = original
        .lines()
        .map(bump_line)
        .collect::<Vec<_>>()
        .join("\n");
    updated.push('\n');

    std::fs::write("lib/CMakeLists.txt", updated)?;

    Ok(())
}

/// Bumps the workspace crates through `cargo workspaces version`, requesting a
/// `minor` bump when the minor component grew and a `patch` bump otherwise.
///
/// # Errors
///
/// Fails when the command cannot be started or exits unsuccessfully.
fn update_crates(current_version: &Version, next_version: &Version) -> Result<()> {
    let bump_kind = if next_version.minor > current_version.minor {
        "minor"
    } else {
        "patch"
    };

    let succeeded = std::process::Command::new("cargo")
        .args(["workspaces", "version", bump_kind])
        .args([
            "--no-git-commit",
            "--yes",
            "--force",
            "tree-sitter{,-cli,-config,-generate,-loader,-highlight,-tags}",
            "--ignore-changes",
            "lib/language/*",
        ])
        .status()?
        .success();

    if succeeded {
        Ok(())
    } else {
        Err(anyhow!("Failed to update crates"))
    }
}

/// Sets the `version` field of the npm manifests to `next_version`.
///
/// Both manifests are re-serialized with pretty printing and a trailing
/// newline.
///
/// # Errors
///
/// Fails when a manifest cannot be read, is not valid JSON, is not a JSON
/// object, or cannot be written back.
fn update_npm(next_version: &Version) -> Result<()> {
    // The CLI manifest lives under `crates/`; this must stay in sync with the
    // list of files committed by `tag_next_version`.
    for path in [
        "lib/binding_web/package.json",
        "crates/cli/npm/package.json",
    ] {
        let mut package_json =
            serde_json::from_str::<serde_json::Value>(&std::fs::read_to_string(path)?)?;

        package_json
            .as_object_mut()
            .ok_or_else(|| anyhow!("Invalid package.json"))?
            .insert(
                "version".to_string(),
                serde_json::Value::String(next_version.to_string()),
            );

        let package_json = serde_json::to_string_pretty(&package_json)? + "\n";

        std::fs::write(path, package_json)?;
    }

    Ok(())
}

/// Rewrites the `.version` entry of `build.zig.zon` (a line starting with two
/// spaces followed by `.version`) to `next_version`. The file is written back
/// with `\n` line endings and a trailing newline.
fn update_zig(next_version: &Version) -> Result<()> {
    let original = std::fs::read_to_string("build.zig.zon")?;

    let mut rewritten = Vec::new();
    for line in original.lines() {
        let is_version_line = line.starts_with("  .version");
        rewritten.push(if is_version_line {
            format!("  .version = \"{next_version}\",")
        } else {
            line.to_string()
        });
    }

    let mut contents = rewritten.join("\n");
    contents.push('\n');
    std::fs::write("build.zig.zon", contents)?;

    Ok(())
}

/// read Cargo.toml and get the version
fn fetch_workspace_version() -> Result<String> {
    let cargo_toml = toml::from_str::<Value>(&std::fs::read_to_string("Cargo.toml")?)?;

    Ok(cargo_toml["workspace"]["package"]["version"]
        .as_str()
        .unwrap()
        .trim_matches('"')
        .to_string())
}



================================================
FILE: crates/xtask/src/check_wasm_exports.rs
================================================
use std::{
    collections::HashSet,
    io::BufRead,
    path::PathBuf,
    process::{Command, Stdio},
    time::Duration,
};

use anyhow::{anyhow, Result};
use notify::{
    event::{AccessKind, AccessMode},
    EventKind, RecursiveMode,
};
use notify_debouncer_full::new_debouncer;

use crate::{bail_on_err, watch_wasm, CheckWasmExports};

/// Native library symbols that are deliberately not required to appear among
/// the wasm exports. Each entry must be listed only once.
const EXCLUDES: [&str; 26] = [
    // Unneeded because the JS side has its own way of implementing it
    "ts_node_child_by_field_name",
    "ts_node_edit",
    // Precomputed and stored in the JS side
    "ts_node_type",
    "ts_node_grammar_type",
    "ts_node_eq",
    "ts_tree_cursor_current_field_name",
    "ts_lookahead_iterator_current_symbol_name",
    // Not used in wasm
    "ts_init",
    "ts_set_allocator",
    "ts_parser_set_cancellation_flag",
    "ts_parser_cancellation_flag",
    "ts_parser_print_dot_graphs",
    "ts_tree_print_dot_graph",
    "ts_parser_set_wasm_store",
    "ts_parser_take_wasm_store",
    "ts_parser_language",
    "ts_node_language",
    "ts_tree_language",
    "ts_lookahead_iterator_language",
    "ts_parser_logger",
    "ts_parser_parse_string",
    "ts_parser_parse_string_encoding",
    // Query cursor is not managed by user in web bindings
    "ts_query_cursor_delete",
    "ts_query_cursor_timeout_micros",
    "ts_query_cursor_match_limit",
    "ts_query_cursor_remove_match",
];

/// Entry point for the `check-wasm-exports` subcommand: performs the check
/// once, or hands it to `watch_wasm!` when `--watch` is set.
pub fn run(args: &CheckWasmExports) -> Result<()> {
    if !args.watch {
        return check_wasm_exports();
    }

    watch_wasm!(check_wasm_exports);

    Ok(())
}

/// Verifies that every public symbol of `libtree-sitter.so` (minus
/// [`EXCLUDES`]) is accounted for on the wasm side.
///
/// The set of known wasm symbols is the union of the entries in
/// `lib/binding_web/lib/exports.txt` (with `_wasm` removed and `byte` mapped
/// to `index`) and the `ts*` function names reported by `wasm-objdump` for the
/// debug wasm binary.
///
/// # Errors
///
/// Fails when an input file or tool is unavailable, when a tool exits
/// unsuccessfully, or when at least one native symbol has no wasm counterpart;
/// the unmatched symbols are listed in the error message.
fn check_wasm_exports() -> Result<()> {
    let mut wasm_exports = std::fs::read_to_string("lib/binding_web/lib/exports.txt")?
        .lines()
        .map(|s| s.replace("_wasm", "").replace("byte", "index"))
        // Remove leading and trailing quotes, trailing comma. Lines too short
        // to hold that framing (e.g. blank lines) are skipped rather than
        // panicking on an out-of-range slice.
        .filter_map(|s| s.get(1..s.len().checked_sub(2)?).map(str::to_string))
        .collect::<HashSet<_>>();

    // Run wasm-objdump to see symbols used internally in binding.c but not exposed in any way.
    let wasm_objdump = Command::new("wasm-objdump")
        .args([
            "--details",
            "lib/binding_web/debug/web-tree-sitter.wasm",
            "--section",
            "Name",
        ])
        .output()
        .map_err(|err| anyhow!("Failed to run wasm-objdump: {err}"))?;
    bail_on_err(&wasm_objdump, "Failed to run wasm-objdump")?;

    wasm_exports.extend(
        wasm_objdump
            .stdout
            .lines()
            .map_while(Result::ok)
            .skip_while(|line| !line.contains("- func"))
            .filter(|line| line.contains("func"))
            .filter_map(|line| {
                // The third whitespace-separated field holds `<name>`.
                let function = line.split_whitespace().nth(2)?;
                let trimmed = function.trim_start_matches('<').trim_end_matches('>');
                (trimmed.starts_with("ts") && !trimmed.contains("__"))
                    .then(|| trimmed.to_string())
            }),
    );

    let nm_child = Command::new("nm")
        .arg("-W")
        .arg("-U")
        .arg("libtree-sitter.so")
        .stdout(Stdio::piped())
        .output()
        .map_err(|err| anyhow!("Failed to run nm: {err}"))?;
    bail_on_err(&nm_child, "Failed to run nm")?;

    // Keep only ` T ` entries and take the symbol name from the third column.
    let exports = nm_child
        .stdout
        .lines()
        .map_while(Result::ok)
        .filter(|line| line.contains(" T "))
        .filter_map(|line| line.split_whitespace().nth(2).map(String::from))
        .filter(|symbol| !EXCLUDES.contains(&symbol.as_str()))
        .collect::<HashSet<_>>();

    let mut missing = exports
        .iter()
        .filter(|&symbol| !wasm_exports.contains(symbol))
        .map(String::as_str)
        .collect::<Vec<_>>();
    missing.sort_unstable();

    if !missing.is_empty() {
        return Err(anyhow!("Unmatched wasm exports:\n{}", missing.join("\n")));
    }

    Ok(())
}



================================================
FILE: crates/xtask/src/clippy.rs
================================================
use std::process::Command;

use anyhow::Result;

use crate::{bail_on_err, Clippy};

/// Runs `cargo clippy` over one package (or the whole workspace) in release
/// mode with all targets and features, treating warnings as errors.
///
/// # Errors
///
/// Fails when `cargo` cannot be started or Clippy exits unsuccessfully.
pub fn run(args: &Clippy) -> Result<()> {
    let mut clippy_command = Command::new("cargo");
    clippy_command.arg("clippy");

    if let Some(package) = args.package.as_ref() {
        clippy_command.args(["--package", package]);
    } else {
        clippy_command.arg("--workspace");
    }

    // `--fix` is an option of `cargo clippy` itself, so it has to come before
    // the `--` separator; everything after `--` is forwarded to the lint
    // driver, which does not understand it.
    if args.fix {
        clippy_command.arg("--fix");
    }

    clippy_command
        .arg("--release")
        .arg("--all-targets")
        .arg("--all-features")
        .arg("--")
        .arg("-D")
        .arg("warnings");

    bail_on_err(
        &clippy_command.spawn()?.wait_with_output()?,
        "Clippy failed",
    )
}



================================================
FILE: crates/xtask/src/fetch.rs
================================================
use crate::{bail_on_err, root_dir, EMSCRIPTEN_VERSION};
use anyhow::Result;
use std::process::Command;

/// Clones (when missing) and updates every fixture grammar under
/// `test/fixtures/grammars`, hard-resetting each checkout to the freshly
/// fetched ref.
///
/// Note that the process working directory is changed to each grammar
/// directory in turn.
pub fn run_fixtures() -> Result<()> {
    // (grammar name, git ref to fetch)
    const GRAMMARS: [(&str, &str); 15] = [
        ("bash", "master"),
        ("c", "master"),
        ("cpp", "master"),
        ("embedded-template", "master"),
        ("go", "master"),
        ("html", "master"),
        ("java", "master"),
        ("javascript", "master"),
        ("jsdoc", "master"),
        ("json", "master"),
        ("php", "master"),
        ("python", "master"),
        ("ruby", "master"),
        ("rust", "master"),
        ("typescript", "master"),
    ];

    let grammars_dir = root_dir().join("test").join("fixtures").join("grammars");

    for (grammar, git_ref) in GRAMMARS {
        let grammar_dir = grammars_dir.join(grammar);
        let grammar_url = format!("https://github.com/tree-sitter/tree-sitter-{grammar}");

        println!("Updating the {grammar} grammar...");

        if !grammar_dir.exists() {
            let clone = Command::new("git")
                .args([
                    "clone",
                    "--depth",
                    "1",
                    &grammar_url,
                    &grammar_dir.to_string_lossy(),
                ])
                .spawn()?
                .wait_with_output()?;
            bail_on_err(&clone, &format!("Failed to clone the {grammar} grammar"))?;
        }

        // The remaining git commands operate on the grammar checkout itself.
        std::env::set_current_dir(&grammar_dir)?;

        let fetch = Command::new("git")
            .args(["fetch", "origin", git_ref, "--depth", "1"])
            .spawn()?
            .wait_with_output()?;
        bail_on_err(&fetch, &format!("Failed to fetch the {grammar} grammar"))?;

        let reset = Command::new("git")
            .args(["reset", "--hard", "FETCH_HEAD"])
            .spawn()?
            .wait_with_output()?;
        bail_on_err(&reset, &format!("Failed to reset the {grammar} grammar"))?;
    }

    Ok(())
}

/// Clones the Emscripten SDK into `target/emsdk` and installs and activates
/// the pinned `EMSCRIPTEN_VERSION`. Does nothing when the directory already
/// exists.
///
/// Note that the process working directory is changed to the SDK checkout.
pub fn run_emscripten() -> Result<()> {
    let emscripten_dir = root_dir().join("target").join("emsdk");
    if emscripten_dir.exists() {
        println!("Emscripten SDK already exists");
        return Ok(());
    }

    println!("Cloning the Emscripten SDK...");
    let clone = Command::new("git")
        .args([
            "clone",
            "https://github.com/emscripten-core/emsdk.git",
            &emscripten_dir.to_string_lossy(),
        ])
        .spawn()?
        .wait_with_output()?;
    bail_on_err(&clone, "Failed to clone the Emscripten SDK")?;

    std::env::set_current_dir(&emscripten_dir)?;

    let emsdk = if cfg!(windows) {
        "emsdk.bat"
    } else {
        "./emsdk"
    };

    // Install first, then activate; stop at the first failing step.
    for (action, failure) in [
        ("install", "Failed to install Emscripten"),
        ("activate", "Failed to activate Emscripten"),
    ] {
        let output = Command::new(emsdk)
            .args([action, EMSCRIPTEN_VERSION])
            .spawn()?
            .wait_with_output()?;
        bail_on_err(&output, failure)?;
    }

    Ok(())
}



================================================
FILE: crates/xtask/src/generate.rs
================================================
use std::{collections::BTreeSet, ffi::OsStr, fs, path::Path, process::Command, str::FromStr};

use anyhow::{Context, Result};
use bindgen::RustTarget;

use crate::{bail_on_err, GenerateFixtures};

/// Path of the C API header handed to bindgen by `run_bindings`.
const HEADER_PATH: &str = "lib/include/tree_sitter/api.h";

/// Builds the release `tree-sitter` binary and uses it to regenerate every
/// fixture grammar found under `test/fixtures/grammars`: either as a parser
/// (`generate`) or, with `--wasm`, as a wasm module (`build --wasm`).
pub fn run_fixtures(args: &GenerateFixtures) -> Result<()> {
    let build = Command::new("cargo")
        .args(["build", "--release"])
        .spawn()?
        .wait_with_output()?;
    bail_on_err(&build, "Failed to run cargo build")?;

    let tree_sitter_binary = std::env::current_dir()?
        .join("target")
        .join("release")
        .join("tree-sitter");
    let grammars_dir = std::env::current_dir()?
        .join("test")
        .join("fixtures")
        .join("grammars");

    for grammar_file in find_grammar_files(grammars_dir.to_str().unwrap()).flatten() {
        // The grammar is named after the directory that holds `grammar.js`.
        let grammar_dir = grammar_file.parent().unwrap();
        let grammar_name = grammar_dir.file_name().and_then(OsStr::to_str).unwrap();

        println!(
            "Regenerating {grammar_name} parser{}",
            if args.wasm { " to wasm" } else { "" }
        );

        let mut cmd = Command::new(&tree_sitter_binary);
        let failure = if args.wasm {
            cmd.args([
                "build",
                "--wasm",
                "-o",
                &format!("target/release/tree-sitter-{grammar_name}.wasm"),
                grammar_dir.to_str().unwrap(),
            ]);
            format!("Failed to regenerate {grammar_name} parser to wasm")
        } else {
            cmd.arg("generate")
                .arg("src/grammar.json")
                .arg("--abi=latest")
                .current_dir(grammar_dir);
            format!("Failed to regenerate {grammar_name} parser")
        };

        let output = cmd.spawn()?.wait_with_output()?;
        bail_on_err(&output, &failure)?;
    }

    Ok(())
}

pub fn run_bindings() -> Result<()> {
    let output = Command::new("cargo")
        .args(["metadata", "--format-version", "1"])
        .output()
        .unwrap();

    let metadata = serde_json::from_slice::<serde_json::Value>(&output.stdout).unwrap();

    let Some(rust_version) = metadata
        .get("packages")
        .and_then(|packages| packages.as_array())
        .and_then(|packages| {
            packages.iter().find_map(|package| {
                if package["name"] == "tree-sitter" {
                    package.get("rust_version").and_then(|v| v.as_str())
                } else {
                    None
                }
            })
        })
    else {
        panic!("Failed to find tree-sitter package in cargo metadata");
    };

    let no_copy = [
        "TSInput",
        "TSLanguage",
        "TSLogger",
        "TSLookaheadIterator",
        "TSParser",
        "TSTree",
        "TSQuery",
        "TSQueryCursor",
        "TSQueryCapture",
        "TSQueryMatch",
        "TSQueryPredicateStep",
    ];

    let bindings = bindgen::Builder::default()
        .header(HEADER_PATH)
        .layout_tests(false)
        .allowlist_type("^TS.*")
        .allowlist_function("^ts_.*")
        .allowlist_var("^TREE_SITTER.*")
        .no_copy(no_copy.join("|"))
        .prepend_enum_name(false)
        .use_core()
        .clang_arg("-D TREE_SITTER_FEATURE_WASM")
        .rust_target(RustTarget::from_str(rust_version).unwrap())
        .generate()
        .expect("Failed to generate bindings");

    bindings
        .write_to_file("lib/binding_rust/bindings.rs")
        .with_context(|| "Failed to write bindings")
}

/// Prints, sorted and de-duplicated, every `env.*` import of the
/// `tree-sitter-*.wasm` files found one directory level below `target/`.
///
/// # Errors
///
/// Fails when a directory cannot be listed or `wasm-objdump` cannot be run or
/// exits unsuccessfully.
pub fn run_wasm_exports() -> Result<()> {
    let mut imports = BTreeSet::new();

    // Unreadable directory entries are skipped.
    for entry in fs::read_dir(Path::new("target"))?.flatten() {
        let dir = entry.path();
        if !dir.is_dir() {
            continue;
        }

        for entry in fs::read_dir(&dir)?.flatten() {
            let file = entry.path();
            let is_parser_wasm = file.is_file()
                && file.extension() == Some(OsStr::new("wasm"))
                && file
                    .file_name()
                    .unwrap()
                    .to_str()
                    .unwrap()
                    .starts_with("tree-sitter-");
            if !is_parser_wasm {
                continue;
            }

            let output = Command::new("wasm-objdump")
                .args(["--details", file.to_str().unwrap(), "--section", "Import"])
                .output()?;
            bail_on_err(&output, "Failed to run wasm-objdump")?;

            // Import names appear as `<env.NAME>` in the listing.
            let listing = String::from_utf8_lossy(&output.stdout);
            imports.extend(
                listing
                    .lines()
                    .filter_map(|line| line.split("<env.").nth(1)?.split('>').next())
                    .map(str::to_string),
            );
        }
    }

    for imp in imports {
        println!("{imp}");
    }

    Ok(())
}

/// Lazily walks `dir` recursively and yields the path of every `grammar.js`
/// file, skipping any directory whose path contains `node_modules`.
/// Unreadable directory entries are ignored.
///
/// # Panics
///
/// Panics if `dir` (or a visited sub-directory) cannot be read, or if a
/// sub-directory path is not valid UTF-8.
fn find_grammar_files(
    dir: &str,
) -> impl Iterator<Item = Result<std::path::PathBuf, std::io::Error>> {
    type Found = Box<dyn Iterator<Item = Result<std::path::PathBuf, std::io::Error>>>;

    let entries = fs::read_dir(dir).expect("Failed to read directory");

    entries.filter_map(Result::ok).flat_map(|entry| -> Found {
        let path = entry.path();

        let descend = path.is_dir() && !path.to_string_lossy().contains("node_modules");
        if descend {
            return Box::new(find_grammar_files(path.to_str().unwrap()));
        }

        let is_grammar = path.is_file() && path.file_name() == Some(OsStr::new("grammar.js"));
        if is_grammar {
            Box::new(std::iter::once(Ok(path)))
        } else {
            Box::new(std::iter::empty())
        }
    })
}



================================================
FILE: crates/xtask/src/main.rs
================================================
mod benchmark;
mod build_wasm;
mod bump;
mod check_wasm_exports;
mod clippy;
mod fetch;
mod generate;
mod test;
mod upgrade_emscripten;
mod upgrade_wasmtime;

use std::path::Path;

use anstyle::{AnsiColor, Color, Style};
use anyhow::Result;
use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand};
use git2::{Oid, Repository};
use semver::Version;

// Every task exposed by `xtask`. The `///` comments below are rendered by clap
// as the subcommand help text, so they must describe what the task does today.
#[derive(Subcommand)]
#[command(about="Run various tasks", author=crate_authors!("\n"), styles=get_styles())]
enum Commands {
    /// Runs `cargo benchmark` with some optional environment variables set.
    Benchmark(Benchmark),
    /// Compile the Tree-sitter WASM library. This will create files in the
    /// `lib/binding_web/lib` directory: `web-tree-sitter.mjs` (or
    /// `web-tree-sitter.cjs` with `--cjs`) and `web-tree-sitter.wasm`.
    BuildWasm(BuildWasm),
    /// Compile the Tree-sitter WASM standard library.
    BuildWasmStdlib,
    /// Bumps the version of the workspace.
    BumpVersion(BumpVersion),
    /// Checks that WASM exports are synced.
    CheckWasmExports(CheckWasmExports),
    /// Runs `cargo clippy`.
    Clippy(Clippy),
    /// Fetches emscripten.
    FetchEmscripten,
    /// Fetches the fixtures for testing tree-sitter.
    FetchFixtures,
    /// Generate the Rust bindings from the C library.
    GenerateBindings,
    /// Generates the fixtures for testing tree-sitter.
    GenerateFixtures(GenerateFixtures),
    /// Generate the list of exports from Tree-sitter WASM files.
    GenerateWasmExports,
    /// Run the test suite
    Test(Test),
    /// Run the WASM test suite
    TestWasm,
    /// Upgrade the wasmtime dependency.
    UpgradeWasmtime(UpgradeWasmtime),
    /// Upgrade the emscripten file.
    UpgradeEmscripten,
}

// Command-line options of the `benchmark` subcommand, consumed by
// `benchmark::run`. The `///` comments are clap help text.
#[derive(Args)]
struct Benchmark {
    /// The language to run the benchmarks for.
    #[arg(long, short)]
    language: Option<String>,
    /// The example file to run the benchmarks for.
    #[arg(long, short)]
    example_file_name: Option<String>,
    /// The number of times to parse each sample (default is 5).
    #[arg(long, short, default_value = "5")]
    repetition_count: u32,
    /// Whether to run the benchmarks in debug mode.
    // Uses `-g` explicitly rather than the short flag clap would derive.
    #[arg(long, short = 'g')]
    debug: bool,
}

// Command-line options of the `build-wasm` subcommand, consumed by
// `build_wasm::run_wasm`. The `///` comments are clap help text, so they must
// not contain shell-style escapes such as backslashes before backticks.
#[derive(Args)]
struct BuildWasm {
    /// Compile the library more quickly, with fewer optimizations
    /// and more runtime assertions.
    #[arg(long, short = '0')]
    debug: bool,
    /// Run emscripten using docker, even if `emcc` is installed.
    /// By default, `emcc` will be run directly when available.
    #[arg(long, short)]
    docker: bool,
    /// Run emscripten with verbose output.
    #[arg(long, short)]
    verbose: bool,
    /// Rebuild when relevant files are changed.
    #[arg(long, short)]
    watch: bool,
    /// Emit TypeScript type definitions for the generated bindings,
    /// requires `tsc` to be available.
    #[arg(long, short)]
    emit_tsd: bool,
    /// Generate `CommonJS` modules instead of ES modules.
    #[arg(long, short, env = "CJS")]
    cjs: bool,
}

// Command-line options of the `bump-version` subcommand, consumed by
// `bump::run`. The `///` comments are clap help text.
#[derive(Args)]
struct BumpVersion {
    /// The version to bump to.
    // When omitted, `bump::run` derives the next version from the commits
    // since the latest tag.
    #[arg(long, short)]
    version: Option<Version>,
}

// Command-line options of the `check-wasm-exports` subcommand, consumed by
// `check_wasm_exports::run`. The `///` comments are clap help text.
#[derive(Args)]
struct CheckWasmExports {
    /// Recheck when relevant files are changed.
    #[arg(long, short)]
    watch: bool,
}

// Command-line options of the `clippy` subcommand, consumed by `clippy::run`.
// The `///` comments are clap help text and mirror the cargo invocation that
// `clippy::run` actually builds.
#[derive(Args)]
struct Clippy {
    /// Automatically apply lint suggestions (`clippy --fix`).
    #[arg(long, short)]
    fix: bool,
    /// The package to run Clippy against (`cargo clippy -p <PACKAGE>`).
    #[arg(long, short)]
    package: Option<String>,
}

// Command-line options of the `generate-fixtures` subcommand, consumed by
// `generate::run_fixtures`. The `///` comments are clap help text.
#[derive(Args)]
struct GenerateFixtures {
    /// Generates the parser to WASM
    #[arg(long, short)]
    wasm: bool,
}

// Command-line options of the `test` subcommand, consumed by `test::run`.
// The `///` comments are clap help text.
#[derive(Args)]
struct Test {
    /// Compile C code with the Clang address sanitizer.
    #[arg(long, short)]
    address_sanitizer: bool,
    /// Run only the corpus tests for the given language.
    #[arg(long, short)]
    language: Option<String>,
    /// Run only the corpus tests whose name contain the given string.
    #[arg(long, short)]
    example: Option<String>,
    /// Run the given number of iterations of randomized tests (default 10).
    #[arg(long, short)]
    iterations: Option<u32>,
    /// Set the seed used to control random behavior.
    #[arg(long, short)]
    seed: Option<usize>,
    /// Print parsing log to stderr.
    #[arg(long, short)]
    debug: bool,
    /// Generate an SVG graph of parsing logs.
    // `-D` is set explicitly because `-d` is already taken by `debug`.
    #[arg(long, short = 'D')]
    debug_graph: bool,
    /// Run the tests with a debugger.
    // Short-only flag: there is no `--g` long form.
    #[arg(short)]
    g: bool,
    // Remaining positional arguments, collected verbatim. NOTE(review): how
    // they are forwarded is decided in `test::run`, which is not visible here.
    #[arg(trailing_var_arg = true)]
    args: Vec<String>,
    /// Don't capture the output
    #[arg(long)]
    nocapture: bool,
    /// Enable the wasm tests.
    #[arg(long, short)]
    wasm: bool,
}

// Command-line options of the `upgrade-wasmtime` subcommand, consumed by
// `upgrade_wasmtime::run`. The `///` comments are clap help text.
#[derive(Args)]
struct UpgradeWasmtime {
    /// The version to upgrade to.
    // Required: unlike `BumpVersion::version`, this is not an `Option`.
    #[arg(long, short)]
    version: Version,
}

// Version of this crate, taken from Cargo at compile time.
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
// Optional commit hash, present only when `BUILD_SHA` was set at compile time.
const BUILD_SHA: Option<&str> = option_env!("BUILD_SHA");
// Pinned Emscripten version, read from the loader crate's `emscripten-version`
// file with surrounding ASCII whitespace removed.
const EMSCRIPTEN_VERSION: &str = include_str!("../../loader/emscripten-version").trim_ascii();
// Docker image reference for that Emscripten version. The trim is applied to
// the concatenated string, which strips the trailing whitespace of the file.
const EMSCRIPTEN_TAG: &str = concat!(
    "docker.io/emscripten/emsdk:",
    include_str!("../../loader/emscripten-version")
)
.trim_ascii();

/// Runs the selected task and maps a failure to exit code 1, printing the
/// error unless it is empty. A broken pipe is treated as a normal exit.
fn main() {
    let Err(err) = run() else {
        return;
    };

    // Ignore BrokenPipe errors
    let is_broken_pipe = err
        .downcast_ref::<std::io::Error>()
        .is_some_and(|io_err| io_err.kind() == std::io::ErrorKind::BrokenPipe);
    if is_broken_pipe {
        return;
    }

    if !err.to_string().is_empty() {
        eprintln!("{err:?}");
    }
    std::process::exit(1);
}

/// Parses the command line and dispatches to the module implementing the
/// chosen subcommand.
fn run() -> Result<()> {
    // clap needs a `'static` version string, so the formatted value is leaked.
    let version: &'static str = Box::leak(
        match BUILD_SHA {
            Some(build_sha) => format!("{BUILD_VERSION} ({build_sha})"),
            None => BUILD_VERSION.to_string(),
        }
        .into_boxed_str(),
    );

    let cli = Command::new("xtask")
        .help_template(
            "\
{before-help}{name} {version}
{author-with-newline}{about-with-newline}
{usage-heading} {usage}

{all-args}{after-help}
",
        )
        .version(version)
        .subcommand_required(true)
        .arg_required_else_help(true)
        .disable_help_subcommand(true)
        .disable_colored_help(false);

    let matches = Commands::augment_subcommands(cli).get_matches();
    let command = Commands::from_arg_matches(&matches)?;

    match command {
        Commands::Benchmark(opts) => benchmark::run(&opts)?,
        Commands::BuildWasm(opts) => build_wasm::run_wasm(&opts)?,
        Commands::BuildWasmStdlib => build_wasm::run_wasm_stdlib()?,
        Commands::BumpVersion(opts) => bump::run(opts)?,
        Commands::CheckWasmExports(opts) => check_wasm_exports::run(&opts)?,
        Commands::Clippy(opts) => clippy::run(&opts)?,
        Commands::FetchEmscripten => fetch::run_emscripten()?,
        Commands::FetchFixtures => fetch::run_fixtures()?,
        Commands::GenerateBindings => generate::run_bindings()?,
        Commands::GenerateFixtures(opts) => generate::run_fixtures(&opts)?,
        Commands::GenerateWasmExports => generate::run_wasm_exports()?,
        Commands::Test(opts) => test::run(&opts)?,
        Commands::TestWasm => test::run_wasm()?,
        Commands::UpgradeWasmtime(opts) => upgrade_wasmtime::run(&opts)?,
        Commands::UpgradeEmscripten => upgrade_emscripten::run()?,
    }

    Ok(())
}

/// Returns the directory two levels above this crate's manifest directory
/// (`CARGO_MANIFEST_DIR`, resolved at compile time).
///
/// # Panics
///
/// Panics if the manifest directory does not have two ancestors.
fn root_dir() -> &'static Path {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    // `ancestors()` yields the path itself first, so index 2 is the grandparent.
    manifest_dir.ancestors().nth(2).unwrap()
}

/// Converts the `Output` of a finished process into a `Result`.
///
/// Returns `Ok(())` when the exit status reports success. Otherwise returns an
/// error whose message is `prefix`, a colon and a newline, followed by the
/// captured stderr (decoded lossily as UTF-8).
fn bail_on_err(output: &std::process::Output, prefix: &str) -> Result<()> {
    if output.status.success() {
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(anyhow::anyhow!("{prefix}:\n{stderr}"))
}

#[must_use]
const fn get_styles() -> clap::builder::Styles {
    clap::builder::Styles::styled()
        .usage(
            Style::new()
                .bold()
                .fg_color(Some(Color::Ansi(AnsiColor::Yellow))),
        )
        .header(
            Style::new()
                .bold()
                .fg_color(Some(Color::Ansi(AnsiColor::Yellow))),
        )
        .literal(Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green))))
        .invalid(
            Style::new()
                .bold()
                .fg_color(Some(Color::Ansi(AnsiColor::Red))),
        )
        .error(
            Style::new()
                .bold()
                .fg_color(Some(Color::Ansi(AnsiColor::Red))),
        )
        .valid(
            Style::new()
                .bold()
                .fg_color(Some(Color::Ansi(AnsiColor::Green))),
        )
        .placeholder(Style::new().fg_color(Some(Color::Ansi(AnsiColor::White))))
}

/// Stages `paths` and commits them on top of the current `HEAD`.
///
/// Every entry of `paths` is added to the repository index, the index is
/// written out, and a commit with message `msg` is created whose only parent is
/// the commit `HEAD` currently resolves to. The repository's signature is used
/// as both author and committer, and `HEAD` is updated to the new commit.
///
/// Returns the id of the new commit.
///
/// # Errors
///
/// Propagates any error reported by the underlying repository operations.
pub fn create_commit(repo: &Repository, msg: &str, paths: &[&str]) -> Result<Oid> {
    let mut index = repo.index()?;
    paths
        .iter()
        .try_for_each(|path| index.add_path(Path::new(path)))?;
    index.write()?;

    let tree = repo.find_tree(index.write_tree()?)?;
    let author = repo.signature()?;
    let head_commit = repo.revparse_single("HEAD")?.peel_to_commit()?;

    let commit_id = repo.commit(
        Some("HEAD"),
        &author,
        &author,
        msg,
        &tree,
        &[&head_commit],
    )?;
    Ok(commit_id)
}

/// Runs `$watch_fn` once, then keeps re-running it whenever one of the watched
/// web-binding source files has been written to.
///
/// `$watch_fn` must be callable with no arguments and return a `Result` whose
/// error implements `Display`. Build failures are printed and do not stop the
/// watcher. The macro expands to statements that use `?` and `return Err(..)`,
/// so it must be invoked inside a function returning `anyhow::Result`, with
/// `new_debouncer`, `RecursiveMode`, `EventKind`, `AccessKind`, `AccessMode`,
/// `HashSet`, `PathBuf`, `Duration` and `anyhow!` in scope.
#[macro_export]
macro_rules! watch_wasm {
    ($watch_fn:expr) => {
        if let Err(e) = $watch_fn() {
            eprintln!("{e}");
        } else {
            println!("Build succeeded");
        }

        // Paths of the files that trigger a rebuild, relative to the watched directory.
        let watch_files = [
            "lib/tree-sitter.c",
            "lib/exports.txt",
            "lib/imports.js",
            "lib/prefix.js",
        ]
        .iter()
        .map(PathBuf::from)
        .collect::<HashSet<PathBuf>>();
        let (tx, rx) = std::sync::mpsc::channel();
        let mut debouncer = new_debouncer(Duration::from_secs(1), None, tx)?;
        // The watched files sit in a `lib/` subdirectory, so the watch has to be
        // recursive for their events to be reported at all.
        // NOTE(review): assumes the files live under `lib/binding_web/lib/` — confirm.
        debouncer.watch("lib/binding_web", RecursiveMode::Recursive)?;

        for result in rx {
            match result {
                Ok(events) => {
                    for event in events {
                        // Only react once a writer has closed the file. The path is
                        // compared by trailing components: comparing just the file
                        // name against the two-component entries above can never match.
                        if event.kind == EventKind::Access(AccessKind::Close(AccessMode::Write))
                            && event
                                .paths
                                .iter()
                                .any(|p| watch_files.iter().any(|watched| p.ends_with(watched)))
                        {
                            if let Err(e) = $watch_fn() {
                                eprintln!("{e}");
                            } else {
                                println!("Build succeeded");
                            }
                        }
                    }
                }
                Err(errors) => {
                    return Err(anyhow!(
                        "{}",
                        errors
                            .into_iter()
                            .map(|e| e.to_string())
                            .collect::<Vec<_>>()
                            .join("\n")
                    ));
                }
            }
        }
    };
}



================================================
FILE: crates/xtask/src/test.rs
================================================
use std::{
    env,
    path::Path,
    process::{Command, Stdio},
};

use anyhow::{anyhow, Result};
use regex::Regex;

use crate::{bail_on_err, Test};

/// Runs the Rust test suites (`cargo test`) in the current directory and in
/// `crates/generate`.
///
/// The options in `args` are forwarded to the tests through environment
/// variables (`TREE_SITTER_LANGUAGE`, `TREE_SITTER_EXAMPLE_INCLUDE`,
/// `TREE_SITTER_SEED`, `TREE_SITTER_ITERATIONS`, `TREE_SITTER_LOG`,
/// `TREE_SITTER_LOG_GRAPHS`). With `args.address_sanitizer`, `CFLAGS` (and, for
/// Xcode clang, `RUSTFLAGS`) are set and an explicit `--target` is passed to
/// cargo. With `args.g`, the tests are only built and the resulting
/// `tree_sitter_cli` test binary is opened in `lldb`.
///
/// # Errors
///
/// Returns an error if a helper command (`cc`, `rustup`, `cargo`, `jq`, `lldb`)
/// cannot be started or exits unsuccessfully, or if the active toolchain name
/// cannot be parsed.
pub fn run(args: &Test) -> Result<()> {
    let test_flags = if args.address_sanitizer {
        env::set_var("CFLAGS", "-fsanitize=undefined,address");

        // When the Tree-sitter C library is compiled with the address sanitizer, the address
        // sanitizer runtime library needs to be linked into the final test executable. When
        // using Xcode clang, the Rust linker doesn't know where to find that library, so we
        // need to specify linker flags directly.
        let output = Command::new("cc").arg("-print-runtime-dir").output()?;
        bail_on_err(&output, "Failed to get clang runtime dir")?;
        let runtime_dir = String::from_utf8(output.stdout)?;
        // The compiler terminates the path with a newline, which must not end up inside
        // the linker arguments.
        let runtime_dir = runtime_dir.trim();
        if runtime_dir.contains("/Xcode.app/") {
            env::set_var(
                "RUSTFLAGS",
                format!(
                    "-C link-arg=-L{runtime_dir} -C link-arg=-lclang_rt.asan_osx_dynamic -C link-arg=-Wl,-rpath,{runtime_dir}"
                ),
            );
        }

        // Specify a `--target` explicitly. This is required for address sanitizer support.
        let output = Command::new("rustup")
            .arg("show")
            .arg("active-toolchain")
            .output()?;
        bail_on_err(&output, "Failed to get active Rust toolchain")?;
        let toolchain = String::from_utf8(output.stdout)?;
        let re = Regex::new(r"(stable|beta|nightly)-([_a-z0-9-]+).*")?;
        let captures = re
            .captures(&toolchain)
            .ok_or_else(|| anyhow!("Failed to parse toolchain '{toolchain}'"))?;
        let current_target = captures.get(2).unwrap().as_str();
        format!("--target={current_target}")
    } else {
        String::new()
    };
    if let Some(language) = &args.language {
        env::set_var("TREE_SITTER_LANGUAGE", language);
    }
    if let Some(example) = &args.example {
        env::set_var("TREE_SITTER_EXAMPLE_INCLUDE", example);
    }
    if let Some(seed) = args.seed {
        env::set_var("TREE_SITTER_SEED", seed.to_string());
    }
    if let Some(iterations) = args.iterations {
        env::set_var("TREE_SITTER_ITERATIONS", iterations.to_string());
    }
    if args.debug {
        env::set_var("TREE_SITTER_LOG", "1");
    }
    if args.debug_graph {
        env::set_var("TREE_SITTER_LOG_GRAPHS", "1");
    }

    let run_tests = |subdir: &str| -> Result<()> {
        if args.g {
            // Debugger mode: build the tests without running them, find the test binary in
            // cargo's JSON output with `jq`, then open it in `lldb`.
            let mut cargo_cmd = Command::new("cargo");
            cargo_cmd.current_dir(subdir).arg("test");
            // Without the sanitizer `test_flags` is empty; don't hand cargo an empty argument.
            if !test_flags.is_empty() {
                cargo_cmd.arg(&test_flags);
            }
            cargo_cmd.arg("--no-run").arg("--message-format=json");

            #[cfg(target_os = "windows")]
            cargo_cmd.arg("--").arg("--test-threads=1");

            let mut cargo_child = cargo_cmd.stdout(Stdio::piped()).spawn()?;
            let cargo_stdout = cargo_child
                .stdout
                .take()
                .ok_or_else(|| anyhow!("Failed to capture the output of {cargo_cmd:?}"))?;

            let jq_output = Command::new("jq")
                .arg("-rs")
                .arg(r#"map(select(.target.name == "tree_sitter_cli" and .executable))[0].executable"#)
                .stdin(cargo_stdout)
                .output()?;

            // `jq` has consumed cargo's stdout up to EOF, so waiting here cannot block on a
            // full pipe. Reap the child and stop if the build itself failed.
            if !cargo_child.wait()?.success() {
                return Err(anyhow!("Failed to run {cargo_cmd:?}"));
            }
            bail_on_err(&jq_output, "Failed to extract the test binary path with jq")?;

            let test_binary = String::from_utf8(jq_output.stdout)?;

            let mut lldb_cmd = Command::new("lldb");
            lldb_cmd.arg(test_binary.trim()).arg("--").args(&args.args);
            bail_on_err(
                &lldb_cmd.spawn()?.wait_with_output()?,
                &format!("Failed to run {lldb_cmd:?}"),
            )
        } else {
            let mut cargo_cmd = Command::new("cargo");
            cargo_cmd.current_dir(subdir).arg("test");
            if args.wasm {
                cargo_cmd.arg("--features").arg("wasm");
            }
            if !test_flags.is_empty() {
                cargo_cmd.arg(&test_flags);
            }
            cargo_cmd.args(&args.args);

            #[cfg(target_os = "windows")]
            cargo_cmd.arg("--").arg("--test-threads=1");

            if args.nocapture {
                // On Windows the `--` separator has already been added above.
                #[cfg(not(target_os = "windows"))]
                cargo_cmd.arg("--");

                cargo_cmd.arg("--nocapture");
            }
            bail_on_err(
                &cargo_cmd.spawn()?.wait_with_output()?,
                &format!("Failed to run {cargo_cmd:?}"),
            )
        }
    };

    run_tests(".")?;
    run_tests("crates/generate")?;

    Ok(())
}

/// Runs the web binding's test suite with `npm test` inside `lib/binding_web`.
///
/// Changes the process's current directory to `lib/binding_web`. If `chai` or
/// `mocha` is missing from `node_modules`, `npm install` is run first.
///
/// # Errors
///
/// Returns an error if the directory cannot be entered, if `npm` cannot be
/// started, or if `npm install` / `npm test` exits unsuccessfully.
pub fn run_wasm() -> Result<()> {
    std::env::set_current_dir("lib/binding_web")?;

    let node_modules_dir = Path::new("node_modules");
    let npm = if cfg!(target_os = "windows") {
        "npm.cmd"
    } else {
        "npm"
    };

    if !node_modules_dir.join("chai").exists() || !node_modules_dir.join("mocha").exists() {
        println!("Installing test dependencies...");
        let output = Command::new(npm).arg("install").output()?;
        bail_on_err(&output, "Failed to install test dependencies")?;
    }

    // The child inherits our stdio, so the test results are streamed to the terminal
    // while it runs. Nothing is captured in `output`, which is why there is no output
    // to re-print afterwards; only the exit status is inspected.
    let output = Command::new(npm).arg("test").spawn()?.wait_with_output()?;
    bail_on_err(&output, &format!("Failed to run `{npm} test`"))
}



================================================
FILE: crates/xtask/src/upgrade_emscripten.rs
================================================
use anyhow::{anyhow, Result};
use git2::Repository;
use serde_json::Value;
use std::fs;

use crate::{create_commit, root_dir};

/// Bumps the pinned emscripten version to the first tag listed for the `emsdk`
/// repository and commits the change.
///
/// The tag name is fetched from the GitHub API, written to
/// `crates/loader/emscripten-version` under the repository root, and committed
/// with a `build(deps)` message.
///
/// # Errors
///
/// Returns an error if the request fails, the response is not JSON with at
/// least one named tag, the version file cannot be written, or the commit
/// cannot be created.
pub fn run() -> Result<()> {
    let response = ureq::get("https://api.github.com/repos/emscripten-core/emsdk/tags")
        .call()?
        .body_mut()
        .read_to_string()?;

    let json = serde_json::from_str::<Value>(&response)?;
    let version = json
        .as_array()
        .and_then(|arr| arr.first())
        .and_then(|tag| tag["name"].as_str())
        // Build the error lazily; it is only needed when no tag is present.
        .ok_or_else(|| anyhow!("No tags found"))?;

    let version_file = root_dir()
        .join("crates")
        .join("loader")
        .join("emscripten-version");

    fs::write(version_file, version)?;

    println!("Upgraded emscripten version to {version}");

    // Open the repository at the same root the version file was written under, so the
    // commit does not depend on the directory the task was started from.
    let repo = Repository::open(root_dir())?;
    create_commit(
        &repo,
        &format!("build(deps): bump emscripten to {version}"),
        &["crates/loader/emscripten-version"],
    )?;

    Ok(())
}



================================================
FILE: crates/xtask/src/upgrade_wasmtime.rs
================================================
use std::process::Command;

use anyhow::{Context, Result};
use git2::Repository;
use semver::Version;

use crate::{create_commit, UpgradeWasmtime};

const WASMTIME_RELEASE_URL: &str = "https://github.com/bytecodealliance/wasmtime/releases/download";

fn update_cargo(version: &Version) -> Result<()> {
    let file = std::fs::read_to_string("lib/Cargo.toml")?;
    let mut old_lines = file.lines();
    let mut new_lines = Vec::with_capacity(old_lines.size_hint().0);

    while let Some(line) = old_lines.next() {
        new_lines.push(line.to_string());
        if line == "[dependencies.wasmtime-c-api]" {
            let _ = old_lines.next();
            new_lines.push(format!("version = \"{version}\""));
        }
    }

    std::fs::write("lib/Cargo.toml", new_lines.join("\n") + "\n")?;

    Command::new("cargo")
        .arg("update")
        .status()
        .map(|_| ())
        .with_context(|| "Failed to execute cargo update")
}

fn zig_fetch(lines: &mut Vec<String>, version: &Version, url_suffix: &str) -> Result<()> {
    let url = &format!("{WASMTIME_RELEASE_URL}/v{version}/wasmtime-v{version}-{url_suffix}");
    println!("  Fetching {url}");
    lines.push(format!("      .url = \"{url}\","));

    let output = Command::new("zig")
        .arg("fetch")
        .arg(url)
        .output()
        .with_context(|| format!("Failed to execute zig fetch {url}"))?;

    let hash = String::from_utf8_lossy(&output.stdout);
    lines.push(format!("      .hash = \"{}\",", hash.trim_end()));

    Ok(())
}

/// Rewrites the wasmtime dependency entries in `build.zig.zon` for `version`.
///
/// Every line of the file is copied through unchanged. When a line is the
/// opening line of one of the known `wasmtime_c_api_*` entries, the two lines
/// following it are dropped and replaced by freshly generated `.url` / `.hash`
/// lines from `zig_fetch`.
///
/// # Errors
///
/// Returns an error if the file cannot be read or written, or if `zig_fetch`
/// fails; in the latter case the file is left untouched.
fn update_zig(version: &Version) -> Result<()> {
    /// Opening line of each per-target entry, paired with the suffix of the
    /// release asset to fetch for that target.
    const TARGETS: &[(&str, &str)] = &[
        (
            "    .wasmtime_c_api_aarch64_android = .{",
            "aarch64-android-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_aarch64_linux = .{",
            "aarch64-linux-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_aarch64_macos = .{",
            "aarch64-macos-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_riscv64gc_linux = .{",
            "riscv64gc-linux-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_s390x_linux = .{",
            "s390x-linux-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_x86_64_android = .{",
            "x86_64-android-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_x86_64_linux = .{",
            "x86_64-linux-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_x86_64_macos = .{",
            "x86_64-macos-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_x86_64_mingw = .{",
            "x86_64-mingw-c-api.zip",
        ),
        (
            "    .wasmtime_c_api_x86_64_musl = .{",
            "x86_64-musl-c-api.tar.xz",
        ),
        (
            "    .wasmtime_c_api_x86_64_windows = .{",
            "x86_64-windows-c-api.zip",
        ),
    ];

    let contents = std::fs::read_to_string("build.zig.zon")?;
    let mut remaining = contents.lines();
    let mut updated = Vec::with_capacity(remaining.size_hint().0);

    while let Some(line) = remaining.next() {
        updated.push(line.to_string());

        let entry = TARGETS.iter().find(|(header, _)| *header == line);
        if let Some((_, url_suffix)) = entry {
            // Drop the two lines that follow the entry header; they are regenerated below.
            let _ = remaining.next();
            let _ = remaining.next();
            zig_fetch(&mut updated, version, url_suffix)?;
        }
    }

    std::fs::write("build.zig.zon", updated.join("\n") + "\n")?;

    Ok(())
}

/// Upgrades wasmtime to `args.version` for both the Rust and the Zig build,
/// then commits `lib/Cargo.toml` and `build.zig.zon` in the repository at the
/// current directory.
///
/// # Errors
///
/// Returns the first error reported by either update step, by opening the
/// repository, or by creating the commit.
pub fn run(args: &UpgradeWasmtime) -> Result<()> {
    let version = &args.version;

    println!("Upgrading wasmtime for Rust");
    update_cargo(version)?;

    println!("Upgrading wasmtime for Zig");
    update_zig(version)?;

    let commit_message = format!("build(deps): bump wasmtime-c-api to v{version}");
    let changed_files = ["lib/Cargo.toml", "build.zig.zon"];

    let repo = Repository::open(".")?;
    create_commit(&repo, &commit_message, &changed_files)?;

    Ok(())
}



================================================
FILE: docs/book.toml
================================================
# mdBook configuration for the Tree-sitter documentation.
[book]
authors = [
  "Max Brunsfeld <maxbrunsfeld@gmail.com>",
  "Amaan Qureshi <amaanq12@gmail.com>",
]
language = "en"
multilingual = false
src = "src"
title = "Tree-sitter"

# HTML output: extra stylesheets and scripts (playground, admonish blocks) and
# the repository links used for the "edit this page" feature.
[output.html]
additional-css = [
  "src/assets/css/playground.css",
  "src/assets/css/mdbook-admonish.css",
]
additional-js = ["src/assets/js/playground.js"]
git-repository-url = "https://github.com/tree-sitter/tree-sitter"
git-repository-icon = "fa-github"
edit-url-template = "https://github.com/tree-sitter/tree-sitter/edit/master/docs/{path}"

# Search settings for the HTML output.
[output.html.search]
limit-results = 20
use-boolean-and = true
boost-title = 2
boost-hierarchy = 2
boost-paragraph = 1
expand = true

[preprocessor]

# Preprocessor for the ```admonish blocks used throughout the docs.
[preprocessor.admonish]
command = "mdbook-admonish"
assets_version = "3.0.2"    # do not edit: managed by `mdbook-admonish install`



================================================
FILE: docs/src/3-syntax-highlighting.md
================================================
# Syntax Highlighting

Syntax highlighting is a very common feature in applications that deal with code. Tree-sitter has built-in support for
syntax highlighting via the [`tree-sitter-highlight`][highlight crate] library, which is now used on GitHub.com for highlighting
code written in several languages. You can also perform syntax highlighting at the command line using the
`tree-sitter highlight` command.

This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are
using the `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the
configuration data is provided using in-memory objects, rather than files.

## Overview

All the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter
grammar for that language (for example, [`tree-sitter-javascript`][js grammar], [`tree-sitter-ruby`][ruby grammar]).
To run syntax highlighting from the command-line, three types of files are needed:

1. Per-user configuration in `~/.config/tree-sitter/config.json` (see the [init-config][init-config] page for more info).
2. Language configuration in grammar repositories' `tree-sitter.json` files (see the [init][init] page for more info).
3. Tree queries in the grammar repositories' `queries` folders.

For an example of the language-specific files, see the [`tree-sitter.json` file][ts json] and [`queries` directory][queries]
in the `tree-sitter-ruby` repository. The following sections describe the behavior of each file.

## Language Configuration

The `tree-sitter.json` file is used by the Tree-sitter CLI. Within this file, the CLI looks for data nested under the
top-level `"grammars"` key. This key is expected to contain an array of objects with the following keys:

### Basics

These keys specify basic information about the parser:

- `scope` (required) — A string like `"source.js"` that identifies the language. We strive to match the scope names used
by popular [TextMate grammars][textmate] and by the [Linguist][linguist] library.

- `path` (optional) — A relative path from the directory containing `tree-sitter.json` to another directory containing
the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same
folder as `tree-sitter.json`), and this very rarely needs to be overridden.

- `external-files` (optional) — A list of relative paths from the root dir of a
parser to files that should be checked for modifications during recompilation.
This is useful during development to have changes to other files besides scanner.c
be picked up by the cli.

### Language Detection

These keys help to decide whether the language applies to a given file:

- `file-types` — An array of filename suffix strings. The grammar will be used for files whose names end with one of these
suffixes. Note that the suffix may match an *entire* filename.

- `first-line-regex` — A regex pattern that will be tested against the first line of a file to determine whether this language
applies to the file. If present, this regex will be used for any file whose name does not match any grammar's `file-types`.

- `content-regex` — A regex pattern that will be tested against the contents of the file to break ties in cases where
multiple grammars matched the file using the above two criteria. If the regex matches, this grammar will be preferred over
another grammar with no `content-regex`. If the regex does not match, a grammar with no `content-regex` will be preferred
over this one.

- `injection-regex` — A regex pattern that will be tested against a *language name* to determine whether this language
should be used for a potential *language injection* site. Language injection is described in more detail in [a later section](#language-injection).

### Query Paths

These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:

- `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm`.
- `locals` — Path to a *local variable query*. Default: `queries/locals.scm`.
- `injections` — Path to an *injection query*. Default: `queries/injections.scm`.

The behaviors of these three files are described in the next section.

## Queries

Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on Tree-sitter's
syntax trees. See [this section][pattern matching] of the documentation for more information
about tree queries.

Syntax highlighting is controlled by *three* different types of query files that are usually included in the `queries` folder.
The default names for the query files use the `.scm` file extension. We chose this extension because it is commonly used for files written
in [Scheme][scheme], a popular dialect of Lisp, and these query files use a Lisp-like syntax.

### Highlights

The most important query is called the highlights query. The highlights query uses *captures* to assign arbitrary
*highlight names* to different nodes in the tree. Each highlight name can then be mapped to a color
(as described in the [init-config command][theme]). Commonly used highlight names include
`keyword`, `function`, `type`, `property`, and `string`. Names can also be dot-separated like `function.builtin`.

#### Example Go Snippet

For example, consider the following Go code:

```go
func increment(a int) int {
    return a + 1
}
```

With this syntax tree:

```scheme
(source_file
  (function_declaration
    name: (identifier)
    parameters: (parameter_list
      (parameter_declaration
        name: (identifier)
        type: (type_identifier)))
    result: (type_identifier)
    body: (block
      (return_statement
        (expression_list
          (binary_expression
            left: (identifier)
            right: (int_literal)))))))
```

#### Example Query

Suppose we wanted to render this code with the following colors:

- keywords `func` and `return` in purple
- function `increment` in blue
- type `int` in green
- number `1` in brown

We can assign each of these categories a *highlight name* using a query like this:

```scheme
; highlights.scm

"func" @keyword
"return" @keyword
(type_identifier) @type
(int_literal) @number
(function_declaration name: (identifier) @function)
```

Then, in our config file, we could map each of these highlight names to a color:

```json
{
  "theme": {
    "keyword": "purple",
    "function": "blue",
    "type": "green",
    "number": "brown"
  }
}
```

#### Highlights Result

Running `tree-sitter highlight` on this Go file would produce output like this:

```admonish example collapsible=true, title='Output'
<pre class='highlight'>
<span style='color: purple;'>func</span> <span style='color: #005fd7;'>increment</span>(<span>a</span> <span style='color: green;'>int</span>) <span style='color: green;'>int</span> {
    <span style='color: purple;'>return</span> <span>a</span> <span style='font-weight: bold;color: #4e4e4e;'>+</span> <span style='font-weight: bold;color: #875f00;'>1</span>
}
</pre>
```

### Local Variables

Good syntax highlighting helps the reader to quickly distinguish between the different types of *entities* in their code.
Ideally, if a given entity appears in *multiple* places, it should be colored the same in each place. The Tree-sitter syntax
highlighting system can help you to achieve this by keeping track of local scopes and variables.

The *local variables* query is different from the highlights query in that, while the highlights query uses *arbitrary*
capture names, which can then be mapped to colors, the local variables query uses a fixed set of capture names, each of
which has a special meaning.

The capture names are as follows:

- `@local.scope` — indicates that a syntax node introduces a new local scope.
- `@local.definition` — indicates that a syntax node contains the *name* of a definition within the current local scope.
- `@local.reference` — indicates that a syntax node contains the *name*, which *may* refer to an earlier definition within
some enclosing scope.

When highlighting a file, Tree-sitter will keep track of the set of scopes that contains any given position, and the set
of definitions within each scope. When processing a syntax node that is captured as a `local.reference`, Tree-sitter will
try to find a definition for a name that matches the node's text. If it finds a match, Tree-sitter will ensure that the
*reference*, and the *definition* are colored the same.

The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes
that have been identified as local variables by adding the predicate `(#is-not? local)` to the pattern. This is used in
the example below:

#### Example Ruby Snippet

Consider this Ruby code:

```ruby
def process_list(list)
  context = current_context
  list.map do |item|
    process_item(item, context)
  end
end

item = 5
list = [item]
```

With this syntax tree:

```scheme
(program
  (method
    name: (identifier)
    parameters: (method_parameters
      (identifier))
    (assignment
      left: (identifier)
      right: (identifier))
    (method_call
      method: (call
        receiver: (identifier)
        method: (identifier))
      block: (do_block
        (block_parameters
          (identifier))
        (method_call
          method: (identifier)
          arguments: (argument_list
            (identifier)
            (identifier))))))
  (assignment
    left: (identifier)
    right: (integer))
  (assignment
    left: (identifier)
    right: (array
      (identifier))))
```

There are several types of names within this method:

- `process_list` is a method.
- Within this method, `list` is a formal parameter.
- `context` is a local variable.
- `current_context` is *not* a local variable, so it must be a method.
- Within the `do` block, `item` is a formal parameter.
- Later on, `item` and `list` are both local variables (not formal parameters).

#### Example Queries

Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query,
as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters:

```scheme
; highlights.scm

(call method: (identifier) @function.method)
(method_call method: (identifier) @function.method)

(method name: (identifier) @function.method)

(method_parameters (identifier) @variable.parameter)
(block_parameters (identifier) @variable.parameter)

((identifier) @function.method
 (#is-not? local))
```

Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods
and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered
*references*:

```scheme
; locals.scm

(method) @local.scope
(do_block) @local.scope

(method_parameters (identifier) @local.definition)
(block_parameters (identifier) @local.definition)

(assignment left:(identifier) @local.definition)

(identifier) @local.reference
```

#### Locals Result

Running `tree-sitter highlight` on this Ruby file would produce output like this:

```admonish example collapsible=true, title='Output'
<pre class='highlight'>
<span style='color: purple;'>def</span> <span style='color: #005fd7;'>process_list</span><span style='color: #4e4e4e;'>(</span><span style='text-decoration: underline;'>list</span><span style='color: #4e4e4e;'>)</span>
  <span>context</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> <span style='color: #005fd7;'>current_context</span>
  <span style='text-decoration: underline;'>list</span><span style='color: #4e4e4e;'>.</span><span style='color: #005fd7;'>map</span> <span style='color: purple;'>do</span> |<span style='text-decoration: underline;'>item</span>|
    <span style='color: #005fd7;'>process_item</span>(<span style='text-decoration: underline;'>item</span><span style='color: #4e4e4e;'>,</span> <span>context</span><span style='color: #4e4e4e;'>)</span>
  <span style='color: purple;'>end</span>
<span style='color: purple;'>end</span>

<span>item</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> <span style='font-weight: bold;color: #875f00;'>5</span>
<span>list</span> <span style='font-weight: bold;color: #4e4e4e;'>=</span> [<span>item</span><span style='color: #4e4e4e;'>]</span>
</pre>
```

### Language Injection

Some source files contain code written in multiple different languages. Examples include:

- HTML files, which can contain JavaScript inside `<script>` tags and CSS inside `<style>` tags
- [ERB][erb] files, which contain Ruby inside `<% %>` tags, and HTML outside those tags
- PHP files, which can contain HTML between the `<?php` tags
- JavaScript files, which contain regular expression syntax within regex literals
- Ruby, which can contain snippets of code inside heredoc literals, where the heredoc delimiter often indicates the language

All of these examples can be modeled in terms of a *parent* syntax tree and one or more *injected* syntax trees, which reside
*inside* of certain nodes in the parent tree. The language injection query allows you to specify these "injections" using
the following captures:

- `@injection.content` — indicates that the captured node should have its contents re-parsed using another language.
- `@injection.language` — indicates that the captured node's text may contain the *name* of a language that should be used
to re-parse the `@injection.content`.

The language injection behavior can also be configured by some properties associated with patterns:

- `injection.language` — can be used to hard-code the name of a specific language.
- `injection.combined` — indicates that *all* the matching nodes in the tree
  should have their content parsed as *one* nested document.
- `injection.include-children` — indicates that the `@injection.content` node's
  *entire* text should be re-parsed, including the text of its child nodes. By default,
  child nodes' text will be *excluded* from the injected document.
- `injection.self` — indicates that the `@injection.content` node should be parsed
  using the same language as the node itself. This is useful for cases where the
  node's language is not known until runtime (e.g. via inheriting another language).
- `injection.parent` — indicates that the `@injection.content` node should be parsed
  using the same language as the node's parent language. This is only meant for injections
  that need to refer back to the parent language to parse the node's text inside
  the injected language.

#### Examples

Consider this Ruby code:

```ruby
system <<-BASH.strip!
  abc --def | ghi > jkl
BASH
```

With this syntax tree:

```scheme
(program
  (method_call
    method: (identifier)
    arguments: (argument_list
      (call
        receiver: (heredoc_beginning)
        method: (identifier))))
  (heredoc_body
    (heredoc_end)))
```

The following query would specify that the contents of the heredoc should be parsed using a language named "BASH"
(because that is the text of the `heredoc_end` node):

```scheme
(heredoc_body
  (heredoc_end) @injection.language) @injection.content
```

You can also force the language using the `#set!` predicate.
For example, this will force the language to be always `ruby`.

```scheme
((heredoc_body) @injection.content
 (#set! injection.language "ruby"))
```

## Unit Testing

Tree-sitter has a built-in way to verify the results of syntax highlighting. The interface is based on [Sublime Text's system][sublime]
for testing highlighting.

Tests are written as normal source code files that contain specially-formatted *comments* that make assertions about the
surrounding syntax highlighting. These files are stored in the `test/highlight` directory in a grammar repository.

Here is an example of a syntax highlighting test for JavaScript:

```js
var abc = function(d) {
  // <- keyword
  //          ^ keyword
  //               ^ variable.parameter
  // ^ function

  if (a) {
  // <- keyword
  // ^ punctuation.bracket

    foo(`foo ${bar}`);
    // <- function
    //    ^ string
    //          ^ variable
  }

  baz();
  // <- !variable
};
```

```admonish cite title='From the Sublime Text docs'
The two types of tests are:

**Caret**: ^ this will test the following selector against the scope on the most recent non-test line. It will test it
at the same column the ^ is in. Consecutive ^s will test each column against the selector.

**Arrow**: <- this will test the following selector against the scope on the most recent non-test line. It will test it
at the same column as the comment character is in.
```
```admonish note
An exclamation mark (`!`) can be used to negate a selector. For example, `!keyword` will match any scope that is
not the `keyword` class.
```

[erb]: https://en.wikipedia.org/wiki/ERuby
[highlight crate]: https://github.com/tree-sitter/tree-sitter/tree/master/crates/highlight
[init-config]: ./cli/init-config.md
[init]: ./cli/init.md#structure-of-tree-sitterjson
[js grammar]: https://github.com/tree-sitter/tree-sitter-javascript
[linguist]: https://github.com/github/linguist
[pattern matching]: ./using-parsers/queries/index.md
[queries]: https://github.com/tree-sitter/tree-sitter-ruby/tree/master/queries
[ruby grammar]: https://github.com/tree-sitter/tree-sitter-ruby
[scheme]: https://en.wikipedia.org/wiki/Scheme_%28programming_language%29
[sublime]: https://www.sublimetext.com/docs/3/syntax.html#testing
[textmate]: https://macromates.com/manual/en/language_grammars
[theme]: ./cli/init-config.md#theme
[ts json]: https://github.com/tree-sitter/tree-sitter-ruby/blob/master/tree-sitter.json



================================================
FILE: docs/src/4-code-navigation.md
================================================
# Code Navigation Systems

Tree-sitter can be used in conjunction with its [query language][query language] as a part of code navigation systems.
An example of such a system can be seen in the `tree-sitter tags` command, which emits a textual dump of the interesting
syntactic nodes in its file argument. A notable application of this is GitHub's support for [search-based code navigation][gh search].
This document exists to describe how to integrate with such systems, and how to extend this functionality to any language with a Tree-sitter grammar.

## Tagging and captures

_Tagging_ is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those
entities. Having found them, you use a syntax capture to label the entity and its name.

The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched
(i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used
(i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture
following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name
of a given identifier.

You may optionally include a capture named `@doc` to bind a docstring. For convenience, the tagging system provides
two built-in functions, `#select-adjacent!` and `#strip!`, that are useful for removing comment syntax from a docstring.
`#strip!` takes a capture as its first argument and a regular expression as its second, expressed as a quoted string.
Any text patterns matched by the regular expression will be removed from the text associated with the passed capture.
`#select-adjacent!`, when passed two capture names, filters the text associated with the first capture so that only nodes
adjacent to the second capture are preserved. This can be useful when writing queries that would otherwise include too much
information in matched comments.

## Examples

This [query][query] recognizes Python function definitions and captures their declared name. The `function_definition`
syntax node is defined in the [Python Tree-sitter grammar][node].

```query
(function_definition
  name: (identifier) @name) @definition.function
```

A more sophisticated query can be found in the [JavaScript Tree-sitter repository][js query]:

```query
(assignment_expression
  left: [
    (identifier) @name
    (member_expression
      property: (property_identifier) @name)
  ]
  right: [(arrow_function) (function)]
) @definition.function
```

An even more sophisticated query is in the [Ruby Tree-sitter repository][ruby query], which uses built-in functions to
strip the Ruby comment character (`#`) from the docstrings associated with a class or singleton-class declaration, then
selects only the docstrings adjacent to the node matched as `@definition.class`.

```query
(
  (comment)* @doc
  .
  [
    (class
      name: [
        (constant) @name
        (scope_resolution
          name: (_) @name)
      ]) @definition.class
    (singleton_class
      value: [
        (constant) @name
        (scope_resolution
          name: (_) @name)
      ]) @definition.class
  ]
  (#strip! @doc "^#\\s*")
  (#select-adjacent! @doc @definition.class)
)
```

The below table describes a standard vocabulary for kinds and roles during the tagging process. New applications may extend
(or only recognize a subset of) these capture names, but it is desirable to standardize on the names below.

| Category                 | Tag                         |
| ------------------------ | --------------------------- |
| Class definitions        | `@definition.class`         |
| Function definitions     | `@definition.function`      |
| Interface definitions    | `@definition.interface`     |
| Method definitions       | `@definition.method`        |
| Module definitions       | `@definition.module`        |
| Function/method calls    | `@reference.call`           |
| Class reference          | `@reference.class`          |
| Interface implementation | `@reference.implementation` |

## Command-line invocation

You can use the `tree-sitter tags` command to test out a tags query file, passing as arguments one or more files to tag.
We can run this tool from within the Tree-sitter Ruby repository, over code in a file called `test.rb`:

```ruby
module Foo
  class Bar
    # won't be included

    # is adjacent, will be
    def baz
    end
  end
end
```

Invoking `tree-sitter tags test.rb` produces the following console output, representing matched entities' name, role, location,
first line, and docstring:

```text
    test.rb
        Foo              | module       def (0, 7) - (0, 10) `module Foo`
        Bar              | class        def (1, 8) - (1, 11) `class Bar`
        baz              | method       def (5, 8) - (5, 11) `def baz`  "is adjacent, will be"
```

It is expected that tag queries for a given language are located at `queries/tags.scm` in that language's repository.

## Unit Testing

Tags queries may be tested with `tree-sitter test`. Files under `test/tags/` are checked using the same comment system as
[highlights queries][unit testing]. For example, the above Ruby tags can be tested with these comments:

```ruby
module Foo
  #     ^ definition.module
  class Bar
    #    ^ definition.class

    def baz
      #  ^ definition.method
    end
  end
end
```

[gh search]: https://docs.github.com/en/repositories/working-with-files/using-files/navigating-code-on-github#precise-and-search-based-navigation
[js query]: https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/queries/tags.scm#L63-L70
[node]: https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/grammar.js#L354
[query]: https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/queries/tags.scm#L4-L5
[ruby query]: https://github.com/tree-sitter/tree-sitter-ruby/blob/1ebfdb288842dae5a9233e2509a135949023dd82/queries/tags.scm#L24-L43
[query language]: ./using-parsers/queries/index.md
[unit testing]: ./3-syntax-highlighting.md#unit-testing



================================================
FILE: docs/src/5-implementation.md
================================================
# Implementation

Tree-sitter consists of two components: a C library (`libtree-sitter`), and a command-line tool (the `tree-sitter` CLI).

The library, `libtree-sitter`, is used in combination with the parsers
generated by the CLI, to produce syntax trees from source code and keep the
syntax trees up-to-date as the source code changes. `libtree-sitter` is designed to be embedded in applications. It is
written in plain C. Its interface is specified in the header file [`tree_sitter/api.h`][api.h].

The CLI is used to generate a parser for a language by supplying a [context-free grammar][cfg] describing the
language. The CLI is a build tool; it is no longer needed once a parser has been generated. It is written in Rust, and is
available on [crates.io][crates], [npm][npm], and as a pre-built binary [on GitHub][gh].

## The CLI

The `tree-sitter` CLI's most important feature is the `generate` command. This subcommand reads in a context-free grammar
from a file called `grammar.js` and outputs a parser as a C file called `parser.c`. The source files in the [`cli/src`][src]
directory all play a role in producing the code in `parser.c`. This section will describe some key parts of this process.

### Parsing a Grammar

First, Tree-sitter must evaluate the JavaScript code in `grammar.js` and convert the grammar to a JSON format. It does this
by shelling out to `node`. The format of the grammars is formally specified by the JSON schema in [grammar.schema.json][schema].
The parsing is implemented in [parse_grammar.rs][parse grammar].

### Grammar Rules

A Tree-sitter grammar is composed of a set of *rules* — objects that describe how syntax nodes can be composed of other
syntax nodes. There are several types of rules: symbols, strings, regexes, sequences, choices, repetitions, and a few others.
Internally, these are all represented using an [enum][enum] called [`Rule`][rules.rs].

### Preparing a Grammar

Once a grammar has been parsed, it must be transformed in several ways before it can be used to generate a parser. Each
transformation is implemented by a separate file in the [`prepare_grammar`][prepare grammar] directory, and the transformations
are ultimately composed together in `prepare_grammar/mod.rs`.

At the end of these transformations, the initial grammar is split into two grammars: a *syntax grammar* and a *lexical grammar*.
The syntax grammar describes how the language's [*non-terminal symbols*][symbols] are constructed from other grammar symbols,
and the lexical grammar describes how the grammar's *terminal symbols* (strings and regexes) can be
composed of individual characters.

### Building Parse Tables

## The Runtime

WIP

[api.h]: https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h
[cfg]: https://en.wikipedia.org/wiki/Context-free_grammar
[crates]: https://crates.io
[npm]: https://npmjs.com
[gh]: https://github.com/tree-sitter/tree-sitter/releases/latest
[src]: https://github.com/tree-sitter/tree-sitter/tree/master/crates/cli/src
[schema]: https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json
[parse grammar]: https://github.com/tree-sitter/tree-sitter/blob/master/crates/generate/src/parse_grammar.rs
[enum]: https://doc.rust-lang.org/book/ch06-01-defining-an-enum.html
[rules.rs]: https://github.com/tree-sitter/tree-sitter/blob/master/crates/generate/src/rules.rs
[prepare grammar]: https://github.com/tree-sitter/tree-sitter/tree/master/crates/generate/src/prepare_grammar
[symbols]: https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols



================================================
FILE: docs/src/6-contributing.md
================================================
# Contributing

## Code of Conduct

Contributors to Tree-sitter should abide by the [Contributor Covenant][covenant].

## Developing Tree-sitter

### Prerequisites

To make changes to Tree-sitter, you should have:

1. A C compiler, for compiling the core library and the generated parsers.
2. A [Rust toolchain][rust], for compiling the Rust bindings, the highlighting library, and the CLI.
3. Node.js and NPM, for generating parsers from `grammar.js` files.
4. Either [Emscripten][emscripten], [Docker][docker], or [podman][podman] for
compiling the library to WASM.

### Building

Clone the repository:

```sh
git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter
```

Optionally, build the WASM library. If you skip this step, then the `tree-sitter playground` command will require an internet
connection. If you have Emscripten installed, this will use your `emcc` compiler. Otherwise, it will use Docker or Podman:

```sh
cd lib/binding_web
npm install # or your JS package manager of choice
npm run build
```

Build the Rust libraries and the CLI:

```sh
cargo build --release
```

This will create the `tree-sitter` CLI executable in the `target/release` folder.

If you want to automatically install the `tree-sitter` CLI in your system, you can run:

```sh
cargo install --path crates/cli
```

If you're going to be in a fast iteration cycle and would like the CLI to build faster, you can use the `release-dev` profile:

```sh
cargo build --profile release-dev
# or
cargo install --path crates/cli --profile release-dev
```

### Testing

Before you can run the tests, you need to fetch some upstream grammars that are used for testing:

```sh
cargo xtask fetch-fixtures
```

To test any changes you've made to the CLI, you can regenerate these parsers using your current CLI code:

```sh
cargo xtask generate-fixtures
```

Then you can run the tests:

```sh
cargo xtask test
```

Similarly, to test the WASM binding, you need to compile these parsers to WASM:

```sh
cargo xtask generate-fixtures --wasm
cargo xtask test-wasm
```

### Debugging

The test script has a number of useful flags. You can list them all by running `cargo xtask test -h`.
Here are some of the main flags:

If you want to run a specific unit test, pass its name (or part of its name) as an argument:

```sh
cargo xtask test test_does_something
```

You can run the tests under the debugger (either `lldb` or `gdb`) using the `-g` flag:

```sh
cargo xtask test -g test_does_something
```

Part of the Tree-sitter test suite involves parsing the _corpus_ tests for several languages and performing randomized edits
to each example in the corpus. If you just want to run the tests for a particular _language_, you can pass the `-l` flag.
Additionally, if you want to run a particular _example_ from the corpus, you can pass the `-e` flag:

```sh
cargo xtask test -l javascript -e Arrays
```

## Published Packages

The main [`tree-sitter/tree-sitter`][ts repo] repository contains the source code for
several packages that are published to package registries for different languages:

* Rust crates on [crates.io][crates]:
  * [`tree-sitter`][lib crate] — A Rust binding to the core library
  * [`tree-sitter-highlight`][highlight crate] — The syntax-highlighting library
  * [`tree-sitter-cli`][cli crate] — The command-line tool

* JavaScript modules on [npmjs.com][npmjs]:
  * [`web-tree-sitter`][web-ts] — A WASM-based JavaScript binding to the core library
  * [`tree-sitter-cli`][cli package] — The command-line tool

There are also several other dependent repositories that contain other published packages:

* [`tree-sitter/node-tree-sitter`][node ts] — Node.js bindings to the core library,
published as [`tree-sitter`][node package] on [npmjs.com][npmjs].
* [`tree-sitter/py-tree-sitter`][py ts] — Python bindings to the core library,
published as [`tree-sitter`][py package] on [PyPI.org][pypi].
* [`tree-sitter/go-tree-sitter`][go ts] — Go bindings to the core library,
published as [`tree_sitter`][go package] on [pkg.go.dev][go.dev].

## Developing Documentation

Our current static site generator for documentation is [`mdBook`][mdBook], with a little bit of custom JavaScript to handle
the playground page. Most of the documentation is written in Markdown, including this file! You can find these files
at [`docs/src`][docs src]. If you'd like to submit a PR to improve the documentation, navigate to the page you'd like to
edit and hit the edit icon at the top right of the page.

### Prerequisites for Local Development

```admonish note
We're assuming you have `cargo` installed, the Rust package manager.
```

To run and iterate on the docs locally, the
[`mdbook`][mdbook cli] CLI tool is required, which can be installed with:

```sh
cargo install mdbook
```

You might have noticed we have some fancy admonitions sprinkled throughout the documentation, like the note above.
These are created using [`mdbook-admonish`][admonish], a [preprocessor][preprocessor] for `mdBook`. As such, this is also
a requirement for developing the documentation locally. To install it, run:

```sh
cargo install mdbook-admonish
```

Once you've installed it, you can begin using admonitions in your markdown files. See the [reference][admonish reference]
for more information.

### Spinning it up

Now that you've installed the prerequisites, you can run the following command to start a local server:

```sh
cd docs
mdbook serve --open
```

`mdbook` has a live-reload feature, so any changes you make to the markdown files will be reflected in the browser after
a short delay. Once you've made a change that you're happy with, you can submit a PR with your changes.

### Improving the Playground

The playground page is a little more complicated, but if you know some basic JavaScript and CSS you should be able to make
changes. The playground code can be found in [`docs/src/assets/js/playground.js`][playground], and its corresponding CSS
at [`docs/src/assets/css/playground.css`][playground css]. The editor we use for the playground is [CodeMirror][codemirror],
and the tree-sitter module is fetched from [here][js url]. This module, along with the wasm module and wasm parsers, lives in the
[.github.io repo][gh.io repo].

[admonish]: https://github.com/tommilligan/mdbook-admonish
[admonish reference]: https://tommilligan.github.io/mdbook-admonish/reference.html
[cli crate]: https://crates.io/crates/tree-sitter-cli
[cli package]: https://www.npmjs.com/package/tree-sitter-cli
[codemirror]: https://codemirror.net
[covenant]: https://www.contributor-covenant.org/version/1/4/code-of-conduct
[crates]: https://crates.io
[docker]: https://www.docker.com
[docs src]: https://github.com/tree-sitter/tree-sitter/tree/master/docs/src
[emscripten]: https://emscripten.org
[gh.io repo]: https://github.com/tree-sitter/tree-sitter.github.io
[go.dev]: https://pkg.go.dev
[go package]: https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter
[go ts]: https://github.com/tree-sitter/go-tree-sitter
[highlight crate]: https://crates.io/crates/tree-sitter-highlight
[js url]: https://tree-sitter.github.io/web-tree-sitter.js
[lib crate]: https://crates.io/crates/tree-sitter
[mdBook]: https://rust-lang.github.io/mdBook
[mdbook cli]: https://rust-lang.github.io/mdBook/guide/installation.html
[node package]: https://www.npmjs.com/package/tree-sitter
[node ts]: https://github.com/tree-sitter/node-tree-sitter
[npmjs]: https://npmjs.com
[playground]: https://github.com/tree-sitter/tree-sitter/blob/master/docs/src/assets/js/playground.js
[playground css]: https://github.com/tree-sitter/tree-sitter/blob/master/docs/src/assets/css/playground.css
[podman]: https://podman.io
[preprocessor]: https://rust-lang.github.io/mdBook/for_developers/preprocessors.html
[py package]: https://pypi.org/project/tree-sitter
[py ts]: https://github.com/tree-sitter/py-tree-sitter
[pypi]: https://pypi.org
[rust]: https://rustup.rs
[ts repo]: https://github.com/tree-sitter/tree-sitter
[web-ts]: https://www.npmjs.com/package/web-tree-sitter



================================================
FILE: docs/src/7-playground.md
================================================
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.css">

<h1>Syntax Tree Playground</h1>

<div id="playground-container" class="ts-playground" style="visibility: hidden;">

<h2>Code</h2>

<div class="custom-select">
  <button id="language-button" class="select-button">
    <span class="selected-value">JavaScript</span>
    <svg class="arrow" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
      <polyline points="6 9 12 15 18 9"></polyline>
    </svg>
  </button>
  <div class="select-dropdown">
    <div class="option" data-value="bash">Bash</div>
    <div class="option" data-value="c">C</div>
    <div class="option" data-value="cpp">C++</div>
    <div class="option" data-value="c_sharp">C#</div>
    <div class="option" data-value="go">Go</div>
    <div class="option" data-value="html">HTML</div>
    <div class="option" data-value="java">Java</div>
    <div class="option" data-value="javascript">JavaScript</div>
    <div class="option" data-value="php">PHP</div>
    <div class="option" data-value="python">Python</div>
    <div class="option" data-value="ruby">Ruby</div>
    <div class="option" data-value="rust">Rust</div>
    <div class="option" data-value="toml">TOML</div>
    <div class="option" data-value="typescript">TypeScript</div>
    <div class="option" data-value="yaml">YAML</div>
  </div>
  <select id="language-select" style="display: none;">
    <option value="bash">Bash</option>
    <option value="c">C</option>
    <option value="cpp">C++</option>
    <option value="c_sharp">C#</option>
    <option value="go">Go</option>
    <option value="html">HTML</option>
    <option value="java">Java</option>
    <option value="javascript" selected="selected">JavaScript</option>
    <option value="php">PHP</option>
    <option value="python">Python</option>
    <option value="ruby">Ruby</option>
    <option value="rust">Rust</option>
    <option value="toml">TOML</option>
    <option value="typescript">TypeScript</option>
    <option value="yaml">YAML</option>
  </select>
</div>

<input id="logging-checkbox" type="checkbox"></input>
<label for="logging-checkbox">Log</label>

<input id="anonymous-nodes-checkbox" type="checkbox"></input>
<label for="anonymous-nodes-checkbox">Show anonymous nodes</label>

<input id="query-checkbox" type="checkbox"></input>
<label for="query-checkbox">Query</label>

<input id="accessibility-checkbox" type="checkbox"></input>
<label for="accessibility-checkbox">Accessibility</label>

<textarea id="code-input">
</textarea>

<div id="query-container" style="visibility: hidden; position: absolute;">
<h2>Query</h2>
<textarea id="query-input"></textarea>
</div>

<h2>Tree</h2>
<span id="update-time"></span>
<div id="output-container-scroll">
<pre id="output-container" class="highlight"></pre>
</div>

<h2 id="about">About </h2>
<p>You can try out tree-sitter with a few pre-selected grammars on this page.
You can also run the playground locally (with your own grammar) using the
<a href="/tree-sitter/cli/playground.html">CLI</a>'s <code>tree-sitter playground</code> subcommand.
</p>

```admonish info
Logging (if enabled) can be viewed in the browser's console.
```

<p>The syntax tree should update as you type in the code. As you move around the
code, the current node should be highlighted in the tree; you can also click any
node in the tree to select the corresponding part of the code.</p>
<p>You can enter one or more <a href="/tree-sitter/using-parsers/queries/index.html">patterns</a>
into the Query panel. If the query is valid, its captures will be
highlighted both in the Code and in the Query panels. Otherwise
the problematic parts of the query will be underlined, and detailed
diagnostics will be available on hover. Note that to see any results
you must use at least one capture, like <code>(node_name) @capture-name</code>.</p>

</div>

<script src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.js"></script>

<script>LANGUAGE_BASE_URL = "https://tree-sitter.github.io";</script>
<script type="module">
import * as TreeSitter from 'https://tree-sitter.github.io/web-tree-sitter.js';
window.TreeSitter = TreeSitter;
setTimeout(() => window.initializePlayground({local: false}), 1);
</script>

<script src="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.js"></script>



================================================
FILE: docs/src/index.md
================================================
<div style="display: flex; justify-content: center; margin-left: 32px;">
    <a href="./"><img src="assets/images/tree-sitter-small.png" width="200" height="200" alt="Tree-sitter logo" /></a>
</div>

# Introduction

Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source
file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be:

- **General** enough to parse any programming language
- **Fast** enough to parse on every keystroke in a text editor
- **Robust** enough to provide useful results even in the presence of syntax errors
- **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application

## Language Bindings

There are bindings that allow Tree-sitter to be used from the following languages:

### Official

- [C#](https://github.com/tree-sitter/csharp-tree-sitter)
- [Go](https://github.com/tree-sitter/go-tree-sitter)
- [Haskell](https://github.com/tree-sitter/haskell-tree-sitter)
- [Java (JDK 22+)](https://github.com/tree-sitter/java-tree-sitter)
- [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter)
- [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web)
- [Kotlin](https://github.com/tree-sitter/kotlin-tree-sitter)
- [Python](https://github.com/tree-sitter/py-tree-sitter)
- [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust)
- [Swift](https://github.com/tree-sitter/swift-tree-sitter)
- [Zig](https://github.com/tree-sitter/zig-tree-sitter)

### Third-party

- [C# (.NET)](https://github.com/zabbius/dotnet-tree-sitter)
- [C++](https://github.com/nsumner/cpp-tree-sitter)
- [Crystal](https://github.com/crystal-lang-tools/crystal-tree-sitter)
- [D](https://github.com/aminya/d-tree-sitter)
- [Delphi](https://github.com/modersohn/delphi-tree-sitter)
- [ELisp](https://www.gnu.org/software/emacs/manual/html_node/elisp/Parsing-Program-Source.html)
- [Go](https://github.com/alexaandru/go-tree-sitter-bare)
- [Guile](https://github.com/Z572/guile-ts)
- [Janet](https://github.com/sogaiu/janet-tree-sitter)
- [Java (JDK 8+)](https://github.com/bonede/tree-sitter-ng)
- [Java (JDK 11+)](https://github.com/seart-group/java-tree-sitter)
- [Julia](https://github.com/MichaelHatherly/TreeSitter.jl)
- [Lua](https://github.com/euclidianAce/ltreesitter)
- [Lua](https://github.com/xcb-xwii/lua-tree-sitter)
- [OCaml](https://github.com/semgrep/ocaml-tree-sitter-core)
- [Odin](https://github.com/laytan/odin-tree-sitter)
- [Perl](https://metacpan.org/pod/Text::Treesitter)
- [Pharo](https://github.com/Evref-BL/Pharo-Tree-Sitter)
- [PHP](https://github.com/soulseekah/ext-treesitter)
- [R](https://github.com/DavisVaughan/r-tree-sitter)
- [Ruby](https://github.com/Faveod/ruby-tree-sitter)

_Keep in mind that some of the bindings may be incomplete or out of date._

## Parsers

The following parsers can be found in the upstream organization:

- [Agda](https://github.com/tree-sitter/tree-sitter-agda)
- [Bash](https://github.com/tree-sitter/tree-sitter-bash)
- [C](https://github.com/tree-sitter/tree-sitter-c)
- [C++](https://github.com/tree-sitter/tree-sitter-cpp)
- [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
- [CSS](https://github.com/tree-sitter/tree-sitter-css)
- [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
- [Go](https://github.com/tree-sitter/tree-sitter-go)
- [Haskell](https://github.com/tree-sitter/tree-sitter-haskell)
- [HTML](https://github.com/tree-sitter/tree-sitter-html)
- [Java](https://github.com/tree-sitter/tree-sitter-java)
- [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript)
- [JSDoc](https://github.com/tree-sitter/tree-sitter-jsdoc)
- [JSON](https://github.com/tree-sitter/tree-sitter-json)
- [Julia](https://github.com/tree-sitter/tree-sitter-julia)
- [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml)
- [PHP](https://github.com/tree-sitter/tree-sitter-php)
- [Python](https://github.com/tree-sitter/tree-sitter-python)
- [Regex](https://github.com/tree-sitter/tree-sitter-regex)
- [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
- [Rust](https://github.com/tree-sitter/tree-sitter-rust)
- [Scala](https://github.com/tree-sitter/tree-sitter-scala)
- [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
- [Verilog](https://github.com/tree-sitter/tree-sitter-verilog)

A list of known parsers can be found in the [wiki](https://github.com/tree-sitter/tree-sitter/wiki/List-of-parsers).

## Talks on Tree-sitter

- [Strange Loop 2018](https://www.thestrangeloop.com/2018/tree-sitter---a-new-parsing-system-for-programming-tools.html)
- [FOSDEM 2018](https://www.youtube.com/watch?v=0CGzC_iss-8)
- [GitHub Universe 2017](https://www.youtube.com/watch?v=a1rC79DHpmY)

## Underlying Research

The design of Tree-sitter was greatly influenced by the following research papers:

- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf)
- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf)
- [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf)
- [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf)
- [Error Detection and Recovery in LR Parsers](https://web.archive.org/web/20240302031213/https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13)
- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf)



================================================
FILE: docs/src/SUMMARY.md
================================================
# Summary

[Introduction](./index.md)

# User Guide

- [Using Parsers](./using-parsers/index.md)
  - [Getting Started](./using-parsers/1-getting-started.md)
  - [Basic Parsing](./using-parsers/2-basic-parsing.md)
  - [Advanced Parsing](./using-parsers/3-advanced-parsing.md)
  - [Walking Trees](./using-parsers/4-walking-trees.md)
  - [Queries](./using-parsers/queries/index.md)
    - [Basic Syntax](./using-parsers/queries/1-syntax.md)
    - [Operators](./using-parsers/queries/2-operators.md)
    - [Predicates and Directives](./using-parsers/queries/3-predicates-and-directives.md)
    - [API](./using-parsers/queries/4-api.md)
  - [Static Node Types](./using-parsers/6-static-node-types.md)
- [Creating Parsers](./creating-parsers/index.md)
  - [Getting Started](./creating-parsers/1-getting-started.md)
  - [The Grammar DSL](./creating-parsers/2-the-grammar-dsl.md)
  - [Writing the Grammar](./creating-parsers/3-writing-the-grammar.md)
  - [External Scanners](./creating-parsers/4-external-scanners.md)
  - [Writing Tests](./creating-parsers/5-writing-tests.md)
  - [Publishing Parsers](./creating-parsers/6-publishing.md)
- [Syntax Highlighting](./3-syntax-highlighting.md)
- [Code Navigation](./4-code-navigation.md)
- [Implementation](./5-implementation.md)
- [Contributing](./6-contributing.md)
- [Playground](./7-playground.md)

# Reference Guide

- [Command Line Interface](./cli/index.md)
  - [Init Config](./cli/init-config.md)
  - [Init](./cli/init.md)
  - [Generate](./cli/generate.md)
  - [Build](./cli/build.md)
  - [Parse](./cli/parse.md)
  - [Test](./cli/test.md)
  - [Version](./cli/version.md)
  - [Fuzz](./cli/fuzz.md)
  - [Query](./cli/query.md)
  - [Highlight](./cli/highlight.md)
  - [Tags](./cli/tags.md)
  - [Playground](./cli/playground.md)
  - [Dump Languages](./cli/dump-languages.md)
  - [Complete](./cli/complete.md)



================================================
FILE: docs/src/assets/css/mdbook-admonish.css
================================================
@charset "UTF-8";
/* Admonition container: a card with an accent bar along its inline-start edge. */
:is(.admonition) {
  display: flow-root;
  margin: 1.5625em 0;
  padding: 0 1.2rem;
  page-break-inside: avoid;
  color: var(--fg);
  background-color: var(--bg);
  /* Zero-width border on every side first, then widen only the inline-start side. */
  border: 0 solid black;
  border-inline-start-width: 0.4rem;
  border-radius: 0.2rem;
  box-shadow: 0 0.2rem 1rem rgba(0, 0, 0, 0.05), 0 0 0.1rem rgba(0, 0, 0, 0.1);
}

/* Drop the shadow when printing. */
@media print {
  :is(.admonition) {
    box-shadow: none;
  }
}

:is(.admonition) > * {
  box-sizing: border-box;
}

/* Admonitions nested inside another admonition get 1em vertical margins. */
:is(.admonition) :is(.admonition) {
  margin-bottom: 1em;
  margin-top: 1em;
}

:is(.admonition) > .tabbed-set:only-child {
  margin-top: 0;
}

/* The `html` prefix adds type-selector specificity to this rule. */
html :is(.admonition) > :last-child {
  margin-bottom: 1.2rem;
}

/* Anchor link of an admonition: hidden by default and rendered as a "§" marker. */
a.admonition-anchor-link {
  position: absolute;
  left: -1.2rem;
  display: none;
  padding-right: 1rem;
}

a.admonition-anchor-link:visited,
a.admonition-anchor-link:link {
  color: var(--fg);
}

a.admonition-anchor-link:visited:hover,
a.admonition-anchor-link:link:hover {
  text-decoration: none;
}

a.admonition-anchor-link::before {
  content: "§";
}

/* Title bar of an admonition (plain title element or the <summary> of a collapsible one). */
:is(.admonition-title, summary.admonition-title) {
  display: flex;
  position: relative;
  min-height: 4rem;
  margin-block: 0;
  margin-inline: -1.6rem -1.2rem;
  padding-block: 0.8rem;
  padding-inline: 4.4rem 1.2rem;
  font-weight: 700;
  background-color: rgba(68, 138, 255, 0.1);
  -webkit-print-color-adjust: exact;
  print-color-adjust: exact;
}

:is(.admonition-title, summary.admonition-title) p {
  margin: 0;
}

html :is(.admonition-title, summary.admonition-title):last-child {
  margin-bottom: 0;
}

/* Icon slot: a colored square whose visible shape comes from the mask image. */
:is(.admonition-title, summary.admonition-title)::before {
  content: "";
  position: absolute;
  top: 0.625em;
  inset-inline-start: 1.6rem;
  width: 2rem;
  height: 2rem;
  background-color: #448aff;
  -webkit-print-color-adjust: exact;
  print-color-adjust: exact;
  -webkit-mask-image: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"></svg>');
  mask-image: url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"></svg>');
  -webkit-mask-repeat: no-repeat;
  mask-repeat: no-repeat;
  -webkit-mask-size: contain;
  mask-size: contain;
}

/* Reveal the anchor link while the title is hovered. */
:is(.admonition-title, summary.admonition-title):hover a.admonition-anchor-link {
  display: initial;
}

/* Disclosure arrow of a collapsible admonition, drawn at the inline-end of the title. */
details.admonition > summary.admonition-title::after {
  content: "";
  position: absolute;
  top: 0.625em;
  inset-inline-end: 1.6rem;
  width: 2rem;
  height: 2rem;
  background-color: currentcolor;
  -webkit-mask-image: var(--md-details-icon);
  mask-image: var(--md-details-icon);
  -webkit-mask-repeat: no-repeat;
  mask-repeat: no-repeat;
  -webkit-mask-size: contain;
  mask-size: contain;
  transition: transform 0.25s;
  transform: rotate(0deg);
}

/* Rotate the arrow a quarter turn while the <details> element is open. */
details[open].admonition > summary.admonition-title::after {
  transform: rotate(90deg);
}

/* Inline SVG used as the mask for the disclosure arrow of collapsible admonitions. */
:root {
  --md-details-icon: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M8.59 16.58 13.17 12 8.59 7.41 10 6l6 6-6 6-1.41-1.42Z'/></svg>");
}

/*
 * Inline SVG icons, one per admonition kind. Each is referenced as a mask image by the
 * matching `.admonish-*` title rule, so the icon takes that rule's background color.
 */
:root {
  --md-admonition-icon--admonish-note: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M20.71 7.04c.39-.39.39-1.04 0-1.41l-2.34-2.34c-.37-.39-1.02-.39-1.41 0l-1.84 1.83 3.75 3.75M3 17.25V21h3.75L17.81 9.93l-3.75-3.75L3 17.25z'/></svg>");
  --md-admonition-icon--admonish-abstract: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M17 9H7V7h10m0 6H7v-2h10m-3 6H7v-2h7M12 3a1 1 0 0 1 1 1 1 1 0 0 1-1 1 1 1 0 0 1-1-1 1 1 0 0 1 1-1m7 0h-4.18C14.4 1.84 13.3 1 12 1c-1.3 0-2.4.84-2.82 2H5a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2V5a2 2 0 0 0-2-2z'/></svg>");
  --md-admonition-icon--admonish-info: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M13 9h-2V7h2m0 10h-2v-6h2m-1-9A10 10 0 0 0 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10A10 10 0 0 0 12 2z'/></svg>");
  --md-admonition-icon--admonish-tip: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M17.66 11.2c-.23-.3-.51-.56-.77-.82-.67-.6-1.43-1.03-2.07-1.66C13.33 7.26 13 4.85 13.95 3c-.95.23-1.78.75-2.49 1.32-2.59 2.08-3.61 5.75-2.39 8.9.04.1.08.2.08.33 0 .22-.15.42-.35.5-.23.1-.47.04-.66-.12a.58.58 0 0 1-.14-.17c-1.13-1.43-1.31-3.48-.55-5.12C5.78 10 4.87 12.3 5 14.47c.06.5.12 1 .29 1.5.14.6.41 1.2.71 1.73 1.08 1.73 2.95 2.97 4.96 3.22 2.14.27 4.43-.12 6.07-1.6 1.83-1.66 2.47-4.32 1.53-6.6l-.13-.26c-.21-.46-.77-1.26-.77-1.26m-3.16 6.3c-.28.24-.74.5-1.1.6-1.12.4-2.24-.16-2.9-.82 1.19-.28 1.9-1.16 2.11-2.05.17-.8-.15-1.46-.28-2.23-.12-.74-.1-1.37.17-2.06.19.38.39.76.63 1.06.77 1 1.98 1.44 2.24 2.8.04.14.06.28.06.43.03.82-.33 1.72-.93 2.27z'/></svg>");
  --md-admonition-icon--admonish-success: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='m9 20.42-6.21-6.21 2.83-2.83L9 14.77l9.88-9.89 2.83 2.83L9 20.42z'/></svg>");
  --md-admonition-icon--admonish-question: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='m15.07 11.25-.9.92C13.45 12.89 13 13.5 13 15h-2v-.5c0-1.11.45-2.11 1.17-2.83l1.24-1.26c.37-.36.59-.86.59-1.41a2 2 0 0 0-2-2 2 2 0 0 0-2 2H8a4 4 0 0 1 4-4 4 4 0 0 1 4 4 3.2 3.2 0 0 1-.93 2.25M13 19h-2v-2h2M12 2A10 10 0 0 0 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10c0-5.53-4.5-10-10-10z'/></svg>");
  --md-admonition-icon--admonish-warning: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M13 14h-2V9h2m0 9h-2v-2h2M1 21h22L12 2 1 21z'/></svg>");
  --md-admonition-icon--admonish-failure: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M20 6.91 17.09 4 12 9.09 6.91 4 4 6.91 9.09 12 4 17.09 6.91 20 12 14.91 17.09 20 20 17.09 14.91 12 20 6.91z'/></svg>");
  --md-admonition-icon--admonish-danger: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M11 15H6l7-14v8h5l-7 14v-8z'/></svg>");
  --md-admonition-icon--admonish-bug: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M14 12h-4v-2h4m0 6h-4v-2h4m6-6h-2.81a5.985 5.985 0 0 0-1.82-1.96L17 4.41 15.59 3l-2.17 2.17a6.002 6.002 0 0 0-2.83 0L8.41 3 7 4.41l1.62 1.63C7.88 6.55 7.26 7.22 6.81 8H4v2h2.09c-.05.33-.09.66-.09 1v1H4v2h2v1c0 .34.04.67.09 1H4v2h2.81c1.04 1.79 2.97 3 5.19 3s4.15-1.21 5.19-3H20v-2h-2.09c.05-.33.09-.66.09-1v-1h2v-2h-2v-1c0-.34-.04-.67-.09-1H20V8z'/></svg>");
  --md-admonition-icon--admonish-example: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M7 13v-2h14v2H7m0 6v-2h14v2H7M7 7V5h14v2H7M3 8V5H2V4h2v4H3m-1 9v-1h3v4H2v-1h2v-.5H3v-1h1V17H2m2.25-7a.75.75 0 0 1 .75.75c0 .2-.08.39-.21.52L3.12 13H5v1H2v-.92L4 11H2v-1h2.25z'/></svg>");
  --md-admonition-icon--admonish-quote: url("data:image/svg+xml;charset=utf-8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path d='M14 17h3l2-4V7h-6v6h3M6 17h3l2-4V7H5v6h3l-2 4z'/></svg>");
}

/* "note" admonitions: accent border color. */
:is(.admonition):is(.admonish-note) {
  border-color: #448aff;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-note) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(68, 138, 255, 0.1);
}
/* Title icon: the accent color shows through the note icon mask. */
:is(.admonish-note) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #448aff;
  mask-image: var(--md-admonition-icon--admonish-note);
  -webkit-mask-image: var(--md-admonition-icon--admonish-note);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "abstract" / "summary" / "tldr" admonitions: accent border color. */
:is(.admonition):is(.admonish-abstract, .admonish-summary, .admonish-tldr) {
  border-color: #00b0ff;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-abstract, .admonish-summary, .admonish-tldr) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(0, 176, 255, 0.1);
}
/* Title icon: the accent color shows through the abstract icon mask. */
:is(.admonish-abstract, .admonish-summary, .admonish-tldr) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #00b0ff;
  mask-image: var(--md-admonition-icon--admonish-abstract);
  -webkit-mask-image: var(--md-admonition-icon--admonish-abstract);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "info" / "todo" admonitions: accent border color. */
:is(.admonition):is(.admonish-info, .admonish-todo) {
  border-color: #00b8d4;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(0, 184, 212, 0.1);
}
/* Title icon: the accent color shows through the info icon mask. */
:is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #00b8d4;
  mask-image: var(--md-admonition-icon--admonish-info);
  -webkit-mask-image: var(--md-admonition-icon--admonish-info);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "tip" / "hint" / "important" admonitions: accent border color. */
:is(.admonition):is(.admonish-tip, .admonish-hint, .admonish-important) {
  border-color: #00bfa5;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(0, 191, 165, 0.1);
}
/* Title icon: the accent color shows through the tip icon mask. */
:is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #00bfa5;
  mask-image: var(--md-admonition-icon--admonish-tip);
  -webkit-mask-image: var(--md-admonition-icon--admonish-tip);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "success" / "check" / "done" admonitions: accent border color. */
:is(.admonition):is(.admonish-success, .admonish-check, .admonish-done) {
  border-color: #00c853;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(0, 200, 83, 0.1);
}
/* Title icon: the accent color shows through the success icon mask. */
:is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #00c853;
  mask-image: var(--md-admonition-icon--admonish-success);
  -webkit-mask-image: var(--md-admonition-icon--admonish-success);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "question" / "help" / "faq" admonitions: accent border color. */
:is(.admonition):is(.admonish-question, .admonish-help, .admonish-faq) {
  border-color: #64dd17;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(100, 221, 23, 0.1);
}
/* Title icon: the accent color shows through the question icon mask. */
:is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #64dd17;
  mask-image: var(--md-admonition-icon--admonish-question);
  -webkit-mask-image: var(--md-admonition-icon--admonish-question);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "warning" / "caution" / "attention" admonitions: accent border color. */
:is(.admonition):is(.admonish-warning, .admonish-caution, .admonish-attention) {
  border-color: #ff9100;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(255, 145, 0, 0.1);
}
/* Title icon: the accent color shows through the warning icon mask. */
:is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #ff9100;
  mask-image: var(--md-admonition-icon--admonish-warning);
  -webkit-mask-image: var(--md-admonition-icon--admonish-warning);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "failure" / "fail" / "missing" admonitions: accent border color. */
:is(.admonition):is(.admonish-failure, .admonish-fail, .admonish-missing) {
  border-color: #ff5252;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(255, 82, 82, 0.1);
}
/* Title icon: the accent color shows through the failure icon mask. */
:is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #ff5252;
  mask-image: var(--md-admonition-icon--admonish-failure);
  -webkit-mask-image: var(--md-admonition-icon--admonish-failure);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "danger" / "error" admonitions: accent border color. */
:is(.admonition):is(.admonish-danger, .admonish-error) {
  border-color: #ff1744;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(255, 23, 68, 0.1);
}
/* Title icon: the accent color shows through the danger icon mask. */
:is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #ff1744;
  mask-image: var(--md-admonition-icon--admonish-danger);
  -webkit-mask-image: var(--md-admonition-icon--admonish-danger);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "bug" admonitions: accent border color. */
:is(.admonition):is(.admonish-bug) {
  border-color: #f50057;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-bug) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(245, 0, 87, 0.1);
}
/* Title icon: the accent color shows through the bug icon mask. */
:is(.admonish-bug) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #f50057;
  mask-image: var(--md-admonition-icon--admonish-bug);
  -webkit-mask-image: var(--md-admonition-icon--admonish-bug);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "example" admonitions: accent border color. */
:is(.admonition):is(.admonish-example) {
  border-color: #7c4dff;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-example) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(124, 77, 255, 0.1);
}
/* Title icon: the accent color shows through the example icon mask. */
:is(.admonish-example) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #7c4dff;
  mask-image: var(--md-admonition-icon--admonish-example);
  -webkit-mask-image: var(--md-admonition-icon--admonish-example);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* "quote" / "cite" admonitions: accent border color. */
:is(.admonition):is(.admonish-quote, .admonish-cite) {
  border-color: #9e9e9e;
}

/* Title bar tinted with the accent color at 10% opacity. */
:is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title) {
  background-color: rgba(158, 158, 158, 0.1);
}
/* Title icon: the accent color shows through the quote icon mask. */
:is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title)::before {
  background-color: #9e9e9e;
  mask-image: var(--md-admonition-icon--admonish-quote);
  -webkit-mask-image: var(--md-admonition-icon--admonish-quote);
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  mask-size: contain;
  /* Prefixed counterpart of mask-size, so prefix-only engines size the mask too. */
  -webkit-mask-size: contain;
}

/* Per-theme admonition colors, keyed on the theme class of an ancestor element. */
.navy :is(.admonition) {
  background-color: var(--sidebar-bg);
}

.coal :is(.admonition),
.ayu :is(.admonition) {
  background-color: var(--theme-hover);
}

/* The rust theme also switches the text and anchor-link color to the sidebar foreground. */
.rust :is(.admonition) {
  color: var(--sidebar-fg);
  background-color: var(--sidebar-bg);
}

.rust .admonition-anchor-link:visited,
.rust .admonition-anchor-link:link {
  color: var(--sidebar-fg);
}



================================================
FILE: docs/src/assets/css/playground.css
================================================
/* Base variables: light and dark surface colors plus shared accent colors. */
:root {
  /* Light theme surfaces */
  --light-bg: #f9f9f9;
  --light-text: #333;
  --light-border: #e0e0e0;
  --light-hover-border: #c1c1c1;
  --light-scrollbar-track: #f1f1f1;
  --light-scrollbar-thumb: #c1c1c1;
  --light-scrollbar-thumb-hover: #a8a8a8;

  /* Dark theme surfaces */
  --dark-bg: #1d1f21;
  --dark-text: #c5c8c6;
  --dark-border: #2d2d2d;
  --dark-scrollbar-track: #25282c;
  --dark-scrollbar-thumb: #4a4d51;
  --dark-scrollbar-thumb-hover: #5a5d61;

  /* Accent and selection colors */
  --primary-color: #0550ae;
  --primary-color-alpha: rgba(5, 80, 174, 0.1);
  --primary-color-alpha-dark: rgba(121, 192, 255, 0.1);
  --selection-color: rgba(39, 95, 255, 0.3);
}

/* Scrollbars (WebKit pseudo-elements): shared geometry plus the light-theme colors. */
::-webkit-scrollbar {
  height: 8px;
  width: 8px;
}

::-webkit-scrollbar-track {
  border-radius: 4px;
  background: var(--light-scrollbar-track);
}

::-webkit-scrollbar-thumb {
  border-radius: 4px;
  background: var(--light-scrollbar-thumb);
}

::-webkit-scrollbar-thumb:hover {
  background: var(--light-scrollbar-thumb-hover);
}

/* Language picker: the native <select> and the custom button that mirrors it. */
.custom-select {
  display: inline-block;
  position: relative;
}

/* Native select with its default appearance removed and a chevron drawn as a background. */
#language-select {
  appearance: none;
  min-width: 120px;
  padding: 4px 24px 4px 8px;
  font-size: 14px;
  color: var(--light-text);
  cursor: pointer;
  border: 1px solid var(--light-border);
  border-radius: 4px;
  background-color: var(--light-bg);
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%23666' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E");
  background-position: right 8px center;
  background-repeat: no-repeat;
}

.select-button {
  display: flex;
  align-items: center;
  justify-content: space-between;
  min-width: 120px;
  padding: 4px 8px;
  font-size: 14px;
  color: var(--light-text);
  cursor: pointer;
  border: 1px solid var(--light-border);
  border-radius: 4px;
  background-color: var(--light-bg);
}

.select-button:hover,
#language-select:hover {
  border-color: var(--light-hover-border);
}

/* Focus ring: the default outline is replaced by a colored border plus a soft halo. */
.select-button:focus,
#language-select:focus {
  border-color: var(--primary-color);
  box-shadow: 0 0 0 2px var(--primary-color-alpha);
  outline: none;
}

/* Custom checkbox: the native control is hidden and redrawn as a 16px rounded box. */
input[type="checkbox"] {
  appearance: none;
  position: relative;
  width: 16px;
  height: 16px;
  margin-right: 6px;
  vertical-align: middle;
  cursor: pointer;
  border: 1px solid var(--light-border);
  border-radius: 3px;
}

input[type="checkbox"]:checked {
  border-color: var(--primary-color);
  background-color: var(--primary-color);
}

/* Check mark: a box with only right and bottom borders, rotated 45 degrees. */
input[type="checkbox"]:checked::after {
  content: '';
  position: absolute;
  top: 2px;
  left: 5px;
  width: 4px;
  height: 8px;
  border: solid white;
  border-width: 0 2px 2px 0;
  transform: rotate(45deg);
}

input[type="checkbox"]:hover {
  border-color: var(--light-hover-border);
}

input[type="checkbox"]:focus {
  border-color: var(--primary-color);
  box-shadow: 0 0 0 2px var(--primary-color-alpha);
  outline: none;
}

/* Option list of the custom select: hidden until the `show` class is added. */
.select-dropdown {
  display: none;
  position: absolute;
  top: 100%;
  right: 0;
  left: 0;
  z-index: 1000;
  max-height: 300px;
  margin-top: 4px;
  overflow-y: auto;
  border: 1px solid var(--light-border);
  border-radius: 4px;
  background-color: var(--light-bg);
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.select-dropdown.show {
  display: block;
}

.option {
  cursor: pointer;
  padding: 8px 12px;
}

/* Hovered and currently selected options share the same highlight. */
.option:hover,
.option.selected {
  background-color: var(--primary-color-alpha);
}

/* CodeMirror editors inside the playground (light theme defaults). */
.ts-playground .CodeMirror {
  color: #080808 !important;
  background-color: var(--light-bg) !important;
  border-radius: 6px;
}

.ts-playground .CodeMirror-scroll {
  padding: 8px;
  border: 1px solid var(--light-border);
  border-radius: 6px;
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1);
}

/* Dark themes swap the editor frame to the dark border color. */
.navy .ts-playground .CodeMirror-scroll,
.coal .ts-playground .CodeMirror-scroll,
.ayu .ts-playground .CodeMirror-scroll {
  border-color: var(--dark-border);
}

.ts-playground .CodeMirror-gutters {
  border-right: 1px solid #e8e8e8 !important;
  background: #ebebeb !important;
}

.ts-playground .CodeMirror-cursor {
  border-left: 2px solid #000 !important;
}

.ts-playground .CodeMirror-selected {
  background: var(--selection-color) !important;
}

.ts-playground .CodeMirror-activeline-background {
  background: rgba(36, 99, 180, 0.12) !important;
}

/* Dashed red underline for query errors. */
.query-error {
  text-decoration: underline red dashed;
  -webkit-text-decoration: underline red dashed;
}

/* Syntax-tree output: monospace, whitespace-preserving text inside a scrollable frame. */
#output-container {
  margin: 0;
  font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
  white-space: pre;
  color: #080808;
  background-color: var(--light-bg);
}

#output-container-scroll {
  max-height: 400px;
  padding: 8px;
  overflow: auto;
  border: 1px solid var(--light-border);
  border-radius: 6px;
  background-color: var(--light-bg);
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1);
}

/* Node links */
#output-container a {
  text-decoration: none;
  color: var(--primary-color);
}

#output-container a:hover {
  text-decoration: underline;
}

#output-container a.node-link.anonymous {
  color: #116329;
}

/* Anonymous nodes are displayed wrapped in double quotes. */
#output-container a.node-link.anonymous:before,
#output-container a.node-link.anonymous:after {
  content: '"';
}

#output-container a.node-link.error {
  color: #cf222e;
}

#output-container a.highlighted {
  background-color: var(--selection-color);
}

/* Dark theme overrides: applied under the ayu, coal, and navy theme classes. */
.ayu,
.coal,
.navy {

  /* Form controls */
  & .select-button,
  & #language-select {
    color: var(--dark-text);
    border-color: var(--dark-border);
    background-color: var(--dark-bg);
  }

  & input[type="checkbox"] {
    background-color: var(--dark-bg);
    border-color: var(--dark-border);
  }

  & input[type="checkbox"]:checked {
    border-color: #79c0ff;
    background-color: #79c0ff;
  }

  & label {
    color: var(--dark-text);
  }

  /* Custom select dropdown */
  & .select-dropdown {
    border-color: var(--dark-border);
    background-color: var(--dark-bg);
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
  }

  & .option:hover,
  & .option.selected {
    background-color: var(--primary-color-alpha-dark);
  }

  /* CodeMirror editors */
  & .ts-playground .CodeMirror {
    color: var(--dark-text) !important;
    background-color: var(--dark-bg) !important;
  }

  & .ts-playground .CodeMirror-gutters {
    border-right-color: var(--dark-border) !important;
    background: var(--dark-scrollbar-track) !important;
  }

  & .ts-playground .CodeMirror-cursor {
    border-left-color: #aeafad !important;
  }

  & .ts-playground .CodeMirror-selected {
    background: #373b41 !important;
  }

  & .ts-playground .CodeMirror-activeline-background {
    background: #282a2e !important;
  }

  /* Syntax-tree output */
  & #output-container {
    background-color: var(--dark-bg);
    color: var(--dark-text);
  }

  & #output-container-scroll {
    border-color: var(--dark-border);
    background-color: var(--dark-bg);
  }

  & #output-container a {
    color: #79c0ff;
  }

  & #output-container a.node-link.anonymous {
    color: #7ee787;
  }

  & #output-container a.node-link.error {
    color: #ff7b72;
  }

  & #output-container a.highlighted {
    background-color: #373b41;
  }

  /* Dark theme scrollbars: WebKit pseudo-elements first, standard properties last. */
  & ::-webkit-scrollbar-track {
    background: var(--dark-scrollbar-track) !important;
  }

  & ::-webkit-scrollbar-thumb {
    background: var(--dark-scrollbar-thumb) !important;
  }

  & ::-webkit-scrollbar-thumb:hover {
    background: var(--dark-scrollbar-thumb-hover) !important;
  }

  & * {
    scrollbar-color: var(--dark-scrollbar-thumb) var(--dark-scrollbar-track) !important;
    scrollbar-width: thin !important;
  }
}

/* Spacing utilities: a 4px horizontal margin on every control by default. */
label,
input[type="checkbox"],
#language-select {
  margin: 0 4px;
}

/* The language picker and labels get a wider gap on their right side. */
#language-select,
label {
  margin-right: 16px;
}

label {
  cursor: pointer;
  font-size: 14px;
}



================================================
FILE: docs/src/assets/js/playground.js
================================================
/**
 * Set up the light/dark theme toggle.
 *
 * Does nothing when the page has no `#theme-toggle` element. Otherwise the initial
 * theme is the value stored under the `theme` key in localStorage, falling back to the
 * system color-scheme preference, and is written to the `data-theme` attribute of the
 * root element. Each click on the toggle flips the theme and stores the new value.
 */
function initializeLocalTheme() {
  const toggleButton = document.getElementById('theme-toggle');
  if (!toggleButton) return;

  const root = document.documentElement;
  const applyTheme = (theme) => root.setAttribute('data-theme', theme);

  // A stored choice wins; otherwise follow the system preference.
  const storedTheme = localStorage.getItem('theme');
  const systemPrefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
  let startingTheme = storedTheme;
  if (!startingTheme) {
    startingTheme = systemPrefersDark ? 'dark' : 'light';
  }
  applyTheme(startingTheme);

  toggleButton.addEventListener('click', () => {
    // Anything other than 'light' is treated as dark and toggles back to light.
    const nextTheme = root.getAttribute('data-theme') === 'light' ? 'dark' : 'light';
    applyTheme(nextTheme);
    localStorage.setItem('theme', nextTheme);
  });
}

/**
 * Synchronize the custom language dropdown with the underlying `<select>`.
 *
 * @param {Object} [options] - May be omitted entirely.
 * @param {?string} [options.initialValue=null] - When truthy, assigned to the
 *   `<select>` before the button label is refreshed.
 * @param {boolean} [options.addListeners=false] - When true, also attaches the click
 *   handlers that open/close the dropdown and pick an option. Pass this only once,
 *   since every call with it set registers a new set of listeners.
 *
 * Does nothing when `#language-button` or `#language-select` is missing. The label and
 * the listeners are skipped individually when their markup (the `.selected-value`
 * element, the sibling dropdown) is absent, instead of throwing.
 */
function initializeCustomSelect({ initialValue = null, addListeners = false } = {}) {
  const button = document.getElementById('language-button');
  const select = document.getElementById('language-select');
  if (!button || !select) return;

  const dropdown = button.nextElementSibling;
  const selectedValue = button.querySelector('.selected-value');

  if (initialValue) {
    select.value = initialValue;
  }

  if (selectedValue) {
    const current = select.selectedIndex >= 0 ? select.options[select.selectedIndex] : null;
    // Fall back to a fixed label when the select has no usable selection.
    selectedValue.textContent = current ? current.text : 'JavaScript';
  }

  if (!addListeners || !dropdown) return;

  button.addEventListener('click', (e) => {
    e.preventDefault(); // Prevent form submission
    dropdown.classList.toggle('show');
  });

  // Any click outside the button closes the dropdown.
  document.addEventListener('click', (e) => {
    if (!button.contains(e.target)) {
      dropdown.classList.remove('show');
    }
  });

  dropdown.querySelectorAll('.option').forEach(option => {
    option.addEventListener('click', () => {
      if (selectedValue) {
        selectedValue.textContent = option.textContent;
      }
      select.value = option.dataset.value;
      dropdown.classList.remove('show');

      // Assigning `select.value` from script fires no event, so notify listeners explicitly.
      const event = new Event('change');
      select.dispatchEvent(event);
    });
  });
}

window.initializePlayground = async (opts) => {
  // The tree-sitter bindings are read from the `TreeSitter` global on `window`.
  const { Parser, Language } = window.TreeSitter;

  // `opts.local` turns on the standalone theme toggle (see initializeLocalTheme).
  const { local } = opts;
  if (local) {
    initializeLocalTheme();
  }
  initializeCustomSelect({ addListeners: true });

  // Most recent syntax tree; replaced by handleCodeChange on every re-parse.
  let tree;

  // Matches capture references: "@", optional whitespace, then a name made of word
  // characters, dots, underscores, or hyphens (the name is capture group 1).
  const CAPTURE_REGEX = /@\s*([\w\._-]+)/g;
  // Color palette for the light theme.
  // NOTE(review): presumably consumed by colorForCaptureName() — confirm.
  const LIGHT_COLORS = [
    "#0550ae", // blue
    "#ab5000", // rust brown
    "#116329", // forest green
    "#844708", // warm brown
    "#6639ba", // purple
    "#7d4e00", // orange brown
    "#0969da", // bright blue
    "#1a7f37", // green
    "#cf222e", // red
    "#8250df", // violet
    "#6e7781", // gray
    "#953800", // dark orange
    "#1b7c83"  // teal
  ];

  // Color palette for the dark themes (same length as LIGHT_COLORS).
  const DARK_COLORS = [
    "#79c0ff", // light blue
    "#ffa657", // orange
    "#7ee787", // light green
    "#ff7b72", // salmon
    "#d2a8ff", // light purple
    "#ffa198", // pink
    "#a5d6ff", // pale blue
    "#56d364", // bright green
    "#ff9492", // light red
    "#e0b8ff", // pale purple
    "#9ca3af", // gray
    "#ffb757", // yellow orange
    "#80cbc4"  // light teal
  ];

  // DOM elements the playground reads from and writes to.
  const codeInput = document.getElementById("code-input");
  const languageSelect = document.getElementById("language-select");
  const loggingCheckbox = document.getElementById("logging-checkbox");
  const anonymousNodes = document.getElementById('anonymous-nodes-checkbox');
  const outputContainer = document.getElementById("output-container");
  const outputContainerScroll = document.getElementById(
    "output-container-scroll",
  );
  const playgroundContainer = document.getElementById("playground-container");
  const queryCheckbox = document.getElementById("query-checkbox");
  const queryContainer = document.getElementById("query-container");
  const queryInput = document.getElementById("query-input");
  const accessibilityCheckbox = document.getElementById("accessibility-checkbox");
  const updateTimeSpan = document.getElementById("update-time");
  // Cache of loaded languages keyed by language name; filled by handleLanguageChange.
  const languagesByName = {};

  // NOTE(review): loadState() is defined outside this view; presumably it restores
  // previously saved playground state — confirm.
  loadState();

  // Initialize the tree-sitter runtime before constructing a Parser.
  await Parser.init();

  const parser = new Parser();

  // Editor for the source code being parsed, backed by the #code-input textarea.
  const codeEditor = CodeMirror.fromTextArea(codeInput, {
    lineNumbers: true,
    showCursorWhenSelecting: true
  });

  codeEditor.on('keydown', (_, event) => {
    const key = event.key;
    if (key === 'ArrowLeft' || key === 'ArrowRight' || key === '?') {
      event.stopPropagation(); // Prevent mdBook from going back/forward, or showing help
    }
  });

  // Editor for the tree query, backed by the #query-input textarea.
  const queryEditor = CodeMirror.fromTextArea(queryInput, {
    lineNumbers: true,
    showCursorWhenSelecting: true,
  });

  queryEditor.on('keydown', (_, event) => {
    if (event.key === 'ArrowLeft' || event.key === 'ArrowRight') {
      event.stopPropagation(); // Prevent mdBook from going back/forward
    }
  });

  // Clusterize instance that receives the rendered tree rows (see renderTree).
  const cluster = new Clusterize({
    rows: [],
    noDataText: null,
    contentElem: outputContainer,
    scrollElem: outputContainerScroll,
  });
  // Debounced wrappers; the second argument to debounce() is presumably a delay in
  // milliseconds (debounce is defined outside this view — confirm).
  const renderTreeOnCodeChange = debounce(renderTree, 50);
  const saveStateOnChange = debounce(saveState, 2000);
  const runTreeQueryOnChange = debounce(runTreeQuery, 50);

  // Name of the language currently set on the parser.
  let languageName = languageSelect.value;
  // Rows produced by the last completed renderTree() call.
  let treeRows = null;
  let treeRowHighlightedIndex = -1;
  // Incremented on every parse; renderTree compares it to detect a stale render.
  let parseCount = 0;
  // Number of renderTree() calls currently in progress.
  let isRendering = 0;
  let query;

  // Editor events.
  codeEditor.on("changes", handleCodeChange);
  codeEditor.on("viewportChange", runTreeQueryOnChange);
  codeEditor.on("cursorActivity", debounce(handleCursorMovement, 150));
  queryEditor.on("changes", debounce(handleQueryChange, 150));

  // Form control and tree-view events.
  loggingCheckbox.addEventListener("change", handleLoggingChange);
  anonymousNodes.addEventListener('change', renderTree);
  queryCheckbox.addEventListener("change", handleQueryEnableChange);
  accessibilityCheckbox.addEventListener("change", handleQueryChange);
  languageSelect.addEventListener("change", handleLanguageChange);
  outputContainer.addEventListener("click", handleTreeClick);

  // Apply the initial query-enabled state and load the initially selected language.
  handleQueryEnableChange();
  await handleLanguageChange();

  // Reveal the playground only once initialization has finished.
  playgroundContainer.style.visibility = "visible";

  /**
   * Switch the parser to the language currently chosen in `#language-select`.
   *
   * The language's `.wasm` file is loaded on first use and cached in
   * `languagesByName`; the select is disabled while the load is in flight. If loading
   * fails, the error is logged and the selection (both the `<select>` value and the
   * custom dropdown's label) is rolled back to the previous language. On success the
   * old tree is dropped and the code and query are re-processed with the new language.
   */
  async function handleLanguageChange() {
    const newLanguageName = languageSelect.value;
    if (!languagesByName[newLanguageName]) {
      const url = `${LANGUAGE_BASE_URL}/tree-sitter-${newLanguageName}.wasm`;
      languageSelect.disabled = true;
      try {
        languagesByName[newLanguageName] = await Language.load(url);
      } catch (e) {
        console.error(e);
        languageSelect.value = languageName;
        // The custom dropdown label was already changed by the option click, so
        // refresh it too; otherwise it keeps showing the language that failed to load.
        initializeCustomSelect({ initialValue: languageName });
        return;
      } finally {
        languageSelect.disabled = false;
      }
    }

    // A tree parsed with the previous language cannot be reused for incremental parsing.
    tree = null;
    languageName = newLanguageName;
    parser.setLanguage(languagesByName[newLanguageName]);
    handleCodeChange();
    handleQueryChange();
  }

  /**
   * Re-parse the code editor's contents and schedule the dependent updates.
   *
   * @param editor - Unused; present because this is registered as an editor event handler.
   * @param changes - Editor change records. When given, and a previous tree exists,
   *   each is converted with treeEditForEditorChange and applied to the old tree
   *   before it is passed to the parser as the previous tree.
   *
   * Writes the edit-plus-parse duration into `updateTimeSpan`, replaces `tree`
   * (deleting the old one), bumps `parseCount`, and triggers the debounced tree
   * render, query run, and state save.
   */
  async function handleCodeChange(editor, changes) {
    const source = codeEditor.getValue() + "\n";
    // Converted up front so the conversion is not part of the timed section.
    const pendingEdits = tree && changes && changes.map(treeEditForEditorChange);

    const startedAt = performance.now();
    for (const pendingEdit of pendingEdits || []) {
      tree.edit(pendingEdit);
    }
    const reparsedTree = parser.parse(source, tree);
    const elapsedMs = (performance.now() - startedAt).toFixed(1);

    updateTimeSpan.innerText = `${elapsedMs} ms`;
    if (tree) {
      tree.delete();
    }
    tree = reparsedTree;
    parseCount++;

    renderTreeOnCodeChange();
    runTreeQueryOnChange();
    saveStateOnChange();
  }

  /**
   * Rebuild the HTML rows of the syntax-tree view from the current `tree`.
   *
   * Walks the tree depth-first with a cursor and emits one `<div class="tree-row">`
   * per displayed node: named nodes, missing nodes, and (when the anonymous-nodes
   * checkbox is checked) anonymous nodes. The rows are handed to `cluster`, stored in
   * `treeRows`, and the cursor highlight is refreshed afterwards.
   *
   * The walk yields to the event loop every 10000 iterations; if a newer parse
   * happened in the meantime (`parseCount` changed) the render is abandoned.
   * `isRendering` counts the calls in progress. Returns without doing anything when
   * there is no tree yet.
   */
  async function renderTree() {
    // Bail out before touching `isRendering`, so a missing tree cannot leave the
    // counter permanently incremented.
    if (!tree) return;

    // Node type and field names come from the grammar (anonymous tokens can be "<",
    // "</", "&&", ...), so escape them before interpolating them into row markup.
    const escapeHtml = (text) =>
      String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

    isRendering++;
    const cursor = tree.walk();

    let currentRenderCount = parseCount;
    let row = "";
    let rows = [];
    let finishedRow = false;
    let visitedChildren = false;
    let indentLevel = 0;

    for (let i = 0; ; i++) {
      if (i > 0 && i % 10000 === 0) {
        // Yield so the page stays responsive, then drop this render if it is stale.
        await new Promise((r) => setTimeout(r, 0));
        if (parseCount !== currentRenderCount) {
          cursor.delete();
          isRendering--;
          return;
        }
      }

      // `displayName` stays undefined for nodes that are not shown.
      let displayName;
      if (cursor.nodeIsMissing) {
        const nodeTypeText = cursor.nodeIsNamed ? cursor.nodeType : `"${cursor.nodeType}"`;
        displayName = `MISSING ${nodeTypeText}`;
      } else if (cursor.nodeIsNamed) {
        displayName = cursor.nodeType;
      } else if (anonymousNodes.checked) {
        displayName = cursor.nodeType;
      }

      if (visitedChildren) {
        // Coming back up: the node's subtree is done, so move sideways or upwards.
        if (displayName) {
          finishedRow = true;
        }

        if (cursor.gotoNextSibling()) {
          visitedChildren = false;
        } else if (cursor.gotoParent()) {
          visitedChildren = true;
          indentLevel--;
        } else {
          break;
        }
      } else {
        // First visit of this node: flush the pending row and start a new one.
        if (displayName) {
          if (finishedRow) {
            row += "</div>";
            rows.push(row);
            finishedRow = false;
          }
          const start = cursor.startPosition;
          const end = cursor.endPosition;
          const id = cursor.nodeId;
          let fieldName = cursor.currentFieldName;
          if (fieldName) {
            fieldName = `${escapeHtml(fieldName)}: `;
          } else {
            fieldName = "";
          }

          const nodeClass =
            displayName === 'ERROR' || displayName.startsWith('MISSING')
              ? 'node-link error plain'
              : cursor.nodeIsNamed
                ? 'node-link named plain'
                : 'node-link anonymous plain';

          row = `<div class="tree-row">${"  ".repeat(indentLevel)}${fieldName}` +
            `<a class='${nodeClass}' href="#" data-id=${id} ` +
            `data-range="${start.row},${start.column},${end.row},${end.column}">` +
            `${escapeHtml(displayName)}</a> <span class="position-info">` +
            `[${start.row}, ${start.column}] - [${end.row}, ${end.column}]</span>`;
          finishedRow = true;
        }

        if (cursor.gotoFirstChild()) {
          visitedChildren = false;
          indentLevel++;
        } else {
          visitedChildren = true;
        }
      }
    }
    if (finishedRow) {
      row += "</div>";
      rows.push(row);
    }

    cursor.delete();
    cluster.update(rows);
    treeRows = rows;
    isRendering--;
    handleCursorMovement();
  }

  // Returns the inline CSS used to mark text captured under `name`. With the accessibility
  // checkbox ticked the capture color becomes the background behind white text; otherwise it
  // is used as the text color.
  function getCaptureCSS(name) {
    const captureColor = colorForCaptureName(name);
    return accessibilityCheckbox.checked
      ? `color: white; background-color: ${captureColor}`
      : `color: ${captureColor}`;
  }

  // Clears every mark in the code editor and, when both a tree and a query exist, marks the
  // text of each captured node between `startRow` and `endRow`. When `endRow` is missing the
  // editor's current viewport supplies the row range. The first argument is ignored.
  function runTreeQuery(_, startRow, endRow) {
    if (endRow == null) {
      ({ from: startRow, to: endRow } = codeEditor.getViewport());
    }

    codeEditor.operation(() => {
      for (const mark of codeEditor.getAllMarks()) mark.clear();

      if (!tree || !query) return;

      const rangeStart = { row: startRow, column: 0 };
      const rangeEnd = { row: endRow, column: 0 };
      const captures = query.captures(tree.rootNode, rangeStart, rangeEnd);

      // Consecutive captures of the same node are marked only once (first capture wins).
      let previousNodeId;
      for (const capture of captures) {
        const { name, node } = capture;
        if (node.id === previousNodeId) continue;
        previousNodeId = node.id;

        const { startPosition: from, endPosition: to } = node;
        const markOptions = {
          inclusiveLeft: true,
          inclusiveRight: true,
          css: getCaptureCSS(name),
        };
        codeEditor.markText(
          { line: from.row, ch: from.column },
          { line: to.row, ch: to.column },
          markOptions,
        );
      }
    });
  }

  // Capture colors differ between dark and light themes, so whenever the `class` attribute
  // of the root element changes the query is re-processed to pick up the new colors.
  const observer = new MutationObserver((mutations) => {
    for (const { attributeName } of mutations) {
      if (attributeName === 'class') handleQueryChange();
    }
  });

  const themeObserverOptions = { attributes: true, attributeFilter: ['class'] };
  observer.observe(document.documentElement, themeObserverOptions);

  // Rebuilds the query from the query editor's text.
  //
  // The previous query (if any) is deleted first. When the query checkbox is ticked, the text
  // is compiled with the parser's language; on success every capture name in the editor is
  // colored, on failure the offending span is marked with the `query-error` class and the
  // error message as its tooltip. Afterwards the code editor's capture marks are refreshed
  // and the query state is saved.
  function handleQueryChange() {
    if (query) {
      query.delete();
      query.deleted = true;
      query = null;
    }

    queryEditor.operation(() => {
      queryEditor.getAllMarks().forEach((m) => m.clear());
      if (!queryCheckbox.checked) return;

      const queryText = queryEditor.getValue();

      try {
        query = parser.language.query(queryText);
        let match;

        let row = 0;
        queryEditor.eachLine((line) => {
          while ((match = CAPTURE_REGEX.exec(line.text))) {
            queryEditor.markText(
              { line: row, ch: match.index },
              { line: row, ch: match.index + match[0].length },
              {
                inclusiveLeft: true,
                inclusiveRight: true,
                css: `color: ${colorForCaptureName(match[1])}`,
              },
            );
          }
          row++;
        });
      } catch (error) {
        const startPosition = queryEditor.posFromIndex(error.index);
        // Without an explicit length, the mark runs to the end of the line.
        const endPosition = {
          line: startPosition.line,
          ch: startPosition.ch + (error.length || Infinity),
        };

        // An error at the very end of the text has nothing after it to mark, so pull the
        // start back by one character, or to the end of the previous line when the error
        // sits at column 0. Editor positions are `{ line, ch }` objects.
        if (error.index === queryText.length) {
          if (startPosition.ch > 0) {
            startPosition.ch--;
          } else if (startPosition.line > 0) {
            startPosition.line--;
            startPosition.ch = Infinity;
          }
        }

        queryEditor.markText(startPosition, endPosition, {
          className: "query-error",
          inclusiveLeft: true,
          inclusiveRight: true,
          attributes: { title: error.message },
        });
      }
    });

    runTreeQuery();
    saveQueryState();
  }

  // Highlights, in the rendered tree, the row of the smallest named node that spans the code
  // editor's first selection, and scrolls the output so that row is visible.
  //
  // Does nothing while a render is in progress or before a tree exists. Highlighting works by
  // swapping the `plain` / `highlighted` class text inside the stored row HTML strings.
  function handleCursorMovement() {
    // `tree` is only set once the first parse has completed.
    if (isRendering || !tree) return;

    const selection = codeEditor.getDoc().listSelections()[0];
    let start = { row: selection.anchor.line, column: selection.anchor.ch };
    let end = { row: selection.head.line, column: selection.head.ch };
    // A selection made backwards has its head before its anchor; normalize to start <= end.
    if (
      start.row > end.row ||
      (start.row === end.row && start.column > end.column)
    ) {
      let swap = end;
      end = start;
      start = swap;
    }
    const node = tree.rootNode.namedDescendantForPosition(start, end);
    if (treeRows) {
      // Un-highlight the previously highlighted row, if there was one.
      if (treeRowHighlightedIndex !== -1) {
        const row = treeRows[treeRowHighlightedIndex];
        if (row)
          treeRows[treeRowHighlightedIndex] = row.replace(
            "highlighted",
            "plain",
          );
      }

      // Rows carry `data-id=<id> ` followed by the next attribute. Including the trailing
      // space makes this an exact id match, so id 12 cannot match a row whose id is 123.
      const idAttribute = `data-id=${node.id} `;
      treeRowHighlightedIndex = treeRows.findIndex((row) =>
        row.includes(idAttribute),
      );
      if (treeRowHighlightedIndex !== -1) {
        const row = treeRows[treeRowHighlightedIndex];
        if (row)
          treeRows[treeRowHighlightedIndex] = row.replace(
            "plain",
            "highlighted",
          );
      }
      cluster.update(treeRows);

      // No row for this node: keep the current scroll position instead of computing a
      // negative offset and scrolling to it.
      if (treeRowHighlightedIndex === -1) return;

      const lineHeight = cluster.options.item_height;
      const scrollTop = outputContainerScroll.scrollTop;
      const containerHeight = outputContainerScroll.clientHeight;
      const offset = treeRowHighlightedIndex * lineHeight;
      if (scrollTop > offset - 20) {
        // Row is above the visible area (with a 20px margin): scroll up to it.
        $(outputContainerScroll).animate({ scrollTop: offset - 20 }, 150);
      } else if (scrollTop < offset + lineHeight + 40 - containerHeight) {
        // Row is below the visible area (with a 40px margin): scroll down to it.
        $(outputContainerScroll).animate(
          { scrollTop: offset - containerHeight + 40 },
          150,
        );
      }
    }
  }

  // Click handler for the rendered tree: a click on a node link (an `A` element) selects the
  // node's range in the code editor, using the comma-separated `data-range` attribute
  // ("startRow,startColumn,endRow,endColumn"). Clicks on anything else are ignored.
  function handleTreeClick(event) {
    const link = event.target;
    if (link.tagName !== "A") return;

    event.preventDefault();
    const numbers = link.dataset.range.split(",").map((n) => parseInt(n));
    const [startRow, startColumn, endRow, endColumn] = numbers;
    const anchor = { line: startRow, ch: startColumn };
    const head = { line: endRow, ch: endColumn };

    codeEditor.focus();
    codeEditor.setSelection(anchor, head);
  }

  // Installs or removes the parser's logger according to the logging checkbox. When enabled,
  // messages go to the console, with lexing messages prefixed by an extra "  " argument.
  function handleLoggingChange() {
    if (!loggingCheckbox.checked) {
      parser.setLogger(null);
      return;
    }

    parser.setLogger((message, lexing) => {
      const args = lexing ? ["  ", message] : [message];
      console.log(...args);
    });
  }

  // Shows or hides the query container to match the query checkbox, then re-processes the
  // query. Hiding uses `visibility: hidden` plus `position: absolute`; showing resets both
  // inline styles to the empty string.
  function handleQueryEnableChange() {
    const enabled = queryCheckbox.checked;
    const { style } = queryContainer;

    style.visibility = enabled ? "" : "hidden";
    style.position = enabled ? "" : "absolute";

    handleQueryChange();
  }

  // Converts one editor change object (`from`, `to`, inserted `text` lines, `removed` lines)
  // into the edit description passed to `tree.edit()`: start / old end / new end, each as a
  // character index and as a { row, column } position.
  function treeEditForEditorChange(change) {
    const { from, to, text, removed } = change;
    const insertedLineCount = text.length;
    const lastInsertedLength = text[insertedLineCount - 1].length;

    const startPosition = { row: from.line, column: from.ch };
    const oldEndPosition = { row: to.line, column: to.ch };

    // A single-line insertion ends on the start row, so its end column is offset by the
    // start column; a multi-line insertion ends at the length of its last line.
    const newEndColumn =
      insertedLineCount === 1
        ? startPosition.column + lastInsertedLength
        : lastInsertedLength;
    const newEndPosition = {
      row: startPosition.row + insertedLineCount - 1,
      column: newEndColumn,
    };

    // Length of a list of lines when joined: every line's characters plus one separator
    // between each pair of adjacent lines.
    const joinedLength = (lines) =>
      lines.reduce((total, line) => total + line.length, lines.length - 1);

    const startIndex = codeEditor.indexFromPos(from);
    const newEndIndex = startIndex + joinedLength(text);
    const oldEndIndex = startIndex + joinedLength(removed);

    return {
      startIndex,
      oldEndIndex,
      newEndIndex,
      startPosition,
      oldEndPosition,
      newEndPosition,
    };
  }

  // Picks the color for a capture name: its index in the current query's capture names,
  // wrapped around the palette. The dark palette is used when the <html> element has one of
  // the `ayu`, `coal` or `navy` classes, the light palette otherwise.
  function colorForCaptureName(capture) {
    const captureIndex = query.captureNames.indexOf(capture);

    const htmlClasses = document.querySelector('html').classList;
    const darkThemes = ['ayu', 'coal', 'navy'];
    const palette = darkThemes.some((theme) => htmlClasses.contains(theme))
      ? DARK_COLORS
      : LIGHT_COLORS;

    return palette[captureIndex % palette.length];
  }

  // Restores the playground inputs from localStorage. Nothing is restored unless the
  // language, source code and query entries are all present.
  function loadState() {
    const stored = (key) => localStorage.getItem(key);

    const savedLanguage = stored("language");
    const savedSource = stored("sourceCode");
    const savedAnonymous = stored("anonymousNodes");
    const savedQuery = stored("query");
    const savedQueryEnabled = stored("queryEnabled");

    if (savedLanguage == null || savedSource == null || savedQuery == null) {
      return;
    }

    queryInput.value = savedQuery;
    codeInput.value = savedSource;
    languageSelect.value = savedLanguage;
    initializeCustomSelect({ initialValue: savedLanguage });
    // Checkbox states are stored as the strings "true" / "false".
    anonymousNodes.checked = savedAnonymous === "true";
    queryCheckbox.checked = savedQueryEnabled === "true";
  }

  // Persists the selected language, the source code and the anonymous-nodes setting to
  // localStorage, then saves the query-related state as well.
  function saveState() {
    const entries = [
      ["language", languageSelect.value],
      ["sourceCode", codeEditor.getValue()],
      ["anonymousNodes", anonymousNodes.checked],
    ];
    for (const [key, value] of entries) {
      localStorage.setItem(key, value);
    }
    saveQueryState();
  }

  // Persists whether the query is enabled and the query editor's text to localStorage.
  function saveQueryState() {
    const queryEnabled = queryCheckbox.checked;
    const queryText = queryEditor.getValue();
    localStorage.setItem("queryEnabled", queryEnabled);
    localStorage.setItem("query", queryText);
  }

  // Returns a wrapper around `func` that collapses bursts of calls.
  //
  // Each call restarts a `wait`-millisecond timer. Without `immediate`, `func` runs once when
  // the timer finally fires, using the `this` and arguments of the latest call. With
  // `immediate`, `func` runs right away on the first call of a burst and further calls are
  // dropped until the timer has fired.
  function debounce(func, wait, immediate) {
    let pendingTimer;
    return function (...callArgs) {
      const receiver = this;
      // Must be decided before the timer is replaced below.
      const runNow = immediate && !pendingTimer;

      clearTimeout(pendingTimer);
      pendingTimer = setTimeout(() => {
        pendingTimer = null;
        if (!immediate) func.apply(receiver, callArgs);
      }, wait);

      if (runNow) func.apply(receiver, callArgs);
    };
  }
};



================================================
FILE: docs/src/assets/schemas/config.schema.json
================================================
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "properties": {
    "$schema": {
      "type": "string"
    },
    "grammars": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string",
            "description": "The name of the grammar.",
            "pattern": "^[a-z0-9_]+$"
          },
          "camelcase": {
            "type": "string",
            "description": "The name converted to CamelCase.",
            "pattern": "^\\w+$",
            "examples": [
              "Rust",
              "HTML"
            ]
          },
          "title": {
            "type": "string",
            "description": "The title of the language.",
            "examples": [
              "Rust",
              "HTML"
            ]
          },
          "scope": {
            "type": "string",
            "description": "The TextMate scope that represents this language.",
            "pattern": "^(source|text)(\\.[\\w\\-]+)+$",
            "examples": [
              "source.rust",
              "text.html"
            ]
          },
          "path": {
            "type": "string",
            "default": ".",
            "description": "The relative path to the directory containing the grammar."
          },
          "external-files": {
            "type": "array",
            "description": "The relative paths to files that should be checked for modifications during recompilation.",
            "items": {
              "type": "string"
            },
            "minItems": 1
          },
          "file-types": {
            "type": "array",
            "description": "An array of filename suffix strings.",
            "items": {
              "type": "string"
            },
            "minItems": 1
          },
          "highlights": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "minItems": 1
              }
            ],
            "default": "queries/highlights.scm",
            "description": "The path(s) to the grammar's highlight queries."
          },
          "injections": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "minItems": 1
              }
            ],
            "default": "queries/injections.scm",
            "description": "The path(s) to the grammar's injection queries."
          },
          "locals": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "minItems": 1
              }
            ],
            "default": "queries/locals.scm",
            "description": "The path(s) to the grammar's local variable queries."
          },
          "tags": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "minItems": 1
              }
            ],
            "default": "queries/tags.scm",
            "description": "The path(s) to the grammar's code navigation queries."
          },
          "injection-regex": {
            "type": "string",
            "format": "regex",
            "description": "A regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential language injection site."
          },
          "first-line-regex": {
            "type": "string",
            "format": "regex",
            "description": "A regex pattern that will be tested against the first line of a file in order to determine whether this language applies to the file."
          },
          "content-regex": {
            "type": "string",
            "format": "regex",
            "description": "A regex pattern that will be tested against the contents of the file in order to break ties in cases where multiple grammars matched the file."
          },
          "class-name": {
            "type": "string",
            "pattern": "^TreeSitter\\w+$",
            "description": "The class name for the Swift, Java & Kotlin bindings"
          }
        },
        "additionalProperties": false,
        "required": [
          "name",
          "scope"
        ]
      },
      "minItems": 1
    },
    "metadata": {
      "type": "object",
      "properties": {
        "version": {
          "type": "string",
          "description": "The current version of the project.",
          "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$",
          "$comment": "The CLI will use this version to update package.json, Cargo.toml, pyproject.toml, Makefile."
        },
        "license": {
          "type": "string",
          "default": "MIT",
          "description": "The project's license."
        },
        "description": {
          "type": "string",
          "description": "The project's description.",
          "examples": [
            "Rust grammar for tree-sitter"
          ]
        },
        "links": {
          "type": "object",
          "properties": {
            "repository": {
              "type": "string",
              "format": "uri",
              "description": "The project's repository."
            },
            "funding": {
              "type": "string",
              "format": "uri",
              "description": "The project's funding link."
            },
            "homepage": {
              "type": "string",
              "format": "uri",
              "description": "The project's homepage."
            }
          },
          "additionalProperties": false,
          "required": [
            "repository"
          ]
        },
        "authors": {
          "type": "array",
          "items": {
            "type": "object",
            "description": "The project's author(s).",
            "properties": {
              "name": {
                "type": "string"
              },
              "email": {
                "type": "string",
                "format": "email"
              },
              "url": {
                "type": "string",
                "format": "uri"
              }
            },
            "additionalProperties": false,
            "required": [
              "name"
            ]
          },
          "minItems": 1
        },
        "namespace": {
          "type": "string",
          "description": "The namespace for the Java & Kotlin packages.",
          "default": "io.github.tree-sitter",
          "$comment": "Used as is in the Maven/Gradle group name and transformed accordingly for the package names and directories (e.g. io.github.treesitter.jtreesitter.html - src/main/java/io/github/treesitter/jtreesitter/html)."
        }
      },
      "additionalProperties": false,
      "required": [
        "version",
        "links"
      ]
    },
    "bindings": {
      "type": "object",
      "description": "The language bindings that will be generated.",
      "properties": {
        "c": {
          "type": "boolean",
          "default": true
        },
        "go": {
          "type": "boolean",
          "default": true
        },
        "java": {
          "type": "boolean",
          "default": false
        },
        "kotlin": {
          "type": "boolean",
          "default": false
        },
        "node": {
          "type": "boolean",
          "default": true
        },
        "python": {
          "type": "boolean",
          "default": true
        },
        "rust": {
          "type": "boolean",
          "default": true
        },
        "swift": {
          "type": "boolean",
          "default": true
        },
        "zig": {
          "type": "boolean",
          "default": false
        }
      },
      "additionalProperties": false
    }
  },
  "additionalProperties": false,
  "required": [
    "grammars",
    "metadata"
  ]
}



================================================
FILE: docs/src/assets/schemas/grammar.schema.json
================================================
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Tree-sitter grammar specification",
  "type": "object",

  "required": ["name", "rules"],

  "additionalProperties": false,

  "properties": {
    "$schema": {
      "type": "string"
    },

    "name": {
      "description": "The name of the grammar",
      "type": "string",
      "pattern": "^[a-zA-Z_]\\w*"
    },

    "inherits": {
      "description": "The name of the parent grammar",
      "type": "string",
      "pattern": "^[a-zA-Z_]\\w*"
    },

    "rules": {
      "type": "object",
      "patternProperties": {
        "^[a-zA-Z_]\\w*$": {
          "$ref": "#/definitions/rule"
        }
      },
      "additionalProperties": false
    },

    "extras": {
      "type": "array",
      "uniqueItems": true,
      "items": {
        "$ref": "#/definitions/rule"
      }
    },

    "precedences": {
      "type": "array",
      "uniqueItems": true,
      "items": {
        "type": "array",
        "uniqueItems": true,
        "items": {
          "oneOf": [
            { "type": "string" },
            { "$ref": "#/definitions/symbol-rule" }
          ]
        }
      }
    },

    "reserved": {
      "type": "object",
      "patternProperties": {
        "^[a-zA-Z_]\\w*$": {
          "type": "array",
          "uniqueItems": true,
          "items": {
            "$ref": "#/definitions/rule"
          }
        }
      },
      "additionalProperties": false
    },

    "externals": {
      "type": "array",
      "uniqueItems": true,
      "items": {
        "$ref": "#/definitions/rule"
      }
    },

    "inline": {
      "type": "array",
      "uniqueItems": true,
      "items": {
        "type": "string",
        "pattern": "^[a-zA-Z_]\\w*$"
      }
    },

    "conflicts": {
      "type": "array",
      "uniqueItems": true,
      "items": {
        "type": "array",
        "uniqueItems": true,
        "items": {
          "type": "string",
          "pattern": "^[a-zA-Z_]\\w*$"
        }
      }
    },

    "word": {
      "type": "string",
      "pattern": "^[a-zA-Z_]\\w*"
    },

    "supertypes": {
      "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.",
      "type": "array",
      "uniqueItems": true,
      "items": {
        "description": "The name of a rule in `rules` or `extras`",
        "type": "string"
      }
    }
  },

  "definitions": {
    "blank-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "BLANK"
        }
      },
      "required": ["type"]
    },

    "string-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "STRING"
        },
        "value": {
          "type": "string"
        }
      },
      "required": ["type", "value"]
    },

    "pattern-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "PATTERN"
        },
        "value": { "type": "string" },
        "flags": { "type": "string" }
      },
      "required": ["type", "value"]
    },

    "symbol-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "SYMBOL"
        },
        "name": { "type": "string" }
      },
      "required": ["type", "name"]
    },

    "seq-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "SEQ"
        },
        "members": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/rule"
          }
        }
      },
      "required": ["type", "members"]
    },

    "choice-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "CHOICE"
        },
        "members": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/rule"
          }
        }
      },
      "required": ["type", "members"]
    },

    "alias-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "ALIAS"
        },
        "value": { "type": "string" },
        "named": { "type": "boolean" },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "named", "content", "value"]
    },

    "repeat-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "REPEAT"
        },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "content"]
    },

    "repeat1-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "REPEAT1"
        },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "content"]
    },

    "reserved-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "RESERVED"
        },
        "context_name": { "type": "string" },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "context_name", "content"]
    },

    "token-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "TOKEN",
            "IMMEDIATE_TOKEN"
          ]
        },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "content"]
    },

    "field-rule": {
      "properties": {
        "name": { "type": "string" },
        "type": {
          "type": "string",
          "const": "FIELD"
        },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["name", "type", "content"]
    },

    "prec-rule": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "PREC",
            "PREC_LEFT",
            "PREC_RIGHT",
            "PREC_DYNAMIC"
          ]
        },
        "value": {
          "oneOf": [
            { "type": "integer" },
            { "type": "string" }
          ]
        },
        "content": {
          "$ref": "#/definitions/rule"
        }
      },
      "required": ["type", "content", "value"]
    },

    "rule": {
      "oneOf": [
        { "$ref": "#/definitions/alias-rule" },
        { "$ref": "#/definitions/blank-rule" },
        { "$ref": "#/definitions/string-rule" },
        { "$ref": "#/definitions/pattern-rule" },
        { "$ref": "#/definitions/symbol-rule" },
        { "$ref": "#/definitions/seq-rule" },
        { "$ref": "#/definitions/choice-rule" },
        { "$ref": "#/definitions/repeat1-rule" },
        { "$ref": "#/definitions/repeat-rule" },
        { "$ref": "#/definitions/reserved-rule" },
        { "$ref": "#/definitions/token-rule" },
        { "$ref": "#/definitions/field-rule" },
        { "$ref": "#/definitions/prec-rule" }
      ]
    }
  }
}



================================================
FILE: docs/src/cli/build.md
================================================
# `tree-sitter build`

The `build` command compiles your parser into a dynamically-loadable library,
either as a shared object (`.so`, `.dylib`, or `.dll`) or as a WASM module.

```bash
tree-sitter build [OPTIONS] [PATH] # Aliases: b
```

You can change the compiler executable via the `CC` environment variable and add extra flags via `CFLAGS`.
For macOS or iOS, you can set `MACOSX_DEPLOYMENT_TARGET` or `IPHONEOS_DEPLOYMENT_TARGET` respectively to define the
minimum supported version.

The path argument allows you to specify the directory of the parser to build. If you don't supply this argument, the CLI
will attempt to build the parser in the current working directory.

## Options

### `-w/--wasm`

Compile the parser as a WASM module.

### `-o/--output`

Specify where to output the shared object file (native or WASM). This flag accepts either an absolute path or a relative
path. If you don't supply this flag, the CLI will attempt to figure out what the language name is based on the parent
directory name to use for the output file. If the CLI can't figure it out, it will default to `parser`, thus generating
`parser.so` or `parser.wasm` in the current working directory.

### `--reuse-allocator`

Reuse the allocator that's set in the core library for the parser's external scanner. This is useful in applications
where the author overrides the default allocator with their own, and wants to ensure every parser that allocates memory
in the external scanner does so using their allocator.

### `-0/--debug`

Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.



================================================
FILE: docs/src/cli/complete.md
================================================
# `tree-sitter complete`

The `complete` command generates a completion script for your shell.
This script can be used to enable autocompletion for the `tree-sitter` CLI.

```bash
tree-sitter complete --shell <SHELL> # Aliases: comp
```

## Options

### `--shell <SHELL>`

The shell for which to generate the completion script.

Supported values: `bash`, `elvish`, `fish`, `power-shell`, `zsh`, and `nushell`.



================================================
FILE: docs/src/cli/dump-languages.md
================================================
# `tree-sitter dump-languages`

The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging purposes, or for scripting. The paths to search come from the config file's [`parser-directories`][parser-directories] object.

```bash
tree-sitter dump-languages [OPTIONS] # Aliases: langs
```

## Options

### `--config-path`

The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md) command. This flag allows you to explicitly override that default, and use a config defined elsewhere.

[parser-directories]: ./init-config.md#parser-directories



================================================
FILE: docs/src/cli/fuzz.md
================================================
# `tree-sitter fuzz`

The `fuzz` command is used to fuzz a parser by performing random edits and ensuring that undoing these edits results in
consistent parse trees. It will fail if the parse trees are not equal, or if the changed ranges are inconsistent.

```bash
tree-sitter fuzz [OPTIONS] # Aliases: f
```

## Options

### `-s/--skip <SKIP>`

A list of test names to skip fuzzing.

### `--subdir <SUBDIR>`

The directory containing the parser. This is primarily useful in multi-language repositories.

### `--edits <EDITS>`

The maximum number of edits to perform. The default is 3.

### `--iterations <ITERATIONS>`

The number of iterations to run. The default is 10.

### `-i/--include <INCLUDE>`

Only run tests whose names match this regex.

### `-e/--exclude <EXCLUDE>`

Skip tests whose names match this regex.

### `--log-graphs`

Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing messages.
The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`.

### `-l/--log`

Outputs parsing and lexing logs. This logs to stderr.

### `-r/--rebuild`

Force a rebuild of the parser before running the fuzzer.

[dot]: https://graphviz.org/doc/info/lang.html



================================================
FILE: docs/src/cli/generate.md
================================================
# `tree-sitter generate`

The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current
working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar,
just run `tree-sitter generate` again.

```bash
tree-sitter generate [OPTIONS] [GRAMMAR_PATH] # Aliases: gen, g
```

The grammar path argument allows you to specify a path to a `grammar.js` JavaScript file, or `grammar.json` JSON file.
In case your `grammar.js` file is in a non-standard path, you can specify it yourself. But, if you are using a parser
where `grammar.json` was already generated, or it was hand-written, you can tell the CLI to generate the parser *based*
on this JSON file. This avoids relying on a JavaScript file and avoids the dependency on a JavaScript runtime.

If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and
it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, check out
the section on [`Structuring Rules Well`](../creating-parsers/3-writing-the-grammar.md#structuring-rules-well)
in the user guide.

## Options

### `-l/--log`

Print the log of the parser generation process. This is really only useful if you know what you're doing, or are investigating
a bug in the CLI itself. It logs info such as what tokens are included in the error recovery state,
what keywords were extracted, what states were split and why, and the entry point state.

### `--abi <VERSION>`

The ABI to use for parser generation. The default is ABI 15, with ABI 14 being a supported target.

### `-b/--build`

Compile all defined languages in the current directory. The cli will automatically compile the parsers after generation,
and place them in the cache dir.

### `-0/--debug-build`

Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.

### `--libdir <PATH>`

The directory to place the compiled parser(s) in.
On Unix systems, the default path is `$XDG_CACHE_HOME/tree-sitter` if `$XDG_CACHE_HOME` is set,
otherwise `$HOME/.cache/tree-sitter` is used. On Windows, the default path is `%LOCALAPPDATA%\tree-sitter` if available,
otherwise `$HOME\AppData\Local\tree-sitter` is used.

### `-o/--output`

The directory to place the generated parser in. The default is `src/` in the current directory.

### `--report-states-for-rule <RULE>`

Print the overview of states from the given rule. This is useful for debugging and understanding the generated parser's
item sets for all given states in a given rule. To solely view state count numbers for rules, pass in `-` for the rule argument.
To view the overview of states for every rule, pass in `*` for the rule argument.

### `--json`

Report conflicts in a JSON format.

### `--js-runtime <EXECUTABLE>`

The path to the JavaScript runtime executable to use when generating the parser. The default is `node`.
Note that you can also set this with `TREE_SITTER_JS_RUNTIME`.



================================================
FILE: docs/src/cli/highlight.md
================================================
# `tree-sitter highlight`

You can run syntax highlighting on an arbitrary file using `tree-sitter highlight`. This can either output colors directly
to your terminal using ANSI escape codes, or produce HTML (if the `--html` flag is passed). For more information, see
[the syntax highlighting page](../3-syntax-highlighting.md).

```bash
tree-sitter highlight [OPTIONS] [PATHS]... # Aliases: hi
```

## Options

### `-H/--html`

Output an HTML document with syntax highlighting.

### `--css-classes`

Output HTML with CSS classes instead of inline styles.

### `--check`

Check that the highlighting captures conform strictly to the standards.

### `--captures-path <CAPTURES_PATH>`

The path to a file with captures. These captures would be considered the "standard" captures to compare against.

### `--query-paths <QUERY_PATHS>`

The paths to query files to use for syntax highlighting. These should end in `highlights.scm`.

### `--scope <SCOPE>`

The language scope to use for syntax highlighting. This is useful when the language is ambiguous.

### `-t/--time`

Print the time taken to highlight the file.

### `-q/--quiet`

Suppress main output.

### `--paths <PATHS_FILE>`

The path to a file that contains paths to source files to highlight.

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

### `--config-path <CONFIG_PATH>`

The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.

### `-n/--test-number <TEST_NUMBER>`

Highlight the contents of a specific test.



================================================
FILE: docs/src/cli/index.md
================================================
# CLI Overview

Let's go over all of the functionality of the `tree-sitter` command line interface.
Once you feel that you have enough of a grasp on the CLI, you can move on to the grammar authoring section to learn more about writing your own parser.



================================================
FILE: docs/src/cli/init-config.md
================================================
# `tree-sitter init-config`

This command initializes a configuration file for the Tree-sitter CLI.

```bash
tree-sitter init-config
```

The configuration file is created in the "default" configuration directory for your platform:

* On Unix, `$XDG_CONFIG_HOME/tree-sitter` or `$HOME/.config/tree-sitter`
* On Windows, `%APPDATA%\tree-sitter` or `$HOME\AppData\Roaming\tree-sitter`

```admonish info
The CLI will work if there's no config file present, falling back on default values for each configuration option.
```

When you run the `init-config` command, it will print out the location of the file that it creates so that you can easily
find and modify it.

The configuration file is a JSON file that contains the following fields:

## `parser-directories`

The [`tree-sitter highlight`](./highlight.md) command takes one or more file paths, and tries to automatically determine
which language should be used to highlight those files. To do this, it needs to know *where* to look for Tree-sitter grammars
on your filesystem. You can control this using the `"parser-directories"` key in your configuration file:

```json
{
  "parser-directories": [
    "/Users/my-name/code",
    "~/other-code",
    "$HOME/another-code"
  ]
}
```

Any folder within one of these *parser directories* whose name begins with `tree-sitter-` will be treated as a Tree-sitter
grammar repository.

## `theme`

The [Tree-sitter highlighting system](../3-syntax-highlighting.md) works by annotating ranges of source code with logical
"highlight names" like `function.method`, `type.builtin`, `keyword`, etc. To decide what *color* should be used for rendering
each highlight, a *theme* is needed.

In your config file, the `"theme"` value is an object whose keys are dot-separated highlight names like
`function.builtin` or `keyword`, and whose values are JSON expressions that represent text styling parameters.

### Highlight Names

A theme can contain multiple keys that share a common subsequence. Examples:

* `variable` and `variable.parameter`
* `function`, `function.builtin`, and `function.method`

For a given highlight produced, styling will be determined based on the **longest matching theme key**. For example, the
highlight `function.builtin.static` would match the key `function.builtin` rather than `function`.

### Styling Values

Styling values can be any of the following:

* Integers from 0 to 255, representing ANSI terminal color ids.
* Strings like `"#e45649"` representing hexadecimal RGB colors.
* Strings naming basic ANSI colors like `"red"`, `"black"`, `"purple"`, or `"cyan"`.
* Objects with the following keys:
  * `color` — An integer or string as described above.
  * `underline` — A boolean indicating whether the text should be underlined.
  * `italic` — A boolean indicating whether the text should be italicized.
  * `bold` — A boolean indicating whether the text should be bold-face.

An example theme can be seen below:

```json
{
  "function": 26,
  "operator": {
    "bold": true,
    "color": 239
  },
  "variable.builtin": {
    "bold": true
  },
  "variable.parameter": {
    "underline": true
  },
  "type.builtin": {
    "color": 23,
    "bold": true
  },
  "keyword": 56,
  "type": 23,
  "number": {
    "bold": true,
    "color": 94
  },
  "constant": 94,
  "attribute": {
    "color": 124,
    "italic": true
  },
  "comment": {
    "color": 245,
    "italic": true
  },
  "constant.builtin": {
    "color": 94,
    "bold": true
  }
}
```

## `parse-theme`

The [`tree-sitter parse`](./parse.md) command will output a pretty-printed CST when the `-c/--cst` option is used. You can
control what colors are used for various parts of the tree in your configuration file.

```admonish note
Omitting a field will cause the relevant text to be rendered with its default color.
```

An example parse theme can be seen below:

```json
{
  "parse-theme": {
    // The color of node kinds
    "node-kind": [20, 20, 20],
    // The color of text associated with a node
    "node-text": [255, 255, 255],
    // The color of node fields
    "field": [42, 42, 42],
    // The color of the range information for unnamed nodes
    "row-color": [255, 255, 255],
    // The color of the range information for named nodes
    "row-color-named": [255, 130, 0],
    // The color of extra nodes
    "extra": [255, 0, 255],
    // The color of ERROR nodes
    "error": [255, 0, 0],
    // The color of MISSING nodes and their associated text
    "missing": [153, 75, 0],
    // The color of newline characters
    "line-feed": [150, 150, 150],
    // The color of backtick characters
    "backtick": [0, 200, 0],
    // The color of literals
    "literal": [0, 0, 200]
  }
}
```



================================================
FILE: docs/src/cli/init.md
================================================
# `tree-sitter init`

The `init` command is your starting point for creating a new grammar. When you run it, it sets up a repository with all
the essential files and structure needed for grammar development. Since the command includes git-related files by default,
we recommend using git for version control of your grammar.

```bash
tree-sitter init [OPTIONS] # Aliases: i
```

## Options

### `--update`

Update outdated generated files, if needed.

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

## Structure of `tree-sitter.json`

The main file of interest for users to configure is `tree-sitter.json`, which tells the CLI information about your grammar,
such as the location of queries.

### The `grammars` field

This field is an array of objects, though you typically only need one object in this array unless your repo has
multiple grammars (for example, `TypeScript` and `TSX`).

### Example

Typically, the objects in the `"grammars"` array only need to specify a few keys:

```json
{
  "grammars": [
    {
      "scope": "source.ruby",
      "file-types": [
        "rb",
        "gemspec",
        "Gemfile",
        "Rakefile"
      ],
      "first-line-regex": "#!.*\\bruby$"
    }
  ]
}
```

#### Basic Fields

These keys specify basic information about the parser:

- `scope` (required) — A string like `"source.js"` that identifies the language.
We strive to match the scope names used by popular [TextMate grammars][textmate] and by the [Linguist][linguist] library.

- `path` — A relative path from the directory containing `tree-sitter.json` to another directory containing the `src/`
folder, which contains the actual generated parser. The default value is `"."`
(so that `src/` is in the same folder as `tree-sitter.json`), and this very rarely needs to be overridden.

- `external-files` — A list of relative paths from the root dir of a
parser to files that should be checked for modifications during recompilation.
This is useful during development to have changes to other files besides scanner.c
be picked up by the cli.

#### Language Detection

These keys help to decide whether the language applies to a given file:

- `file-types` — An array of filename suffix strings. The grammar will be used for files whose names end with one of
these suffixes. Note that the suffix may match an *entire* filename.

- `first-line-regex` — A regex pattern that will be tested against the first line of a file
to determine whether this language applies to the file. If present, this regex will be used for any file whose
language does not match any grammar's `file-types`.

- `content-regex` — A regex pattern that will be tested against the contents of the file
to break ties in cases where multiple grammars matched the file using the above two criteria. If the regex matches,
this grammar will be preferred over another grammar with no `content-regex`. If the regex does not match, a grammar with
no `content-regex` will be preferred over this one.

- `injection-regex` — A regex pattern that will be tested against a *language name* to determine whether this language
should be used for a potential *language injection* site.
Language injection is described in more detail in [the relevant section](../3-syntax-highlighting.md#language-injection).

#### Query Paths

These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting:

- `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm`.
- `locals` — Path to a *local variable query*. Default: `queries/locals.scm`.
- `injections` — Path to an *injection query*. Default: `queries/injections.scm`.
- `tags` — Path to a *tag query*. Default: `queries/tags.scm`.

### The `metadata` field

This field contains information that tree-sitter will use to populate relevant bindings' files, especially their versions.
Typically, this will all be set up when you run `tree-sitter init`, but you are welcome to update it as you see fit.

- `version` (required) — The current version of your grammar, which should follow [semver][semver]
- `license` — The license of your grammar, which should be a valid [SPDX license][spdx]
- `description` — The brief description of your grammar
- `authors` (required) — An array of objects that contain a `name` field, and optionally an `email` and `url` field.
Each field is a string
- `links` — An object that contains a `repository` field, and optionally a `homepage` field. Each field is a string
- `namespace` — The namespace for the `Java` and `Kotlin` bindings, defaults to `io.github.tree-sitter` if not provided

### The `bindings` field

This field controls what bindings are generated when the `init` command is run.
Each key is a language name, and the value is a boolean.

- `c` (default: `true`)
- `go` (default: `true`)
- `java` (default: `false`)
- `kotlin` (default: `false`)
- `node` (default: `true`)
- `python` (default: `true`)
- `rust` (default: `true`)
- `swift` (default: `false`)

## Binding Files

When you run `tree-sitter init`, the CLI will also generate a number of files in your repository that allow for your parser
to be used from different languages. Here is a list of these bindings files that are generated, and what their purpose is:

### C/C++

- `Makefile` — This file tells [`make`][make] how to compile your language.
- `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language.
- `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language.
- `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
- `src/tree_sitter/parser.h` — This file provides some basic C definitions that are used in your generated `parser.c` file.
- `src/tree_sitter/alloc.h` — This file provides some memory allocation macros that are to be used in your external scanner,
if you have one.
- `src/tree_sitter/array.h` — This file provides some array macros that are to be used in your external scanner,
if you have one.

### Go

- `go.mod` — This file is the manifest of the Go module.
- `bindings/go/binding.go` — This file wraps your language in a Go module.
- `bindings/go/binding_test.go` — This file contains a test for the Go package.

### Node

- `binding.gyp` — This file tells Node.js how to compile your language.
- `package.json` — This file is the manifest of the Node.js package.
- `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js.
- `bindings/node/index.js` — This is the file that Node.js initially loads when using your language.
- `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript.
- `bindings/node/binding_test.js` — This file contains a test for the Node.js package.

### Python

- `pyproject.toml` — This file is the manifest of the Python package.
- `setup.py` — This file tells Python how to compile your language.
- `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module.
- `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language.
- `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python.
- `bindings/python/tree_sitter_language/py.typed` — This file provides type hints for your parser when used in Python.
- `bindings/python/tests/test_binding.py` — This file contains a test for the Python package.

### Rust

- `Cargo.toml` — This file is the manifest of the Rust package.
- `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust.
- `bindings/rust/build.rs` — This file wraps the building process for the Rust crate.

### Swift

- `Package.swift` — This file tells Swift how to compile your language.
- `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift.
- `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package.

### Additional Files

Additionally, there are a few other files that are generated when you run `tree-sitter init`,
that aim to improve the development experience:

- `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig].
- `.gitattributes` — This file tells Git how to handle line endings, and tells GitHub what files are generated.
- `.gitignore` — This file tells Git what files to ignore when committing changes.

[cmake]: https://cmake.org/cmake/help/latest
[editorconfig]: https://editorconfig.org
[linguist]: https://github.com/github/linguist
[make]: https://www.gnu.org/software/make/manual/make.html
[pkg-config]: https://www.freedesktop.org/wiki/Software/pkg-config
[semver]: https://semver.org
[spdx]: https://spdx.org/licenses
[textmate]: https://macromates.com/manual/en/language_grammars



================================================
FILE: docs/src/cli/parse.md
================================================
# `tree-sitter parse`

The `parse` command parses source files using a Tree-sitter parser. You can pass any number of file paths and glob patterns
to `tree-sitter parse`, and it will parse all the given files. The command will exit with a non-zero status code if any
parse errors occurred.

```bash
tree-sitter parse [OPTIONS] [PATHS]... # Aliases: p
```

## Options

### `--paths <PATHS_FILE>`

The path to a file that contains paths to source files to parse.

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

### `--scope <SCOPE>`

The language scope to use for parsing. This is useful when the language is ambiguous.

### `-d/--debug`

Outputs parsing and lexing logs. This logs to stderr.

### `-0/--debug-build`

Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.

### `-D/--debug-graph`

Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing messages.
The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`.

### `--wasm`

Compile and run the parser as a WASM module.

### `--dot`

Output the parse tree with [graphviz dot][dot].

### `-x/--xml`

Output the parse tree in XML format.

### `-c/--cst`

Output the parse tree in a pretty-printed CST format.

### `-s/--stat`

Show parsing statistics.

### `--timeout <TIMEOUT>`

Set the timeout for parsing a single file, in microseconds.

### `-t/--time`

Print the time taken to parse the file. If edits are provided, this will also print the time taken to parse the file after
each edit.

### `-q/--quiet`

Suppress main output.

### `--edits <EDITS>...`

Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col, or position are 0-indexed.

### `--encoding <ENCODING>`

Set the encoding of the input file. By default, the CLI will look for the [`BOM`][bom] to determine if the file is encoded
in `UTF-16BE` or `UTF-16LE`. If no `BOM` is present, `UTF-8` is the default. One of `utf8`, `utf16-le`, `utf16-be`.

### `--open-log`

When using the `--debug-graph` option, open the log file in the default browser.

### `-j/--json`

Output parsing results in a JSON format.

### `--config-path <CONFIG_PATH>`

The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.

### `-n/--test-number <TEST_NUMBER>`

Parse a specific test in the corpus. The test number is the same number that appears in the output of `tree-sitter test`.

### `-r/--rebuild`

Force a rebuild of the parser before parsing.

### `--no-ranges`

Omit the node's ranges from the default parse output. This is useful when copying S-Expressions to a test file.

[dot]: https://graphviz.org/doc/info/lang.html
[bom]: https://en.wikipedia.org/wiki/Byte_order_mark



================================================
FILE: docs/src/cli/playground.md
================================================
# `tree-sitter playground`

The `playground` command allows you to start a local playground to test your parser interactively.

```bash
tree-sitter playground [OPTIONS] # Aliases: play, pg, web-ui
```

```admonish note
For this to work, you must have already built the parser as a WASM module. This can be done with the [`build`](./build.md) subcommand
(`tree-sitter build --wasm`).
```

## Options

### `-q/--quiet`

Don't automatically open the playground in the default browser.

### `--grammar-path <GRAMMAR_PATH>`

The path to the directory containing the grammar and wasm files.



================================================
FILE: docs/src/cli/query.md
================================================
# `tree-sitter query`

The `query` command is used to run a query on a parser, and view the results.

```bash
tree-sitter query [OPTIONS] <QUERY_PATH> [PATHS]... # Aliases: q
```

## Options

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

### `-t/--time`

Print the time taken to execute the query on the file.

### `-q/--quiet`

Suppress main output.

### `--paths <PATHS_FILE>`

The path to a file that contains paths to source files in which the query will be executed.

### `--byte-range <BYTE_RANGE>`

The range of byte offsets in which the query will be executed. The format is `start_byte:end_byte`.

### `--row-range <ROW_RANGE>`

The range of rows in which the query will be executed. The format is `start_row:end_row`.

### `--scope <SCOPE>`

The language scope to use for parsing and querying. This is useful when the language is ambiguous.

### `-c/--captures`

Order the query results by captures instead of matches.

### `--test`

Whether to run query tests or not.

### `--config-path <CONFIG_PATH>`

The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.

### `-n/--test-number <TEST_NUMBER>`

Query the contents of a specific test.



================================================
FILE: docs/src/cli/tags.md
================================================
# `tree-sitter tags`

You can run symbol tagging on an arbitrary file using `tree-sitter tags`. This will output a list of tags.
For more information, see [the code navigation page](../4-code-navigation.md#tagging-and-captures).

```bash
tree-sitter tags [OPTIONS] [PATHS]...
```

## Options

### `--scope <SCOPE>`

The language scope to use for symbol tagging. This is useful when the language is ambiguous.

### `-t/--time`

Print the time taken to generate tags for the file.

### `-q/--quiet`

Suppress main output.

### `--paths <PATHS_FILE>`

The path to a file that contains paths to source files to tag.

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

### `--config-path <CONFIG_PATH>`

The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.

### `-n/--test-number <TEST_NUMBER>`

Generate tags from the contents of a specific test.



================================================
FILE: docs/src/cli/test.md
================================================
# `tree-sitter test`

The `test` command is used to run the test suite for a parser.

```bash
tree-sitter test [OPTIONS] # Aliases: t
```

## Options

### `-i/--include <INCLUDE>`

Only run tests whose names match this regex.

### `-e/--exclude <EXCLUDE>`

Skip tests whose names match this regex.

### `--file-name <NAME>`

Only run tests from the given filename in the corpus.

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.

### `-u/--update`

Update the expected output of tests.

```admonish info
Tests containing `ERROR` nodes or `MISSING` nodes will not be updated.
```

### `-d/--debug`

Outputs parsing and lexing logs. This logs to stderr.

### `-0/--debug-build`

Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`.

### `-D/--debug-graph`

Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing messages.
The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`.

### `--wasm`

Compile and run the parser as a WASM module.

### `--open-log`

When using the `--debug-graph` option, open the log file in the default browser.

### `--config-path <CONFIG_PATH>`

The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information.

### `--show-fields`

Force showing fields in test diffs.

### `--stat <STAT>`

Show parsing statistics when tests are being run. One of `all`, `outliers-and-total`, or `total-only`.

- `all`: Show statistics for every test.

- `outliers-and-total`: Show statistics only for outliers, and total statistics.

- `total-only`: Show only total statistics.

### `-r/--rebuild`

Force a rebuild of the parser before running tests.

### `--overview-only`

Only show the overview of the test results, and not the diff.



================================================
FILE: docs/src/cli/version.md
================================================
# `tree-sitter version`

The `version` command upgrades the version of your grammar.

```bash
tree-sitter version <VERSION> # Aliases: publish
```

This will update the version in several files, if they exist:

* tree-sitter.json
* Cargo.toml
* Cargo.lock
* package.json
* package-lock.json
* Makefile
* CMakeLists.txt
* pyproject.toml

As a grammar author, you should keep the version of your grammar in sync across
different bindings. However, doing so manually is error-prone and tedious, so
this command takes care of the burden. If you are using a version control system,
it is recommended to commit the changes made by this command, and to tag the
commit with the new version.

## Options

### `-p/--grammar-path <PATH>`

The path to the directory containing the grammar.



================================================
FILE: docs/src/creating-parsers/1-getting-started.md
================================================
# Getting Started

## Dependencies

To develop a Tree-sitter parser, there are two dependencies that you need to install:

- **A JavaScript runtime** — Tree-sitter grammars are written in JavaScript, and Tree-sitter uses a JavaScript runtime
(the default being [Node.js][node.js]) to interpret JavaScript files. It requires this runtime command (default: `node`)
to be in one of the directories in your [`PATH`][path-env].

- **A C Compiler** — Tree-sitter creates parsers that are written in C. To run and test these parsers with the
`tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look
for these compilers in the standard places for each platform.

## Installation

To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few
different ways:

- Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works
on any platform. See [the contributing docs](../6-contributing.md#developing-tree-sitter) for more information.

- Install the `tree-sitter-cli` [Rust crate][crate] from [crates.io][crates.io] using [`cargo`][cargo]. You can do so by
running the following command: `cargo install tree-sitter-cli --locked`

- Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This approach
is fast, but it only works on certain platforms, because it relies on pre-built binaries.

- Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`.

## Project Setup

The preferred convention is to name the parser repository "tree-sitter-" followed by the name of the language, in lowercase.

```sh
mkdir tree-sitter-${LOWER_PARSER_NAME}
cd tree-sitter-${LOWER_PARSER_NAME}
```

```admonish note
The `LOWER_` prefix here means the "lowercase" name of the language.
```

### Init

Once you've installed the `tree-sitter` CLI tool, you can start setting up your project, which will allow your parser to
be used from multiple languages.

```sh
# This will prompt you for input
tree-sitter init
```

The `init` command will create a bunch of files in the project.
There should be a file called `grammar.js` with the following contents:

```js
/**
 * @file PARSER_DESCRIPTION
 * @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL
 * @license PARSER_LICENSE
 */

/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

module.exports = grammar({
  name: 'LOWER_PARSER_NAME',

  rules: {
    // TODO: add the actual grammar rules
    source_file: $ => 'hello'
  }
});
```

```admonish info
The placeholders shown above would be replaced with the corresponding data you provided in the `init` sub-command's
prompts.
```

To learn more about this command, check the [reference page](../cli/init.md).

### Generate

Next, run the following command:

```sh
tree-sitter generate
```

This will generate the C code required to parse this trivial language.

You can test this parser by creating a source file with the contents "hello" and parsing it:

```sh
echo 'hello' > example-file
tree-sitter parse example-file
```

Alternatively, in Windows PowerShell:

```pwsh
"hello" | Out-File example-file -Encoding utf8
tree-sitter parse example-file
```

This should print the following:

```text
(source_file [0, 0] - [1, 0])
```

You now have a working parser.

Finally, look back at the [triple-slash][] and [`@ts-check`][ts-check] comments in `grammar.js`; these tell your editor
to provide documentation and type information as you edit your grammar. For these to work, you must download Tree-sitter's
TypeScript API from npm into a `node_modules` directory in your project:

```sh
npm install # or your package manager of choice
```

To learn more about the `generate` command, check the [reference page](../cli/generate.md).

[cargo]: https://doc.rust-lang.org/cargo/getting-started/installation.html
[crate]: https://crates.io/crates/tree-sitter-cli
[crates.io]: https://crates.io/crates/tree-sitter-cli
[node-module]: https://www.npmjs.com/package/tree-sitter-cli
[node.js]: https://nodejs.org
[npm]: https://docs.npmjs.com
[path-env]: https://en.wikipedia.org/wiki/PATH_(variable)
[releases]: https://github.com/tree-sitter/tree-sitter/releases/latest
[tree-sitter-cli]: https://github.com/tree-sitter/tree-sitter/tree/master/crates/cli
[triple-slash]: https://www.typescriptlang.org/docs/handbook/triple-slash-directives.html
[ts-check]: https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html



================================================
FILE: docs/src/creating-parsers/2-the-grammar-dsl.md
================================================
# The Grammar DSL

The following is a complete list of built-in functions you can use in your `grammar.js` to define rules. Use-cases for some
of these functions will be explained in more detail in later sections.

- **Symbols (the `$` object)** — Every grammar rule is written as a JavaScript function that takes a parameter conventionally
called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING`
or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command.
- **String and Regex literals** — The terminal symbols in a grammar are described using JavaScript strings and regular
expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes;
it generates its own regex-matching logic based on the Rust regex syntax as part of each parser. Regex literals are just
used as a convenient way of writing regular expressions in your grammar. You can use Rust regular expressions in your grammar
DSL through the `RustRegex` class. Simply pass your regex pattern as a string:

  ```js
  new RustRegex('(?i)[a-z_][a-z0-9_]*') // matches a simple identifier
  ```

  Unlike JavaScript's builtin `RegExp` class, which takes a pattern and flags as separate arguments, `RustRegex` only
  accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern itself.
  For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex].

  ```admonish note
  Only a subset of the Regex engine is actually supported. This is due to certain features like lookahead and lookaround
  assertions not being feasible to use in an LR(1) grammar, as well as certain flags being unnecessary for tree-sitter. However,
  plenty of features are supported by default:

  - Character classes
  - Character ranges
  - Character sets
  - Quantifiers
  - Alternation
  - Grouping
  - Unicode character escapes
  - Unicode property escapes
  ```

- **Sequences : `seq(rule1, rule2, ...)`** — This function creates a rule that matches any number of other rules, one after
another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf].

- **Alternatives : `choice(rule1, rule2, ...)`** — This function creates a rule that matches *one* of a set of possible
rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation.

- **Repetitions : `repeat(rule)`** — This function creates a rule that matches *zero-or-more* occurrences of a given rule.
It is analogous to the `{x}` (curly brace) syntax in EBNF notation.

- **Repetitions : `repeat1(rule)`** — This function creates a rule that matches *one-or-more* occurrences of a given rule.
The previous `repeat` rule is implemented in terms of `repeat1` but is included because it is very commonly used.

- **Options : `optional(rule)`** — This function creates a rule that matches *zero or one* occurrence of a given rule.
It is analogous to the `[x]` (square bracket) syntax in EBNF notation.

- **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be used
to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either
a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching
the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the
[precedence directives][yacc-prec] in Yacc grammars.

  This function can also be used to assign lexical precedence to a given
  token, but it must be wrapped in a `token` call, such as `token(prec(1, 'foo'))`. This reads as "the token `foo` has a
  lexical precedence of 1". The purpose of lexical precedence is to solve the issue where multiple tokens can match the same
  set of characters, but one token should be preferred over the other. See [Lexical Precedence vs Parse Precedence][lexical vs parse]
  for a more detailed explanation.

- **Left Associativity : `prec.left([number], rule)`** — This function marks the given rule as left-associative (and optionally
applies a numerical precedence). When an LR(1) conflict arises in which all the rules have the same numerical precedence,
Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching
a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars.

- **Right Associativity : `prec.right([number], rule)`** — This function is like `prec.left`, but it instructs Tree-sitter
to prefer matching a rule that ends *later*.

- **Dynamic Precedence : `prec.dynamic(number, rule)`** — This function is similar to `prec`, but the given numerical precedence
is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically
using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given
piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the
one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars.

- **Tokens : `token(rule)`** — This function marks the given rule as producing only
a single token. Tree-sitter's default is to treat each String or RegExp literal
in the grammar as a separate token. Each token is matched separately by the lexer
and returned as its own leaf node in the tree. The `token` function allows you to
express a complex rule using the functions described above (rather than as a single
regular expression) but still have Tree-sitter treat it as a single token.
The token function will only accept terminal rules, so `token($.foo)` will not work.
You can think of it as a shortcut for squashing complex rules of strings or regexes
down to a single token.

- **Immediate Tokens : `token.immediate(rule)`** — Usually, whitespace (and any other extras, such as comments) is optional
before each token. This function means that the token will only match if there is no whitespace.

- **Aliases : `alias(rule, name)`** — This function causes the given rule to *appear* with an alternative name in the syntax
tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes]
called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an
[anonymous node][named-vs-anonymous-nodes], as if the rule had been written as the simple string.

- **Field Names : `field(name, rule)`** — This function assigns a *field name* to the child node(s) matched by the given
rule. In the resulting syntax tree, you can then use that field name to access specific children.

- **Reserved Keywords : `reserved(wordset, rule)`** — This function will override the global reserved word set with the
one passed into the `wordset` parameter. This is useful for contextual keywords, such as `if` in JavaScript, which cannot
be used as a variable name in most contexts, but can be used as a property name.

In addition to the `name` and `rules` fields, grammars have a few other optional public fields that influence the behavior
of the parser. Each of these fields is a function that accepts the grammar object (`$`) as its only parameter, like the
grammar rules themselves. These fields are:

- **`extras`** — an array of tokens that may appear *anywhere* in the language. This is often used for whitespace and
comments. The default value of `extras` is to accept whitespace. To control whitespace explicitly, specify
`extras: $ => []` in your grammar.

- **`inline`** — an array of rule names that should be automatically *removed* from the grammar by replacing all of their
usages with a copy of their definition. This is useful for rules that are used in multiple places but for which you *don't*
want to create syntax tree nodes at runtime.

- **`conflicts`** — an array of arrays of rule names. Each inner array represents a set of rules that's involved in an
*LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use
the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick
the subtree whose corresponding rule has the highest total *dynamic precedence*.

- **`externals`** — an array of token names which can be returned by an
[*external scanner*][external-scanners]. External scanners allow you to write custom C code which runs during the lexing
process to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions.

- **`precedences`** — an array of arrays of strings, where each array of strings defines named precedence levels in descending
order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather
than globally. Can only be used with parse precedence, not lexical precedence.

- **`word`** — the name of a token that will match keywords for the purpose of the
[keyword extraction][keyword-extraction] optimization.

- **`supertypes`** — an array of hidden rule names which should be considered to be 'supertypes' in the generated
[*node types* file][static-node-types].

- **`reserved`** — similar in structure to the main `rules` property, an object of reserved word sets associated with an
array of reserved rules. Each reserved rule in the array must be a terminal token, meaning it must be a string, regex, token,
or terminal rule. The *first* reserved word set in the object is the global word set, meaning it applies to every rule
in every parse state. However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords
are typically not allowed as ordinary variables, however, they *can* be used as a property name. In this situation, the `reserved`
function would be used, and the word set to pass in would be the name of the word set that is declared in the `reserved`
object that corresponds to an empty array, signifying *no* keywords are reserved.

[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
[ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
[external-scanners]: ./4-external-scanners.md
[keyword-extraction]: ./3-writing-the-grammar.md#keyword-extraction
[lexical vs parse]: ./3-writing-the-grammar.md#lexical-precedence-vs-parse-precedence
[lr-conflict]: https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
[named-vs-anonymous-nodes]: ../using-parsers/2-basic-parsing.md#named-vs-anonymous-nodes
[rust regex]: https://docs.rs/regex/1.1.8/regex/#grouping-and-flags
[static-node-types]: ../using-parsers/6-static-node-types.md
[yacc-prec]: https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html



================================================
FILE: docs/src/creating-parsers/3-writing-the-grammar.md
================================================
# Writing the Grammar

Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe
any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties:

1. **An intuitive structure** — Tree-sitter's output is a [concrete syntax tree][cst]; each node in the tree corresponds
directly to a [terminal or non-terminal symbol][non-terminal] in the grammar. So to produce an easy-to-analyze tree, there
should be a direct correspondence between the symbols in your grammar and the recognizable constructs in the language.
This might seem obvious, but it is very different from the way that context-free grammars are often written in contexts
like [language specifications][language-spec] or [Yacc][yacc]/[Bison][bison] parsers.

2. **A close adherence to LR(1)** — Tree-sitter is based on the [GLR parsing][glr-parsing] algorithm. This means that while
it can handle any context-free grammar, it works most efficiently with a class of context-free grammars called [LR(1) Grammars][lr-grammars].
In this respect, Tree-sitter's grammars are similar to (but less restrictive than) [Yacc][yacc] and [Bison][bison] grammars,
but _different_ from [ANTLR grammars][antlr], [Parsing Expression Grammars][peg], or the [ambiguous grammars][ambiguous-grammar]
commonly used in language specifications.

It's unlikely that you'll be able to satisfy these two properties just by translating an existing context-free grammar directly
into Tree-sitter's grammar format. There are a few kinds of adjustments that are often required.
The following sections will explain these adjustments in more depth.

## The First Few Rules

It's usually a good idea to find a formal specification for the language you're trying to parse. This specification will
most likely contain a context-free grammar. As you read through the rules of this CFG, you will probably discover a complex
and cyclic graph of relationships. It might be unclear how you should navigate this graph as you define your grammar.

Although languages have very different constructs, their constructs can often be categorized into similar groups like
_Declarations_, _Definitions_, _Statements_, _Expressions_, _Types_ and _Patterns_. In writing your grammar, a good first
step is to create just enough structure to include all of these basic _groups_ of symbols. For a language like Go,
you might start with something like this:

```js
{
  // ...

  rules: {
    source_file: $ => repeat($._definition),

    _definition: $ => choice(
      $.function_definition
      // TODO: other kinds of definitions
    ),

    function_definition: $ => seq(
      'func',
      $.identifier,
      $.parameter_list,
      $._type,
      $.block
    ),

    parameter_list: $ => seq(
      '(',
       // TODO: parameters
      ')'
    ),

    _type: $ => choice(
      'bool'
      // TODO: other kinds of types
    ),

    block: $ => seq(
      '{',
      repeat($._statement),
      '}'
    ),

    _statement: $ => choice(
      $.return_statement
      // TODO: other kinds of statements
    ),

    return_statement: $ => seq(
      'return',
      $._expression,
      ';'
    ),

    _expression: $ => choice(
      $.identifier,
      $.number
      // TODO: other kinds of expressions
    ),

    identifier: $ => /[a-z]+/,

    number: $ => /\d+/
  }
}
```

One important fact to know up front is that the start rule for the grammar is the first property in the `rules` object.
In the example above, that would correspond to `source_file`, but it can be named anything.

Some details of this grammar will be explained in more depth later on, but if you focus on the `TODO` comments, you can
see that the overall strategy is _breadth-first_. Notably, this initial skeleton does not need to directly match an exact
subset of the context-free grammar in the language specification. It just needs to touch on the major groupings of rules
in as simple and obvious a way as possible.

With this structure in place, you can now freely decide what part of the grammar to flesh out next. For example, you might
decide to start with _types_. One-by-one, you could define the rules for writing basic types and composing them into more
complex types:

```js
{
  // ...

  _type: $ => choice(
    $.primitive_type,
    $.array_type,
    $.pointer_type
  ),

  primitive_type: $ => choice(
    'bool',
    'int'
  ),

  array_type: $ => seq(
    '[',
    ']',
    $._type
  ),

  pointer_type: $ => seq(
    '*',
    $._type
  )
}
```

After developing the _type_ sublanguage a bit further, you might decide to switch to working on _statements_ or _expressions_
instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`.

**And remember to add tests for each rule in your `test/corpus` folder!**

## Structuring Rules Well

Imagine that you were just starting work on the [Tree-sitter JavaScript parser][tree-sitter-javascript]. Naively, you might
try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with this
approach, consider the following line of code:

```js
return x + y;
```

According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`,
and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series
of production rules:

```text
ReturnStatement          ->  'return' Expression
Expression               ->  AssignmentExpression
AssignmentExpression     ->  ConditionalExpression
ConditionalExpression    ->  LogicalORExpression
LogicalORExpression      ->  LogicalANDExpression
LogicalANDExpression     ->  BitwiseORExpression
BitwiseORExpression      ->  BitwiseXORExpression
BitwiseXORExpression     ->  BitwiseANDExpression
BitwiseANDExpression     ->  EqualityExpression
EqualityExpression       ->  RelationalExpression
RelationalExpression     ->  ShiftExpression
ShiftExpression          ->  AdditiveExpression
AdditiveExpression       ->  MultiplicativeExpression
MultiplicativeExpression ->  ExponentiationExpression
ExponentiationExpression ->  UnaryExpression
UnaryExpression          ->  UpdateExpression
UpdateExpression         ->  LeftHandSideExpression
LeftHandSideExpression   ->  NewExpression
NewExpression            ->  MemberExpression
MemberExpression         ->  PrimaryExpression
PrimaryExpression        ->  IdentifierReference
```

The language spec encodes the twenty different precedence levels of JavaScript expressions using twenty levels of indirection
between `IdentifierReference` and `Expression`. If we were to create a concrete syntax tree representing this statement
according to the language spec, it would have twenty levels of nesting, and it would contain nodes with names like `BitwiseXORExpression`,
which are unrelated to the actual code.

## Standard Rule Names

Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain conventions
used by many other established grammars in the ecosystem. Some of these well-established patterns are listed below:

- `source_file`: Represents an entire source file; this rule is commonly used as the root node for a grammar.
- `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice between several
more specific sub-expression/sub-statement rules.
- `block`: Used as the parent node for block scopes, with its children representing the block's contents.
- `type`: Represents the types of a language such as `int`, `char`, and `void`.
- `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used as the `word`
token in grammars.
- `string`: Used to represent `"string literals"`.
- `comment`: Used to represent comments; this rule is commonly used as an `extra`.

## Using Precedence

To produce a readable syntax tree, we'd like to model JavaScript expressions using a much flatter structure like this:

```js
{
  // ...

  _expression: $ => choice(
    $.identifier,
    $.unary_expression,
    $.binary_expression,
    // ...
  ),

  unary_expression: $ => choice(
    seq('-', $._expression),
    seq('!', $._expression),
    // ...
  ),

  binary_expression: $ => choice(
    seq($._expression, '*', $._expression),
    seq($._expression, '+', $._expression),
    // ...
  ),
}
```

Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message:

```text
Error: Unresolved conflict for symbol sequence:

  '-'  _expression  •  '*'  …

Possible interpretations:

  1:  '-'  (binary_expression  _expression  •  '*'  _expression)
  2:  (unary_expression  '-'  _expression)  •  '*'  …

Possible resolutions:

  1:  Specify a higher precedence in `binary_expression` than in the other rules.
  2:  Specify a higher precedence in `unary_expression` than in the other rules.
  3:  Specify a left or right associativity in `unary_expression`
  4:  Add a conflict for these rules: `binary_expression` `unary_expression`
```

```admonish hint
The • character in the error message indicates where exactly during
parsing the conflict occurs, or in other words, where the parser is encountering
ambiguity.
```

For an expression like `-a * b`, it's not clear whether the `-` operator applies to the `a * b` or just to the `a`. This
is where the `prec` function [described in the previous page][grammar dsl] comes into play. By wrapping a rule with `prec`,
we can indicate that certain sequences of symbols should _bind to each other more tightly_ than others. For example, the
`'-', $._expression` sequence in `unary_expression` should bind more tightly than the `$._expression, '+', $._expression`
sequence in `binary_expression`:

```js
{
  // ...

  unary_expression: $ =>
    prec(
      2,
      choice(
        seq("-", $._expression),
        seq("!", $._expression),
        // ...
      ),
    );
}
```

## Using Associativity

Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict:

```text
Error: Unresolved conflict for symbol sequence:

  _expression  '*'  _expression  •  '*'  …

Possible interpretations:

  1:  _expression  '*'  (binary_expression  _expression  •  '*'  _expression)
  2:  (binary_expression  _expression  '*'  _expression)  •  '*'  …

Possible resolutions:

  1:  Specify a left or right associativity in `binary_expression`
  2:  Add a conflict for these rules: `binary_expression`
```

For an expression like `a * b * c`, it's not clear whether we mean `a * (b * c)` or `(a * b) * c`.
This is where `prec.left` and `prec.right` come into use. We want to select the second interpretation, so we use `prec.left`.

```js
{
  // ...

  binary_expression: $ => choice(
    prec.left(2, seq($._expression, '*', $._expression)),
    prec.left(1, seq($._expression, '+', $._expression)),
    // ...
  ),
}
```

## Using Conflicts

Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional ambiguity.
A construct like `[x, y]` could be legitimately parsed as either an array literal (like in `let a = [x, y]`) or as a destructuring
pattern (like in `let [x, y] = arr`).

```js
module.exports = grammar({
  name: "javascript",

  rules: {
    expression: $ => choice(
      $.identifier,
      $.array,
      $.pattern,
    ),

    array: $ => seq(
      "[",
      optional(seq(
        $.expression, repeat(seq(",", $.expression))
      )),
      "]"
    ),

    array_pattern: $ => seq(
      "[",
      optional(seq(
        $.pattern, repeat(seq(",", $.pattern))
      )),
      "]"
    ),

    pattern: $ => choice(
      $.identifier,
      $.array_pattern,
    ),
  },
})
```

In such cases, we want the parser to explore both possibilities by explicitly declaring this ambiguity:

```js
{
  name: "javascript",

  conflicts: $ => [
    [$.array, $.array_pattern],
  ],

  rules: {
    // ...
  },
}
```

```admonish note
The example is a bit contrived for the purpose of illustrating the usage of conflicts. The actual JavaScript grammar isn't
structured like that, but this conflict is actually present in the
[Tree-sitter JavaScript grammar](https://github.com/tree-sitter/tree-sitter-javascript/blob/108b2d4d17a04356a340aea809e4dd5b801eb40d/grammar.js#L100).
```

## Hiding Rules

You may have noticed in the above examples that some grammar rule names like `_expression` and `_type` begin with an underscore.
Starting a rule's name with an underscore causes the rule to be _hidden_ in the syntax tree. This is useful for rules like
`_expression` in the grammars above, which always just wrap a single child node. If these nodes were not hidden, they would
add substantial depth and noise to the syntax tree without making it any easier to understand.

## Using Fields

Often, it's easier to analyze a syntax node if you can refer to its children by _name_ instead of by their position in an
ordered list. Tree-sitter grammars support this using the `field` function. This function allows you to assign unique names
to some or all of a node's children:

```js
function_definition: $ =>
  seq(
    "func",
    field("name", $.identifier),
    field("parameters", $.parameter_list),
    field("return_type", $._type),
    field("body", $.block),
  );
```

Adding fields like this allows you to retrieve nodes using the [field APIs][field-names-section].

# Lexical Analysis

Tree-sitter's parsing process is divided into two phases: parsing (which is described above) and [lexing][lexing] — the
process of grouping individual characters into the language's fundamental _tokens_. There are a few important things to
know about how Tree-sitter's lexing works.

## Conflicting Tokens

Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens
`"if"` and `/[a-z]+/`. Tree-sitter differentiates between these conflicting tokens in a few ways.

1. **Context-aware Lexing** — Tree-sitter performs lexing on-demand, during the parsing process. At any given position
in a source document, the lexer only tries to recognize tokens that are _valid_ at that position in the document.

2. **Lexical Precedence** — When the precedence functions described [in the previous page][grammar dsl] are used _within_
the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens
that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence.

3. **Match Length** — If multiple valid tokens with the same precedence match the characters at a given position in a document,
Tree-sitter will select the token that matches the [longest sequence of characters][longest-match].

4. **Match Specificity** — If there are two valid tokens with the same precedence, and they both match the same number
of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as
a `RegExp`.

5. **Rule Order** — If none of the above criteria can be used to select one token over another, Tree-sitter will prefer
the token that appears earlier in the grammar.

If there is an external scanner it may have [an additional impact][external scanner] over regular tokens
defined in the grammar.

## Lexical Precedence vs. Parse Precedence

One common mistake involves not distinguishing _lexical precedence_ from _parse precedence_. Parse precedence determines
which rule is chosen to interpret a given sequence of tokens. _Lexical precedence_ determines which token is chosen to interpret
the text at a given position, and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's
lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other.
Most of the time when you really get stuck, you're dealing with a lexical precedence problem. Pay particular attention to
the difference in meaning between using `prec` inside the `token` function versus outside it. The _lexical precedence_ syntax,
as mentioned in the previous page, is `token(prec(N, ...))`.

## Keywords

Many languages have a set of _keyword_ tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`)
that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is
used as a binary operator, like this:

```js
if (a instanceof Something) b();
```

The following, however, is not valid JavaScript:

```js
if (a instanceofSomething) b();
```

A keyword like `instanceof` cannot be followed immediately by another letter, because then it would be tokenized as an `identifier`,
**even though an identifier is not valid at that position**. Because Tree-sitter uses context-aware lexing, as described
[above](#conflicting-tokens), it would not normally impose this restriction. By default, Tree-sitter would recognize `instanceofSomething`
as two separate tokens: the `instanceof` keyword followed by an `identifier`.

## Keyword Extraction

Fortunately, Tree-sitter has a feature that allows you to fix this, so that you can match the behavior of other standard
parsers: the `word` token. If you specify a `word` token in your grammar, Tree-sitter will find the set of _keyword_ tokens
that match strings also matched by the `word` token. Then, during lexing, instead of matching each of these keywords individually,
Tree-sitter will match the keywords via a two-step process where it _first_ matches the `word` token.

For example, suppose we added `identifier` as the `word` token in our JavaScript grammar:

```js
grammar({
  name: "javascript",

  word: $ => $.identifier,

  rules: {
    _expression: $ =>
      choice(
        $.identifier,
        $.unary_expression,
        $.binary_expression,
        // ...
      ),

    binary_expression: $ =>
      choice(
        prec.left(1, seq($._expression, "instanceof", $._expression)),
        // ...
      ),

    unary_expression: $ =>
      choice(
        prec.left(2, seq("typeof", $._expression)),
        // ...
      ),

    identifier: $ => /[a-z_]+/,
  },
});
```

Tree-sitter would identify `typeof` and `instanceof` as keywords. Then, when parsing the invalid code above, rather than
scanning for the `instanceof` token individually, it would scan for an `identifier` first, and find `instanceofSomething`.
It would then correctly recognize the code as invalid.

Aside from improving error detection, keyword extraction also has performance benefits. It allows Tree-sitter to generate
a smaller, simpler lexing function, which means that **the parser will compile much more quickly**.

```admonish note
The word token must be a unique token that is not reused by another rule. If you want to have a word token used in a
rule that's called something else, you should just alias the word token instead, like how the Rust grammar does it
<a href="https://github.com/tree-sitter/tree-sitter-rust/blob/1f63b33efee17e833e0ea29266dd3d713e27e321/grammar.js#L1605">here</a>
```

[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
[antlr]: https://www.antlr.org
[bison]: https://en.wikipedia.org/wiki/GNU_bison
[cst]: https://en.wikipedia.org/wiki/Parse_tree
[ecmascript-spec]: https://262.ecma-international.org/6.0/
[external scanner]: ./4-external-scanners.md#other-external-scanner-details
[glr-parsing]: https://en.wikipedia.org/wiki/GLR_parser
[grammar dsl]: ./2-the-grammar-dsl.md
[language-spec]: https://en.wikipedia.org/wiki/Programming_language_specification
[lexing]: https://en.wikipedia.org/wiki/Lexical_analysis
[longest-match]: https://en.wikipedia.org/wiki/Maximal_munch
[lr-grammars]: https://en.wikipedia.org/wiki/LR_parser
[field-names-section]: ../using-parsers/2-basic-parsing.md#node-field-names
[non-terminal]: https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols
[peg]: https://en.wikipedia.org/wiki/Parsing_expression_grammar
[tree-sitter-javascript]: https://github.com/tree-sitter/tree-sitter-javascript
[yacc]: https://en.wikipedia.org/wiki/Yacc



================================================
FILE: docs/src/creating-parsers/4-external-scanners.md
================================================
# External Scanners

Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression.
Some examples:

- [Indent and dedent][indent-tokens] tokens in Python
- [Heredocs][heredoc] in Bash and Ruby
- [Percent strings][percent-string] in Ruby

Tree-sitter allows you to handle these kinds of tokens using _external scanners_. An external scanner is a set of C functions
that you, the grammar author, can write by hand to add custom logic for recognizing certain tokens.

To use an external scanner, there are a few steps. First, add an `externals` section to your grammar. This section should
list the names of all of your external tokens. These names can then be used elsewhere in your grammar.

```js
grammar({
  name: "my_language",

  externals: $ => [$.indent, $.dedent, $.newline],

  // ...
});
```

Then, add another C source file to your project. Its path must be `src/scanner.c` for the CLI to recognize it. Be sure to add
this file to the sources section of your `binding.gyp` file so that it will be included when your project is compiled by
Node.js, and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust
crate.

In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering
of this enum must match the order in your grammar's `externals` array; the actual names do not matter.

```c
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"

enum TokenType {
  INDENT,
  DEDENT,
  NEWLINE
}
```

Finally, you must define five functions with specific names, based on your language's name and five actions:
_create_, _destroy_, _serialize_, _deserialize_, and _scan_.

## Create

```c
void * tree_sitter_my_language_external_scanner_create() {
  // ...
}
```

This function should create your scanner object. It will be called exactly once each time your language is set on a parser.
Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to
maintain any state, it's ok to return `NULL`.

## Destroy

```c
void tree_sitter_my_language_external_scanner_destroy(void *payload) {
  // ...
}
```

This function should free any memory used by your scanner. It is called once when a parser is deleted or assigned a different
language. It receives as an argument the same pointer that was returned from the _create_ function. If your _create_ function
didn't allocate any memory, this function can be a no-op.

## Serialize

```c
unsigned tree_sitter_my_language_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  // ...
}
```

This function should copy the complete state of your scanner into a given byte buffer, and return the number of bytes written.
The function is called every time the external scanner successfully recognizes a token. It receives a pointer to your scanner
and a pointer to a buffer. The maximum number of bytes that you can write is given by the `TREE_SITTER_SERIALIZATION_BUFFER_SIZE`
constant, defined in the `tree_sitter/parser.h` header file.

The data that this function writes will ultimately be stored in the syntax tree so that the scanner can be restored to the
right state when handling edits or ambiguities. For your parser to work correctly, the `serialize` function must store its
entire state, and `deserialize` must restore the entire state. For good performance, you should design your scanner so that
its state can be serialized as quickly and compactly as possible.

## Deserialize

```c
void tree_sitter_my_language_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  // ...
}
```

This function should _restore_ the state of your scanner based on the bytes that were previously written by the `serialize`
function. It is called with a pointer to your scanner, a pointer to the buffer of bytes, and the number of bytes that should
be read. It is good practice to explicitly erase your scanner state variables at the start of this function, before restoring
their values from the byte buffer.

## Scan

Typically, one will

- Call `lexer->advance` several times, if the characters are valid for the token being lexed.

- Optionally, call `lexer->mark_end` to mark the end of the token, and "peek ahead"
to check if the next character (or set of characters) invalidates the token.

- Set `lexer->result_symbol` to the token type.

- Return `true` from the scanning function, indicating that a token was successfully lexed.

Tree-sitter will then push the resulting node onto the parse stack, and the input position will remain where it was at the
point `lexer->mark_end` was last called.

```c
bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  // ...
}
```

The second parameter to this function is the lexer, of type `TSLexer`. The `TSLexer` struct has the following fields:

- **`int32_t lookahead`** — The current next character in the input stream, represented as a 32-bit unicode code point.

- **`TSSymbol result_symbol`** — The symbol that was recognized. Your scan function should _assign_ to this field one of
the values from the `TokenType` enum, described above.

- **`void (*advance)(TSLexer *, bool skip)`** — A function for advancing to the next character. If you pass `true` for
the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range
associated with tokens emitted by the external scanner.

- **`void (*mark_end)(TSLexer *)`** — A function for marking the end of the recognized token. This allows matching tokens
that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past
using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls
to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase the size
of the token.

- **`uint32_t (*get_column)(TSLexer *)`** — A function for querying the current column position of the lexer. It returns
the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this
function by reading from the start of the line.

- **`bool (*is_at_included_range_start)(const TSLexer *)`** — A function for checking whether the parser has just skipped
some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function
(described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special
behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses
this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.

- **`bool (*eof)(const TSLexer *)`** — A function for determining whether the lexer is at the end of the file. The value
of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because
the `0` or "NUL" value is also a valid character that could be present in the file being parsed.

The third argument to the `scan` function is an array of booleans that indicates which of the external tokens are expected by
the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot
backtrack, so you may need to combine certain pieces of logic.

```c
if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {

  // ... logic that is common to both `INDENT` and `DEDENT`

  if (valid_symbols[INDENT]) {

    // ... logic that is specific to `INDENT`

    lexer->result_symbol = INDENT;
    return true;
  }
}
```

## External Scanner Helpers

### Allocator

Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`.
These macros can allow a potential consumer to override the default allocator with their own implementation, but by default
will use the libc functions.

As a consumer of the tree-sitter core library as well as any parser libraries that might use allocations, you can enable
overriding the default allocator and have it use the same one as the library allocator, which you can set with `ts_set_allocator`.
To enable this overriding in scanners, you must compile them with the `TREE_SITTER_REUSE_ALLOCATOR` macro defined, and the
tree-sitter library must be linked into your final app dynamically, since it needs to resolve the internal functions at runtime.
If you are compiling an executable binary that uses the core library, but want to load parsers dynamically at runtime, then
you will have to use a special linker flag on Unix. For non-Darwin systems, that would be `--dynamic-list` and for Darwin
systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out `crates/cli/build.rs`).

For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example:

```c
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"

// ...

void* tree_sitter_my_language_external_scanner_create() {
  return ts_calloc(100, 1); // or ts_malloc(100)
}

// ...

```

### Arrays

If you need to use array-like types in your scanner, such as tracking a stack of indentations or tags, you should use the
array macros from `tree_sitter/array.h`.

There are quite a few of them provided for you, but here's how you could get started tracking a stack of indentation levels.
Check out the header itself for more detailed documentation.

```admonish attention
Do not use any of the array functions or macros that are prefixed with an underscore and have comments saying
that it is not what you are looking for. These are internal functions used as helpers by other macros that are public.
They are not meant to be used directly, nor are they what you want.
```

```c
#include "tree_sitter/parser.h"
#include "tree_sitter/array.h"

enum TokenType {
  INDENT,
  DEDENT,
  NEWLINE,
  STRING,
}

// Create the array in your create function

void* tree_sitter_my_language_external_scanner_create() {
  return ts_calloc(1, sizeof(Array(int)));

  // or if you want to zero out the memory yourself

  Array(int) *stack = ts_malloc(sizeof(Array(int)));
  array_init(&stack);
  return stack;
}

bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  Array(int) *stack = payload;
  if (valid_symbols[INDENT]) {
    array_push(stack, lexer->get_column(lexer));
    lexer->result_symbol = INDENT;
    return true;
  }
  if (valid_symbols[DEDENT]) {
    array_pop(stack); // this returns the popped element by value, but we don't need it
    lexer->result_symbol = DEDENT;
    return true;
  }

  // we can also use an array on the stack to keep track of a string

  Array(char) next_string = array_new();

  if (valid_symbols[STRING] && lexer->lookahead == '"') {
    lexer->advance(lexer, false);
    while (lexer->lookahead != '"' && lexer->lookahead != '\n' && !lexer->eof(lexer)) {
      array_push(&next_string, lexer->lookahead);
      lexer->advance(lexer, false);
    }

    // assume we have some arbitrary constraint of not having more than 100 characters in a string
    if (lexer->lookahead == '"' && next_string.size <= 100) {
      lexer->advance(lexer, false);
      lexer->result_symbol = STRING;
      return true;
    }
  }

  return false;
}

```

## Other External Scanner Details

External scanners have priority over Tree-sitter's normal lexing process. When a token listed in the externals array is valid
at a given position, the external scanner is called first. This makes external scanners a powerful way to override Tree-sitter's
default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or dynamic precedence.

During error recovery, Tree-sitter's first step is to call the external scanner's scan function with all tokens marked as
valid. Your scanner should detect and handle this case appropriately. One simple approach is to add an unused "sentinel"
token at the end of your externals array:

```js
{
  name: "my_language",

  externals: $ => [$.token1, $.token2, $.error_sentinel]

  // ...
}
```

You can then check if this sentinel token is marked valid to determine if Tree-sitter is in error recovery mode.

If you would rather not handle the error recovery case explicitly, the easiest way to "opt-out" and let tree-sitter's internal
lexer handle it is to return `false` from your scan function when `valid_symbols` contains the error sentinel.

```c
bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  if (valid_symbols[ERROR_SENTINEL]) {
    return false;
  }
  // ...
}
```

When you include literal keywords in the externals array, for example:

```js
externals: $ => ['if', 'then', 'else']
```

_those_ keywords will
be tokenized by the external scanner whenever they appear in the grammar.

This is equivalent to declaring named tokens and aliasing them:

```js
{
  name: "my_language",

  externals: $ => [$.if_keyword, $.then_keyword, $.else_keyword],

  rules: {

    // then using it in a rule like so:
    if_statement: $ => seq(alias($.if_keyword, 'if'), ...),

    // ...
  }
}
```

The tokenization process for external keywords works as follows:

1. The external scanner attempts to recognize the token first
2. If the scanner returns true and sets a token, that token is used
3. If the scanner returns false, Tree-sitter falls back to its internal lexer

However, when you use rule references (like `$.if_keyword`) in the externals array without defining the corresponding rules
in the grammar, Tree-sitter cannot fall back to its internal lexer. In this case, the external scanner is solely responsible
for recognizing these tokens.

```admonish danger
- External scanners can easily create infinite loops

- Be extremely careful when emitting zero-width tokens

- Always use the `eof` function when looping through characters
```

[ejs]: https://ejs.co
[enum]: https://en.wikipedia.org/wiki/Enumerated_type#C
[heredoc]: https://en.wikipedia.org/wiki/Here_document
[indent-tokens]: https://en.wikipedia.org/wiki/Off-side_rule
[multi-language-section]: ../using-parsers/3-advanced-parsing.md#multi-language-documents
[percent-string]: https://docs.ruby-lang.org/en/2.5.0/doc/syntax/literals_rdoc.html#label-Percent+Strings



================================================
FILE: docs/src/creating-parsers/5-writing-tests.md
================================================
# Writing Tests

For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look
when parsing that rule. These tests are written using specially-formatted text files in the `test/corpus/` directory within
your parser's root folder.

For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this:

```text
==================
Return statements
==================

func x() int {
  return 1;
}

---

(source_file
  (function_definition
    (identifier)
    (parameter_list)
    (primitive_type)
    (block
      (return_statement (number)))))
```

* The **name** of each test is written between two lines containing only `=` (equal sign) characters.

* Then the **input source code** is written, followed by a line containing three or more `-` (dash) characters.

* Then, the **expected output syntax tree** is written as an [S-expression][s-exp]. The exact placement of whitespace in
the S-expression doesn't matter, but ideally the syntax tree should be legible.

```admonish tip
The S-expression does not show syntax nodes like `func`, `(` and `;`, which are expressed as strings and regexes in the grammar.
It only shows the *named* nodes, as described in [this section][named-vs-anonymous-nodes] of the page on parser usage.
```

  The expected output section can also *optionally* show the [*field names*][node-field-names] associated with each child
  node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in
  the S-expression:

```query
(source_file
  (function_definition
    name: (identifier)
    parameters: (parameter_list)
    result: (primitive_type)
    body: (block
      (return_statement (number)))))
```

* If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical
suffix (in the below example, `|||`) to disambiguate them:

```text
==================|||
Basic module
==================|||

---- MODULE Test ----
increment(n) == n + 1
====

---|||

(source_file
  (module (identifier)
    (operator (identifier)
      (parameter_list (identifier))
      (plus (identifier_ref) (number)))))
```

These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar
to verify that everything still parses correctly.

By default, the `tree-sitter test` command runs all the tests in your `test/corpus/` folder. To run a particular test, you
can use the `-i` flag:

```sh
tree-sitter test -i 'Return statements'
```

The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `test/corpus`
directory. It's typically a good idea to test all the permutations of each language construct. This not only increases test
coverage, but also acquaints readers with a way to examine expected outputs and understand the "edges" of a language.

## Attributes

Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with
a `:`. A couple of attributes also take a parameter, which requires the use of parentheses.

```admonish tip
If you'd like to supply multiple parameters, e.g. to run tests on multiple platforms or to test multiple languages,
you can repeat the attribute on a new line.
```

The following attributes are available:

* `:skip` — This attribute will skip the test when running `tree-sitter test`.
  This is useful when you want to temporarily disable running a test without deleting it.
* `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain
input is invalid without displaying the whole parse tree; as such, you should omit the parse tree below the `---` line.
* `:fail-fast` — This attribute will stop the running of any additional tests if the test marked with this attribute fails.
* `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for
multi-parser repos, such as XML and DTD, or Typescript and TSX. The default parser used will always be the first entry in
the `grammars` field in the `tree-sitter.json` config file, so having a way to pick a second or even third parser is useful.
* `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific
behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`][constants].

Examples using attributes:

```text
=========================
Test that will be skipped
:skip
=========================

int main() {}

-------------------------

====================================
Test that will run on Linux or macOS

:platform(linux)
:platform(macos)
====================================

int main() {}

------------------------------------

========================================================================
Test that expects an error, and will fail fast if there's no parse error
:fail-fast
:error
========================================================================

int main ( {}

------------------------------------------------------------------------

=================================================
Test that will parse with both Typescript and TSX
:language(typescript)
:language(tsx)
=================================================

console.log('Hello, world!');

-------------------------------------------------
```

### Automatic Compilation

You might notice that the first time you run `tree-sitter test` after regenerating your parser, it takes some extra time.
This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. It recompiles your parser
as-needed whenever you update it by re-running `tree-sitter generate`, or whenever the [external scanner][external-scanners]
file is changed.

[constants]: https://doc.rust-lang.org/std/env/consts/constant.OS.html
[external-scanners]: ./4-external-scanners.md
[node-field-names]: ../using-parsers/2-basic-parsing.md#node-field-names
[s-exp]: https://en.wikipedia.org/wiki/S-expression
[named-vs-anonymous-nodes]: ../using-parsers/2-basic-parsing.md#named-vs-anonymous-nodes



================================================
FILE: docs/src/creating-parsers/6-publishing.md
================================================
# Publishing your grammar

Once you feel that your parser is in a stable working state for consumers to use, you can publish it to various registries.
It's strongly recommended to publish grammars to GitHub, [crates.io][crates.io] (Rust), [npm][npm] (JavaScript), and [PyPI][pypi]
(Python) to make it easier for others to find and use your grammar.

If your grammar is hosted on GitHub, you can make use of our [reusable workflows][workflows] to handle the publishing process
for you. These workflows will automatically handle regenerating and publishing your grammar in CI, so long as you have the required
tokens set up for the various registries. For an example of this workflow in action, see the [Python grammar's GitHub workflow][python-gh].

## From start to finish

To release a new grammar (or publish your first version), these are the steps you should follow:

1. Bump your version to the desired version with `tree-sitter version`. For example, if you're releasing version `1.0.0`
of your grammar, you'd run `tree-sitter version 1.0.0`.
2. Commit the changes with `git commit -am "Release 1.0.0"` (or however you like), and ensure that your working directory is
clean.
3. Tag the commit with `git tag -- v1.0.0`.
4. Push the commit and tag with `git push --tags origin main` (assuming you're on the `main` branch, and `origin` is your
remote).
5. (optional) If you've set up the GitHub workflows for your grammar, the release will be automatically published to GitHub,
crates.io, npm, and PyPI.

### Adhering to Semantic Versioning

When releasing new versions of your grammar, it's important to adhere to [Semantic Versioning][semver]. This ensures that
consumers can predictably update their dependencies and that their existing tree-sitter integrations (queries, tree traversal
code, node type checks) will continue to work as expected when upgrading.

1. Increment the major version when you make incompatible changes to the grammar's node types or structure
2. Increment the minor version when you add new node types or patterns while maintaining backward compatibility
3. Increment the patch version when you fix bugs without changing the grammar's structure

For grammars in version 0.y.z (zero version), the usual semantic versioning rules are technically relaxed. However, if your
grammar already has users, it's recommended to treat version changes more conservatively:

- Treat patch version (`z`) changes as if they were minor version changes
- Treat minor version (`y`) changes as if they were major version changes

This helps maintain stability for existing users during the pre-1.0 phase. By following these versioning guidelines, you
ensure that downstream users can safely upgrade without their existing queries breaking.

[crates.io]: https://crates.io
[npm]: https://www.npmjs.com
[pypi]: https://pypi.org
[python-gh]: https://github.com/tree-sitter/tree-sitter-python/blob/master/.github/workflows/publish.yml
[semver]: https://semver.org/
[workflows]: https://github.com/tree-sitter/workflows



================================================
FILE: docs/src/creating-parsers/index.md
================================================
# Creating parsers

Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even
zen-like. This document will help you to get started and to develop a useful mental model.



================================================
FILE: docs/src/using-parsers/1-getting-started.md
================================================
# Getting Started

## Building the Library

To build the library on a POSIX system, just run `make` in the Tree-sitter directory. This will create a static library
called `libtree-sitter.a` as well as dynamic libraries.

Alternatively, you can incorporate the library in a larger project's build system by adding one source file to the build.
This source file needs two directories to be in the include path when compiled:

**source file:**

- `tree-sitter/lib/src/lib.c`

**include directories:**

- `tree-sitter/lib/src`
- `tree-sitter/lib/include`

## The Basic Objects

There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes.
In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`.

- A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage`
is generated by Tree-sitter. Many languages are already available in separate git repositories within the
[Tree-sitter GitHub organization][ts org] and the [Tree-sitter grammars GitHub organization][tsg org].
See [the next section][creating parsers] for how to create new languages.

- A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some
source code.

- A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the
structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the
source code changes.

- A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as
well as its relation to other nodes like its parent, siblings and children.

## An Example Program

Here's an example of a simple C program that uses the Tree-sitter [JSON parser][json].

```c
// Filename - test-json-parser.c

#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <tree_sitter/api.h>

// Declare the `tree_sitter_json` function, which is
// implemented by the `tree-sitter-json` library.
const TSLanguage *tree_sitter_json(void);

int main() {
  // Create a parser.
  TSParser *parser = ts_parser_new();

  // Set the parser's language (JSON in this case).
  ts_parser_set_language(parser, tree_sitter_json());

  // Build a syntax tree based on source code stored in a string.
  const char *source_code = "[1, null]";
  TSTree *tree = ts_parser_parse_string(
    parser,
    NULL,
    source_code,
    strlen(source_code)
  );

  // Get the root node of the syntax tree.
  TSNode root_node = ts_tree_root_node(tree);

  // Get some child nodes.
  TSNode array_node = ts_node_named_child(root_node, 0);
  TSNode number_node = ts_node_named_child(array_node, 0);

  // Check that the nodes have the expected types.
  assert(strcmp(ts_node_type(root_node), "document") == 0);
  assert(strcmp(ts_node_type(array_node), "array") == 0);
  assert(strcmp(ts_node_type(number_node), "number") == 0);

  // Check that the nodes have the expected child counts.
  assert(ts_node_child_count(root_node) == 1);
  assert(ts_node_child_count(array_node) == 5);
  assert(ts_node_named_child_count(array_node) == 2);
  assert(ts_node_child_count(number_node) == 0);

  // Print the syntax tree as an S-expression.
  char *string = ts_node_string(root_node);
  printf("Syntax tree: %s\n", string);

  // Free all of the heap-allocated memory.
  free(string);
  ts_tree_delete(tree);
  ts_parser_delete(parser);
  return 0;
}
```

This program requires three components to build:

1. The Tree-sitter C API from `tree_sitter/api.h` (requiring `tree-sitter/lib/include` in our include path)
2. The Tree-sitter library (`libtree-sitter.a`)
3. The JSON grammar's source code, which we compile directly into the binary

```sh
clang                                   \
  -I tree-sitter/lib/include            \
  test-json-parser.c                    \
  tree-sitter-json/src/parser.c         \
  tree-sitter/libtree-sitter.a          \
  -o test-json-parser
./test-json-parser
```

When using dynamic linking, you'll need to ensure the shared library is discoverable through `LD_LIBRARY_PATH` or your system's
equivalent environment variable. Here's how to compile with dynamic linking:

```sh
clang                                   \
  -I tree-sitter/lib/include            \
  test-json-parser.c                    \
  tree-sitter-json/src/parser.c         \
  -ltree-sitter                         \
  -o test-json-parser
./test-json-parser
```

[creating parsers]: ../creating-parsers/index.md
[json]: https://github.com/tree-sitter/tree-sitter-json
[ts org]: https://github.com/tree-sitter
[tsg org]: https://github.com/tree-sitter-grammars



================================================
FILE: docs/src/using-parsers/2-basic-parsing.md
================================================
# Basic Parsing

## Providing the Code

In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string` function:

```c
TSTree *ts_parser_parse_string(
  TSParser *self,
  const TSTree *old_tree,
  const char *string,
  uint32_t length
);
```

You may want to parse source code that's stored in a custom data structure, like a [piece table][piece table] or a [rope][rope].
In this case, you can use the more general `ts_parser_parse` function:

```c
TSTree *ts_parser_parse(
  TSParser *self,
  const TSTree *old_tree,
  TSInput input
);
```

The `TSInput` structure lets you provide your own function for reading a chunk of text at a given byte offset and row/column
position. The function can return text encoded in either UTF-8 or UTF-16. This interface allows you to efficiently parse
text that is stored in your own data structure.

```c
typedef struct {
  void *payload;
  const char *(*read)(
    void *payload,
    uint32_t byte_offset,
    TSPoint position,
    uint32_t *bytes_read
  );
  TSInputEncoding encoding;
  DecodeFunction decode;
} TSInput;
```

If you want to decode text that is not encoded in UTF-8 or UTF-16, you can set the `decode` field of the input to your function
that will decode text. The signature of the `DecodeFunction` is as follows:

```c
typedef uint32_t (*DecodeFunction)(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
);
```

```admonish attention
The `TSInputEncoding` must be set to `TSInputEncodingCustom` for the `decode` function to be called.
```

The `string` argument is a pointer to the text to decode, which comes from the `read` function, and the `length` argument
is the length of the `string`. The `code_point` argument is a pointer to an integer that represents the decoded code point,
and should be written to in your `decode` callback. The function should return the number of bytes decoded.

## Syntax Nodes

Tree-sitter provides a [DOM][dom]-style interface for inspecting syntax trees.
A syntax node's _type_ is a string that indicates which grammar rule the node represents.

```c
const char *ts_node_type(TSNode);
```

Syntax nodes store their position in the source code both in raw bytes and row/column
coordinates. In a point, rows and columns are zero-based. The `row` field represents
the number of newlines before a given position, while `column` represents the number
of bytes between the position and beginning of the line.

```c
uint32_t ts_node_start_byte(TSNode);
uint32_t ts_node_end_byte(TSNode);
typedef struct {
  uint32_t row;
  uint32_t column;
} TSPoint;
TSPoint ts_node_start_point(TSNode);
TSPoint ts_node_end_point(TSNode);
```

## Retrieving Nodes

Every tree has a _root node_:

```c
TSNode ts_tree_root_node(const TSTree *);
```

Once you have a node, you can access the node's children:

```c
uint32_t ts_node_child_count(TSNode);
TSNode ts_node_child(TSNode, uint32_t);
```

You can also access its siblings and parent:

```c
TSNode ts_node_next_sibling(TSNode);
TSNode ts_node_prev_sibling(TSNode);
TSNode ts_node_parent(TSNode);
```

These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling.
You can check if a node is null:

```c
bool ts_node_is_null(TSNode);
```

## Named vs Anonymous Nodes

Tree-sitter produces [_concrete_ syntax trees][cst] — trees that contain nodes for
every individual token in the source code, including things like commas and parentheses. This is important for use-cases
that deal with individual tokens, like [syntax highlighting][syntax highlighting]. But some
types of code analysis are easier to perform using an [_abstract_ syntax tree][ast] — a tree in which the less important
details have been removed. Tree-sitter's trees support these use cases by making a distinction between
_named_ and _anonymous_ nodes.

Consider a grammar rule like this:

```js
if_statement: $ => seq("if", "(", $._expression, ")", $._statement);
```

A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement,
as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they
have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they
are represented in the grammar as simple strings.

You can check whether any given node is named:

```c
bool ts_node_is_named(TSNode);
```

When traversing the tree, you can also choose to skip over anonymous nodes by using the `_named_` variants of all of the
methods described above:

```c
TSNode ts_node_named_child(TSNode, uint32_t);
uint32_t ts_node_named_child_count(TSNode);
TSNode ts_node_next_named_sibling(TSNode);
TSNode ts_node_prev_named_sibling(TSNode);
```

If you use this group of methods, the syntax tree functions much like an abstract syntax tree.

## Node Field Names

To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes.
In the [creating parsers][using fields] section, it's explained how to do this in your own grammars. If a syntax node has
fields, you can access its children using their field name:

```c
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *field_name,
  uint32_t field_name_length
);
```

Fields also have numeric ids that you can use, if you want to avoid repeated string comparisons. You can convert between
strings and ids using the `TSLanguage`:

```c
uint32_t ts_language_field_count(const TSLanguage *);
const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
```

The field ids can be used in place of the name:

```c
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
```

[ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
[cst]: https://en.wikipedia.org/wiki/Parse_tree
[dom]: https://en.wikipedia.org/wiki/Document_Object_Model
[piece table]: <https://en.wikipedia.org/wiki/Piece_table>
[rope]: <https://en.wikipedia.org/wiki/Rope_(data_structure)>
[syntax highlighting]: https://en.wikipedia.org/wiki/Syntax_highlighting
[using fields]: ../creating-parsers/3-writing-the-grammar.md#using-fields



================================================
FILE: docs/src/using-parsers/3-advanced-parsing.md
================================================
# Advanced Parsing

## Editing

In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed
to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts
the ranges of its nodes so that they stay in sync with the code.

```c
typedef struct {
  uint32_t start_byte;
  uint32_t old_end_byte;
  uint32_t new_end_byte;
  TSPoint start_point;
  TSPoint old_end_point;
  TSPoint new_end_point;
} TSInputEdit;

void ts_tree_edit(TSTree *, const TSInputEdit *);
```

Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares structure
with the old tree.

When you edit a syntax tree, the positions of its nodes will change. If you have stored any `TSNode` instances outside of
the `TSTree`, you must update their positions separately, using the same `TSInputEdit` value, in order to update their
cached positions.

```c
void ts_node_edit(TSNode *, const TSInputEdit *);
```

This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the
tree, and then _after_ editing the tree, you want to continue to use those specific node instances. Often, you'll just want
to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed.

## Multi-language Documents

Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS][ejs]
and [ERB][erb] allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby.

Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain
_ranges_ of a file.

```c
typedef struct {
  TSPoint start_point;
  TSPoint end_point;
  uint32_t start_byte;
  uint32_t end_byte;
} TSRange;

void ts_parser_set_included_ranges(
  TSParser *self,
  const TSRange *ranges,
  uint32_t range_count
);
```

For example, consider this ERB document:

```erb
<ul>
  <% people.each do |person| %>
    <li><%= person.name %></li>
  <% end %>
</ul>
```

Conceptually, it can be represented by three syntax trees with overlapping ranges: an ERB syntax tree, a Ruby syntax tree,
and an HTML syntax tree. You could generate these syntax trees with the following code:

```c
#include <string.h>
#include <tree_sitter/api.h>

// These functions are each implemented in their own repo.
const TSLanguage *tree_sitter_embedded_template(void);
const TSLanguage *tree_sitter_html(void);
const TSLanguage *tree_sitter_ruby(void);

int main(int argc, const char **argv) {
  const char *text = argv[1];
  unsigned len = strlen(text);

  // Parse the entire text as ERB.
  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_embedded_template());
  TSTree *erb_tree = ts_parser_parse_string(parser, NULL, text, len);
  TSNode erb_root_node = ts_tree_root_node(erb_tree);

  // In the ERB syntax tree, find the ranges of the `content` nodes,
  // which represent the underlying HTML, and the `code` nodes, which
  // represent the interpolated Ruby.
  TSRange html_ranges[10];
  TSRange ruby_ranges[10];
  unsigned html_range_count = 0;
  unsigned ruby_range_count = 0;
  unsigned child_count = ts_node_child_count(erb_root_node);

  for (unsigned i = 0; i < child_count; i++) {
    TSNode node = ts_node_child(erb_root_node, i);
    if (strcmp(ts_node_type(node), "content") == 0) {
      html_ranges[html_range_count++] = (TSRange) {
        ts_node_start_point(node),
        ts_node_end_point(node),
        ts_node_start_byte(node),
        ts_node_end_byte(node),
      };
    } else {
      TSNode code_node = ts_node_named_child(node, 0);
      ruby_ranges[ruby_range_count++] = (TSRange) {
        ts_node_start_point(code_node),
        ts_node_end_point(code_node),
        ts_node_start_byte(code_node),
        ts_node_end_byte(code_node),
      };
    }
  }

  // Use the HTML ranges to parse the HTML.
  ts_parser_set_language(parser, tree_sitter_html());
  ts_parser_set_included_ranges(parser, html_ranges, html_range_count);
  TSTree *html_tree = ts_parser_parse_string(parser, NULL, text, len);
  TSNode html_root_node = ts_tree_root_node(html_tree);

  // Use the Ruby ranges to parse the Ruby.
  ts_parser_set_language(parser, tree_sitter_ruby());
  ts_parser_set_included_ranges(parser, ruby_ranges, ruby_range_count);
  TSTree *ruby_tree = ts_parser_parse_string(parser, NULL, text, len);
  TSNode ruby_root_node = ts_tree_root_node(ruby_tree);

  // Print all three trees.
  char *erb_sexp = ts_node_string(erb_root_node);
  char *html_sexp = ts_node_string(html_root_node);
  char *ruby_sexp = ts_node_string(ruby_root_node);
  printf("ERB: %s\n", erb_sexp);
  printf("HTML: %s\n", html_sexp);
  printf("Ruby: %s\n", ruby_sexp);
  return 0;
}
```

This API allows for great flexibility in how languages can be composed. Tree-sitter is not responsible for mediating the
interactions between languages. Instead, you are free to do that using arbitrary application-specific logic.

## Concurrency

Tree-sitter supports multi-threaded use cases by making syntax trees very cheap to copy.

```c
TSTree *ts_tree_copy(const TSTree *);
```

Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new
tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a
different thread.

```admonish danger
Individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously.
```

[ejs]: https://ejs.co
[erb]: https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html



================================================
FILE: docs/src/using-parsers/4-walking-trees.md
================================================
# Walking Trees with Tree Cursors

You can access every node in a syntax tree using the `TSNode` APIs [described earlier][retrieving nodes], but if you need
to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that
allows you to walk a syntax tree with maximum efficiency.

```admonish note
The given input node is considered the root of the cursor, and the cursor cannot walk outside this node.
Going to the parent or any sibling of the root node will always return `false`.

This has no unexpected effects if the given input node is the actual `root` node of the tree, but is something to keep in
mind when using cursors constructed with a node that is not the `root` node.
```

You can initialize a cursor from any node:

```c
TSTreeCursor ts_tree_cursor_new(TSNode);
```

You can move the cursor around the tree:

```c
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
```

These methods return `true` if the cursor successfully moved and `false` if there was no node to move to.

You can always retrieve the cursor's current node, as well as the [field name][node-field-names] that is associated with
the current node.

```c
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
```

[retrieving nodes]: ./2-basic-parsing.md#retrieving-nodes
[node-field-names]: ./2-basic-parsing.md#node-field-names



================================================
FILE: docs/src/using-parsers/6-static-node-types.md
================================================
# Static Node Types

In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual
syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_
file provides structured data about every possible syntax node in a grammar.

You can use this data to generate type declarations in statically-typed programming languages.

The node types file contains an array of objects, each of which describes a particular type of syntax node using the
following entries:

## Basic Info

Every object in this array has these two entries:

- `"type"` — A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function
described [here][syntax nodes].
- `"named"` — A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string
literal. See [here][named-vs-anonymous-nodes] for more info.

Examples:

```json
{
  "type": "string_literal",
  "named": true
}
{
  "type": "+",
  "named": false
}
```

Together, these two fields constitute a unique identifier for a node type; no two top-level objects in the `node-types.json`
should have the same values for both `"type"` and `"named"`.

## Internal Nodes

Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the
following entries:

- `"fields"` — An object that describes the possible [fields][node-field-names] that the node can have. The keys of this
object are field names, and the values are _child type_ objects, described below.
- `"children"` — Another _child type_ object that describes all the node's possible _named_ children _without_ fields.

A _child type_ object describes a set of child nodes using the following entries:

- `"required"` — A boolean indicating whether there is always _at least one_ node in this set.
- `"multiple"` — A boolean indicating whether there can be _multiple_ nodes in this set.
- `"types"` — An array of objects that represent the possible types of nodes in this set. Each object has two keys: `"type"`
and `"named"`, whose meanings are described above.

Example with fields:

```json
{
  "type": "method_definition",
  "named": true,
  "fields": {
    "body": {
      "multiple": false,
      "required": true,
      "types": [{ "type": "statement_block", "named": true }]
    },
    "decorator": {
      "multiple": true,
      "required": false,
      "types": [{ "type": "decorator", "named": true }]
    },
    "name": {
      "multiple": false,
      "required": true,
      "types": [
        { "type": "computed_property_name", "named": true },
        { "type": "property_identifier", "named": true }
      ]
    },
    "parameters": {
      "multiple": false,
      "required": true,
      "types": [{ "type": "formal_parameters", "named": true }]
    }
  }
}
```

Example with children:

```json
{
  "type": "array",
  "named": true,
  "fields": {},
  "children": {
    "multiple": true,
    "required": false,
    "types": [
      { "type": "_expression", "named": true },
      { "type": "spread_element", "named": true }
    ]
  }
}
```

## Supertype Nodes

In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression",
"type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules][hidden rules]
whose definition is a simple [`choice`][grammar dsl] where each member is just a single symbol.

Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. But if you add
a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node
types file, with the following special entry:

- `"subtypes"` — An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap.

Example:

```json
{
  "type": "_declaration",
  "named": true,
  "subtypes": [
    { "type": "class_declaration", "named": true },
    { "type": "function_declaration", "named": true },
    { "type": "generator_function_declaration", "named": true },
    { "type": "lexical_declaration", "named": true },
    { "type": "variable_declaration", "named": true }
  ]
}
```

Supertype nodes will also appear elsewhere in the node types file, as children of other node types, in a way that corresponds
with how the supertype rule was used in the grammar. This can make the node types much shorter and easier to read, because
a single supertype will take the place of multiple subtypes.

Example:

```json
{
  "type": "export_statement",
  "named": true,
  "fields": {
    "declaration": {
      "multiple": false,
      "required": false,
      "types": [{ "type": "_declaration", "named": true }]
    },
    "source": {
      "multiple": false,
      "required": false,
      "types": [{ "type": "string", "named": true }]
    }
  }
}
```

[grammar dsl]: ../creating-parsers/2-the-grammar-dsl.md
[hidden rules]: ../creating-parsers/3-writing-the-grammar.md#hiding-rules
[named-vs-anonymous-nodes]: ./2-basic-parsing.md#named-vs-anonymous-nodes
[node-field-names]: ./2-basic-parsing.md#node-field-names
[syntax nodes]: ./2-basic-parsing.md#syntax-nodes



================================================
FILE: docs/src/using-parsers/index.md
================================================
# Using Parsers

This guide covers the fundamental concepts of using Tree-sitter, which are applicable across all programming languages.
Although we'll explore some C-specific details that are valuable for direct C API usage or creating new language bindings,
the core concepts remain the same.

Tree-sitter's parsing functionality is implemented through its C API, with all functions documented in the [tree_sitter/api.h][api.h]
header file, but if you're working in another language, you can use one of the following bindings found [here](../index.md#language-bindings),
each providing idiomatic access to Tree-sitter's functionality. Of these bindings, the official ones have their own API docs
hosted online at the following pages:

- [Go][go]
- [Java][java]
- [JavaScript (Node.js)][javascript]
- [Kotlin][kotlin]
- [Python][python]
- [Rust][rust]
- [Zig][zig]

[api.h]: https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h
[go]: https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter
[java]: https://tree-sitter.github.io/java-tree-sitter
[javascript]: https://tree-sitter.github.io/node-tree-sitter
[kotlin]: https://tree-sitter.github.io/kotlin-tree-sitter
[python]: https://tree-sitter.github.io/py-tree-sitter
[rust]: https://docs.rs/tree-sitter
[zig]: https://tree-sitter.github.io/zig-tree-sitter



================================================
FILE: docs/src/using-parsers/queries/1-syntax.md
================================================
# Query Syntax

A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set of
nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the
node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would
match any `binary_expression` node whose children are both `number_literal` nodes:

```query
(binary_expression (number_literal) (number_literal))
```

Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of its children is a
`string_literal` node:

```query
(binary_expression (string_literal))
```

## Fields

In general, it's a good idea to make patterns more specific by specifying [field names][node-field-names] associated with
child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would
match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`.

```query
(assignment_expression
  left: (member_expression
    object: (call_expression)))
```

## Negated Fields

You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name
prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters:

```query
(class_declaration
  name: (identifier) @class_name
  !type_parameters)
```

## Anonymous Nodes

The parenthesized syntax for writing nodes only applies to [named nodes][named-vs-anonymous-nodes]. To match specific anonymous
nodes, you write their name between double quotes. For example, this pattern would match any `binary_expression` where the
operator is `!=` and the right side is `null`:

```query
(binary_expression
  operator: "!="
  right: (null))
```

## Special Nodes

### The Wildcard Node

A wildcard node is represented with an underscore (`_`), and it matches any node.
This is similar to `.` in regular expressions.
There are two forms: `(_)` will match any named node,
and `_` will match any named or anonymous node.

For example, this pattern would match any node inside a call:

```query
(call (_) @call.inner)
```

### The `ERROR` Node

When the parser encounters text it does not recognize, it represents this text
as an `(ERROR)` node in the syntax tree. These error nodes can be queried just like
normal nodes:

```scheme
(ERROR) @error-node
```

### The `MISSING` Node

If the parser is able to recover from erroneous text by inserting a missing token and then reducing, it will insert that
missing node in the final tree so long as that tree has the lowest error cost. These missing nodes appear as seemingly normal
nodes in the tree, but they are zero tokens wide, and are internally represented as a property of the actual terminal node
that was inserted, instead of being its own kind of node, like the `ERROR` node. These special missing nodes can be queried
using `(MISSING)`:

```scheme
(MISSING) @missing-node
```

This is useful when attempting to detect all syntax errors in a given parse tree, since these missing nodes are not captured
by `(ERROR)` queries. Specific missing node types can also be queried:

```scheme
(MISSING identifier) @missing-identifier
(MISSING ";") @missing-semicolon
```

[node-field-names]: ../2-basic-parsing.md#node-field-names
[named-vs-anonymous-nodes]: ../2-basic-parsing.md#named-vs-anonymous-nodes
[s-exp]: https://en.wikipedia.org/wiki/S-expression



================================================
FILE: docs/src/using-parsers/queries/2-operators.md
================================================
# Operators

## Capturing Nodes

When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names
with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_
the nodes that they refer to, and start with an `@` character.

For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name
`the-function-name` with the identifier:

```query
(assignment_expression
  left: (identifier) @the-function-name
  right: (function))
```

And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name`
with the containing class name:

```query
(class_declaration
  name: (identifier) @the-class-name
  body: (class_body
    (method_definition
      name: (property_identifier) @the-method-name)))
```

## Quantification Operators

You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously
to the `+` and `*` operators [in regular expressions][regex]. The `+` operator matches _one or more_ repetitions of a pattern,
and the `*` operator matches _zero or more_.

For example, this pattern would match a sequence of one or more comments:

```query
(comment)+
```

This pattern would match a class declaration, capturing all of the decorators if any were present:

```query
(class_declaration
  (decorator)* @the-decorator
  name: (identifier) @the-name)
```

You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing
a string argument if one was present:

```query
(call_expression
  function: (identifier) @the-function
  arguments: (arguments (string)? @the-string-arg))
```

## Grouping Sibling Nodes

You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment
followed by a function declaration:

```query
(
  (comment)
  (function_declaration)
)
```

Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this
pattern would match a comma-separated series of numbers:

```query
(
  (number)
  ("," (number))*
)
```

## Alternations

An alternation is written as a pair of square brackets (`[]`) containing a list of alternative patterns.
This is similar to _character classes_ from regular expressions (`[abc]` matches either a, b, or c).

For example, this pattern would match a call to either a variable or an object property.
In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`:

```query
(call_expression
  function: [
    (identifier) @function
    (member_expression
      property: (property_identifier) @method)
  ])
```

This pattern would match a set of possible keyword tokens, capturing them as `@keyword`:

```query
[
  "break"
  "delete"
  "else"
  "for"
  "function"
  "if"
  "return"
  "try"
  "while"
] @keyword
```

## Anchors

The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors
depending on where it's placed inside a query.

When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named
node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element`
capture to the first `identifier` node in the parent `array`:

```query
(array . (identifier) @the-element)
```

Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound
to each matched identifier.

Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the
last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`.

```query
(block (_) @last-expression .)
```

Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings.
The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers:
`a, b`, `b, c`, and `c, d`.

```query
(dotted_name
  (identifier) @prev-id
  .
  (identifier) @next-id)
```

Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched.

The restrictions placed on a pattern by an anchor operator ignore anonymous nodes.

[regex]: https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts



================================================
FILE: docs/src/using-parsers/queries/3-predicates-and-directives.md
================================================
# Predicates

You can also specify arbitrary metadata and conditions associated with a pattern
by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions
start with a _predicate name_ beginning with a `#` character, and ending with a `?` character. After that, they can
contain an arbitrary number of `@`-prefixed capture names or strings.

Tree-sitter's CLI supports the following predicates by default:

## The `eq?` predicate

This family of predicates allows you to match against a single capture or string
value.

The first argument to this predicate must be a capture, but the second can be either a capture to
compare the two captures' text, or a string to compare the first capture's text
against.

The base predicate is `#eq?`, but its complement, `#not-eq?`, can be used to _not_
match a value. Additionally, you can prefix either of these with `any-` to match
if _any_ of the nodes match the predicate. This is only useful when dealing with
quantified captures, as by default a quantified capture will only match if _all_ the captured nodes match the predicate.

Thus, there are four predicates in total:

- `#eq?`
- `#not-eq?`
- `#any-eq?`
- `#any-not-eq?`

Consider the following example targeting C:

```query
((identifier) @variable.builtin
  (#eq? @variable.builtin "self"))
```

This pattern would match any identifier that is `self`.

Now consider the following example:

```query
(
  (pair
    key: (property_identifier) @key-name
    value: (identifier) @value-name)
  (#eq? @key-name @value-name)
)
```

This pattern would match key-value pairs where the `value` is an identifier
with the same text as the key (meaning they are the same).

As mentioned earlier, the `any-` prefix is meant for use with quantified captures. Here's
an example finding an empty comment within a group of comments:

```query
((comment)+ @comment.empty
  (#any-eq? @comment.empty "//"))
```

## The `match?` predicate

These predicates are similar to the `eq?` predicates, but they use regular expressions
to match against the capture's text instead of string comparisons.

The first argument must be a capture, and the second must be a string containing
a regular expression.

Like the `eq?` predicate family, we can tack on `not-` to the beginning of the predicate
to negate the match, and `any-` to match if _any_ of the nodes in a quantified capture match the predicate.

This pattern matches identifiers written in `SCREAMING_SNAKE_CASE`.

```query
((identifier) @constant
  (#match? @constant "^[A-Z][A-Z_]+"))
```

This query identifies documentation comments in C that begin with three forward slashes (`///`).

```query
((comment)+ @comment.documentation
  (#match? @comment.documentation "^///\\s+.*"))
```

This query finds C code embedded in Go comments that appear just before a "C" import statement.
These are known as [`Cgo`][cgo] comments and are used to inject C code into Go programs.

```query
((comment)+ @injection.content
  .
  (import_declaration
    (import_spec path: (interpreted_string_literal) @_import_c))
  (#eq? @_import_c "\"C\"")
  (#match? @injection.content "^//"))
```

## The `any-of?` predicate

The `any-of?` predicate allows you to match a capture against multiple strings,
and will match if the capture's text is equal to any of the strings.

The query below will match any of the builtin variables in JavaScript.

```query
((identifier) @variable.builtin
  (#any-of? @variable.builtin
        "arguments"
        "module"
        "console"
        "window"
        "document"))
```

## The `is?` predicate

The `is?` predicate allows you to assert that a capture has a given property. This isn't widely used, but the CLI uses it
to determine whether a given node is a local variable or not, for example:

```query
((identifier) @variable.builtin
  (#match? @variable.builtin "^(arguments|module|console|window|document)$")
  (#is-not? local))
```

This pattern would match any builtin variable that is not a local variable, because the `#is-not? local` predicate is used.

# Directives

Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between predicates
and directives is that directives end in a `!` character instead of a `?` character.

Tree-sitter's CLI supports the following directives by default:

## The `set!` directive

This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that you
see fit.

```query
((comment) @injection.content
  (#lua-match? @injection.content "/[*\/][!*\/]<?[^a-zA-Z]")
  (#set! injection.language "doxygen"))
```

This pattern would match any comment that contains a Doxygen-style comment, and then sets the `injection.language` key to
`"doxygen"`. Programmatically, when iterating the captures of this pattern, you can access this property to then parse the
comment with the Doxygen parser.

## The `#select-adjacent!` directive

The `#select-adjacent!` directive allows you to filter the text associated with a capture so that only nodes adjacent to
another capture are preserved. It takes two arguments, both of which are capture names.

## The `#strip!` directive

The `#strip!` directive allows you to remove text from a capture. It takes two arguments: the first is the capture to strip
text from, and the second is a regular expression to match against the text. Any text matched by the regular expression will
be removed from the text associated with the capture.

For an example on the `#select-adjacent!` and `#strip!` directives,
view the [code navigation](../../4-code-navigation.md#examples) documentation.

## Recap

To recap the predicates and directives that Tree-sitter's bindings support:

- `#eq?` checks for a direct match against a capture or string

- `#match?` checks for a match against a regular expression

- `#any-of?` checks for a match against a list of strings

- `#is?` checks for a property on a capture

- Adding `not-` to the beginning of these predicates will negate the match

- By default, a quantified capture will only match if _all_ the nodes match the predicate

- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate

- `#set!` associates key-value pairs with a pattern

- `#select-adjacent!` filters the text associated with a capture so that only nodes adjacent to another capture are preserved

- `#strip!` removes text from a capture

```admonish info
Predicates and directives are not handled directly by the Tree-sitter C library.
They are just exposed in a structured form so that higher-level code can perform
the filtering. However, higher-level bindings to Tree-sitter like
[the Rust crate][rust crate]
or the [WebAssembly binding][wasm binding]
do implement a few common predicates like those explained above. In the future, more "standard" predicates and directives
may be added.
```

[cgo]: https://pkg.go.dev/cmd/cgo
[rust crate]: https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust
[wasm binding]: https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web



================================================
FILE: docs/src/using-parsers/queries/4-api.md
================================================
# The Query API

Create a query by specifying a string containing one or more patterns:

```c
TSQuery *ts_query_new(
  const TSLanguage *language,
  const char *source,
  uint32_t source_len,
  uint32_t *error_offset,
  TSQueryError *error_type
);
```

If there is an error in the query, then the `error_offset` argument will be set to the byte offset of the error, and the
`error_type` argument will be set to a value that indicates the type of error:

```c
typedef enum {
  TSQueryErrorNone = 0,
  TSQueryErrorSyntax,
  TSQueryErrorNodeType,
  TSQueryErrorField,
  TSQueryErrorCapture,
} TSQueryError;
```

The `TSQuery` value is immutable and can be safely shared between threads. To execute the query, create a `TSQueryCursor`,
which carries the state needed for processing the queries. The query cursor should not be shared between threads, but can
be reused for many query executions.

```c
TSQueryCursor *ts_query_cursor_new(void);
```

You can then execute the query on a given syntax node:

```c
void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
```

You can then iterate over the matches:

```c
typedef struct {
  TSNode node;
  uint32_t index;
} TSQueryCapture;

typedef struct {
  uint32_t id;
  uint16_t pattern_index;
  uint16_t capture_count;
  const TSQueryCapture *captures;
} TSQueryMatch;

bool ts_query_cursor_next_match(TSQueryCursor *, TSQueryMatch *match);
```

This function will return `false` when there are no more matches. Otherwise, it will populate the `match` with data about
which pattern matched and which nodes were captured.



================================================
FILE: docs/src/using-parsers/queries/index.md
================================================
# Pattern Matching with Queries

Code analysis often requires finding specific patterns in source code. Tree-sitter provides a simple pattern-matching
language for this purpose, similar to what's used in its [unit test system][unit testing].
This allows you to express and search for code structures without writing complex parsing logic.

[unit testing]: ../../creating-parsers/5-writing-tests.md



================================================
FILE: lib/README.md
================================================
## Subdirectories

* [`src`](./src) - C source code for the Tree-sitter library
* [`include`](./include) - C headers for the Tree-sitter library
* [`binding_rust`](./binding_rust) - Rust bindings to the Tree-sitter library
* [`binding_web`](./binding_web) - JavaScript bindings to the Tree-sitter library, using WebAssembly



================================================
FILE: lib/Cargo.toml
================================================
[package]
name = "tree-sitter"
version.workspace = true
description = "Rust bindings to the Tree-sitter parsing library"
authors.workspace = true
edition.workspace = true
rust-version = "1.76"
readme = "binding_rust/README.md"
homepage.workspace = true
repository.workspace = true
documentation = "https://docs.rs/tree-sitter"
license.workspace = true
keywords.workspace = true
categories = [
  "api-bindings",
  "external-ffi-bindings",
  "parsing",
  "text-editors",
]

build = "binding_rust/build.rs"
links = "tree-sitter"

include = [
  "/binding_rust/*",
  "/Cargo.toml",
  "/src/*.h",
  "/src/*.c",
  "/src/portable/*",
  "/src/unicode/*",
  "/src/wasm/*",
  "/include/tree_sitter/api.h",
]

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
targets = ["x86_64-unknown-linux-gnu", "x86_64-pc-windows-gnu"]

[lints]
workspace = true

[features]
default = ["std"]
std = ["regex/std", "regex/perf", "regex-syntax/unicode"]
wasm = ["std", "wasmtime-c-api"]

[dependencies]
regex = { version = "1.11.1", default-features = false, features = ["unicode"] }
regex-syntax = { version = "0.8.5", default-features = false }
tree-sitter-language.workspace = true
streaming-iterator = "0.1.9"

[dependencies.wasmtime-c-api]
version = "29.0.1"
optional = true
package = "wasmtime-c-api-impl"
default-features = false
features = ["cranelift", "gc-drc"]

[build-dependencies]
bindgen = { version = "0.72.0", optional = true }
cc.workspace = true
serde_json.workspace = true

[lib]
path = "binding_rust/lib.rs"



================================================
FILE: lib/CMakeLists.txt
================================================
# Build script for the tree-sitter C library: configures the library target,
# optional Wasm (wasmtime) support, and install rules for the header,
# pkg-config file, and library.
cmake_minimum_required(VERSION 3.13)

project(tree-sitter
        VERSION "0.26.0"
        DESCRIPTION "An incremental parsing system for programming tools"
        HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/"
        LANGUAGES C)

# User-facing switches; shared libraries are the default, Wasm and the
# amalgamated build are opt-in.
option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
option(TREE_SITTER_FEATURE_WASM "Enable the Wasm feature" OFF)
option(AMALGAMATED "Build using an amalgamated source" OFF)

# Either compile only src/lib.c (amalgamated), or every src/*.c except lib.c.
if(AMALGAMATED)
  set(TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/src/lib.c")
else()
  file(GLOB TS_SOURCE_FILES src/*.c)
  list(REMOVE_ITEM TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/src/lib.c")
endif()

add_library(tree-sitter ${TS_SOURCE_FILES})

target_include_directories(tree-sitter PRIVATE src src/wasm include)

# Compiler-specific warning configuration.
if(MSVC)
  target_compile_options(tree-sitter PRIVATE
                         /wd4018 # disable 'signed/unsigned mismatch'
                         /wd4232 # disable 'nonstandard extension used'
                         /wd4244 # disable 'possible loss of data'
                         /wd4267 # disable 'possible loss of data (size_t)'
                         /wd4701 # disable 'potentially uninitialized local variable'
                         /we4022 # treat 'incompatible types' as an error
                         /W4)
else()
  target_compile_options(tree-sitter PRIVATE
                         -Wall -Wextra -Wshadow -Wpedantic
                         -Werror=incompatible-pointer-types)
endif()

# Optional Wasm support: locate the wasmtime headers and library (unless the
# cache variables are already set), then wire them into the target.
if(TREE_SITTER_FEATURE_WASM)
  if(NOT DEFINED CACHE{WASMTIME_INCLUDE_DIR})
    message(CHECK_START "Looking for wasmtime headers")
    find_path(WASMTIME_INCLUDE_DIR wasmtime.h
              PATHS ENV DEP_WASMTIME_C_API_INCLUDE)
    if(NOT WASMTIME_INCLUDE_DIR)
      # Drop the cached NOTFOUND value so the next configure run searches again.
      unset(WASMTIME_INCLUDE_DIR CACHE)
      message(FATAL_ERROR "Could not find wasmtime headers.\nDid you forget to set CMAKE_INCLUDE_PATH?")
    endif()
    message(CHECK_PASS "found")
  endif()

  if(NOT DEFINED CACHE{WASMTIME_LIBRARY})
    message(CHECK_START "Looking for wasmtime library")
    find_library(WASMTIME_LIBRARY wasmtime)
    if(NOT WASMTIME_LIBRARY)
      # Drop the cached NOTFOUND value so the next configure run searches again.
      unset(WASMTIME_LIBRARY CACHE)
      message(FATAL_ERROR "Could not find wasmtime library.\nDid you forget to set CMAKE_LIBRARY_PATH?")
    endif()
    message(CHECK_PASS "found")
  endif()

  target_compile_definitions(tree-sitter PUBLIC TREE_SITTER_FEATURE_WASM)
  target_include_directories(tree-sitter SYSTEM PRIVATE "${WASMTIME_INCLUDE_DIR}")
  target_link_libraries(tree-sitter PUBLIC "${WASMTIME_LIBRARY}")
  set_property(TARGET tree-sitter PROPERTY C_STANDARD_REQUIRED ON)

  # Static builds additionally expose platform system libraries to consumers.
  # NOTE(review): presumably these are wasmtime's own link dependencies; confirm.
  if(NOT BUILD_SHARED_LIBS)
    if(WIN32)
      target_compile_definitions(tree-sitter PRIVATE WASM_API_EXTERN= WASI_API_EXTERN=)
      target_link_libraries(tree-sitter INTERFACE ws2_32 advapi32 userenv ntdll shell32 ole32 bcrypt)
    elseif(NOT APPLE)
      target_link_libraries(tree-sitter INTERFACE pthread dl m)
    endif()
  endif()
endif()

# C11, hidden symbol visibility by default, PIC, and an SOVERSION of
# "<major>.<minor>" taken from the project version.
set_target_properties(tree-sitter
                      PROPERTIES
                      C_STANDARD 11
                      C_VISIBILITY_PRESET hidden
                      POSITION_INDEPENDENT_CODE ON
                      SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
                      DEFINE_SYMBOL "")

target_compile_definitions(tree-sitter PRIVATE _POSIX_C_SOURCE=200112L _DEFAULT_SOURCE)

include(GNUInstallDirs)

# Generate the pkg-config file from its template (only @VAR@ references are
# substituted because of @ONLY).
configure_file(tree-sitter.pc.in "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" @ONLY)

# Install the public header, the generated pkg-config file, and the library.
install(FILES include/tree_sitter/api.h
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tree_sitter")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc"
        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(TARGETS tree-sitter
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")



================================================
FILE: lib/tree-sitter.pc.in
================================================
prefix=@CMAKE_INSTALL_PREFIX@
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@

Name: tree-sitter
Description: @PROJECT_DESCRIPTION@
URL: @PROJECT_HOMEPAGE_URL@
Version: @PROJECT_VERSION@
Libs: -L${libdir} -ltree-sitter
Cflags: -I${includedir}



================================================
FILE: lib/.ccls
================================================
-std=c99
-Isrc
-Iinclude
-Iutf8proc



================================================
FILE: lib/binding_rust/README.md
================================================
# Rust Tree-sitter

[![crates.io badge]][crates.io]

[crates.io]: https://crates.io/crates/tree-sitter
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter.svg?color=%23B48723

Rust bindings to the [Tree-sitter][] parsing library.

## Basic Usage

First, create a parser:

```rust
use tree_sitter::{InputEdit, Language, Parser, Point};

let mut parser = Parser::new();
```

Then, add a language as a dependency:

```toml
[dependencies]
tree-sitter = "0.24"
tree-sitter-rust = "0.23"
```

To use a language, assign it to the parser:

```rust
parser.set_language(&tree_sitter_rust::LANGUAGE.into()).expect("Error loading Rust grammar");
```

Now you can parse source code:

```rust
let source_code = "fn test() {}";
let mut tree = parser.parse(source_code, None).unwrap();
let root_node = tree.root_node();

assert_eq!(root_node.kind(), "source_file");
assert_eq!(root_node.start_position().column, 0);
assert_eq!(root_node.end_position().column, 12);
```

### Editing

Once you have a syntax tree, you can update it when your source code changes.
Passing in the previously edited tree makes `parse` run much more quickly:

```rust
let new_source_code = "fn test(a: u32) {}";

tree.edit(&InputEdit {
  start_byte: 8,
  old_end_byte: 8,
  new_end_byte: 14,
  start_position: Point::new(0, 8),
  old_end_position: Point::new(0, 8),
  new_end_position: Point::new(0, 14),
});

let new_tree = parser.parse(new_source_code, Some(&tree));
```

### Text Input

The source code to parse can be provided either as a string, a slice, a vector,
or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16:

```rust
// Store some source code in an array of lines.
let lines = &[
    "pub fn foo() {",
    "  1",
    "}",
];

// Parse the source code using a custom callback. The callback is called
// with both a byte offset and a row/column offset.
let tree = parser.parse_with(&mut |_byte: usize, position: Point| -> &[u8] {
    let row = position.row as usize;
    let column = position.column as usize;
    if row < lines.len() {
        if column < lines[row].as_bytes().len() {
            &lines[row].as_bytes()[column..]
        } else {
            b"\n"
        }
    } else {
        &[]
    }
}, None).unwrap();

assert_eq!(
  tree.root_node().to_sexp(),
  "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))"
);
```

## Using WASM Grammar Files

> Requires the feature **wasm** to be enabled.

First, create a parser with a WASM store:

```rust
use tree_sitter::{wasmtime::Engine, Parser, WasmStore};

let engine = Engine::default();
let store = WasmStore::new(&engine).unwrap();

let mut parser = Parser::new();
parser.set_wasm_store(store).unwrap();
```

Then, load the language from a WASM file:

```rust
const JAVASCRIPT_GRAMMAR: &[u8] = include_bytes!("path/to/tree-sitter-javascript.wasm");

let mut store = WasmStore::new(&engine).unwrap();
let javascript = store
    .load_language("javascript", JAVASCRIPT_GRAMMAR)
    .unwrap();

// The language may be loaded from a different WasmStore than the one set on
// the parser but it must use the same underlying WasmEngine.
parser.set_language(&javascript).unwrap();
```

Now you can parse source code:

```rust
let source_code = "let x = 1;";
let tree = parser.parse(source_code, None).unwrap();

assert_eq!(
    tree.root_node().to_sexp(),
    "(program (lexical_declaration (variable_declarator name: (identifier) value: (number))))"
);
```

[tree-sitter]: https://github.com/tree-sitter/tree-sitter

## Features

- **std** - This feature is enabled by default and allows `tree-sitter` to use the standard library.
  - Error types implement the `std::error::Error` trait.
  - `regex` performance optimizations are enabled.
  - The DOT graph methods are enabled.
- **wasm** - This feature allows `tree-sitter` to be built for Wasm targets using the `wasmtime-c-api` crate.



================================================
FILE: lib/binding_rust/build.rs
================================================
use std::{env, fs, path::PathBuf};

/// Build-script entry point.
///
/// Optionally generates bindings (with the `bindgen` feature), copies
/// `src/wasm/stdlib-symbols.txt` into `OUT_DIR`, registers every entry of the
/// `src` directory for change tracking, and compiles `src/lib.c` into the
/// `tree-sitter` static library via `cc`.
fn main() {
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());

    #[cfg(feature = "bindgen")]
    generate_bindings(&out_dir);

    let symbols_dest = out_dir.join("stdlib-symbols.txt");
    fs::copy("src/wasm/stdlib-symbols.txt", symbols_dest).unwrap();

    let mut build = cc::Build::new();

    // The Wasm feature needs an extra define and the wasmtime C API headers.
    println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WASM");
    let wasm_enabled = env::var("CARGO_FEATURE_WASM").is_ok();
    if wasm_enabled {
        build.define("TREE_SITTER_FEATURE_WASM", "");
        build.define("static_assert(...)", "");
        build.include(env::var("DEP_WASMTIME_C_API_INCLUDE").unwrap());
    }

    let crate_root = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
    let include_dir = crate_root.join("include");
    let src_dir = crate_root.join("src");
    let wasm_dir = src_dir.join("wasm");

    // Re-run the build script whenever any direct entry of `src` changes.
    for entry in fs::read_dir(&src_dir).unwrap() {
        let tracked = src_dir.join(entry.unwrap().file_name());
        println!("cargo:rerun-if-changed={}", tracked.to_str().unwrap());
    }

    for flag in [
        "-std=c11",
        "-fvisibility=hidden",
        "-Wshadow",
        "-Wno-unused-parameter",
        "-Wno-incompatible-pointer-types",
    ] {
        build.flag_if_supported(flag);
    }

    build
        .include(&src_dir)
        .include(&wasm_dir)
        .include(&include_dir);
    build
        .define("_POSIX_C_SOURCE", "200112L")
        .define("_DEFAULT_SOURCE", None);
    build.warnings(false).file(src_dir.join("lib.c"));
    build.compile("tree-sitter");

    // Expose the header directory to dependent crates' build scripts.
    println!("cargo:include={}", include_dir.display());
}

/// Generates Rust bindings for `include/tree_sitter/api.h` with `bindgen` and
/// writes them to `bindings.rs` inside `out_dir`.
///
/// Panics if the package's Rust version cannot be parsed as a `RustTarget`,
/// if generation fails, or if the output file cannot be written.
#[cfg(feature = "bindgen")]
fn generate_bindings(out_dir: &std::path::Path) {
    use std::str::FromStr;

    use bindgen::RustTarget;

    const HEADER_PATH: &str = "include/tree_sitter/api.h";

    // Types that must not derive `Copy` in the generated bindings.
    const NO_COPY_TYPES: &[&str] = &[
        "TSInput",
        "TSLanguage",
        "TSLogger",
        "TSLookaheadIterator",
        "TSParser",
        "TSTree",
        "TSQuery",
        "TSQueryCursor",
        "TSQueryCapture",
        "TSQueryMatch",
        "TSQueryPredicateStep",
    ];

    println!("cargo:rerun-if-changed={HEADER_PATH}");

    let no_copy_pattern = NO_COPY_TYPES.join("|");
    let target = RustTarget::from_str(env!("CARGO_PKG_RUST_VERSION")).unwrap();

    let bindings = bindgen::Builder::default()
        .header(HEADER_PATH)
        .layout_tests(false)
        .allowlist_type("^TS.*")
        .allowlist_function("^ts_.*")
        .allowlist_var("^TREE_SITTER.*")
        .no_copy(no_copy_pattern)
        .prepend_enum_name(false)
        .use_core()
        .clang_arg("-D TREE_SITTER_FEATURE_WASM")
        .rust_target(target)
        .generate()
        .expect("Failed to generate bindings");

    let output_path = out_dir.join("bindings.rs");
    if bindings.write_to_file(&output_path).is_err() {
        panic!(
            "Failed to write bindings into path: {}",
            output_path.display()
        );
    }
}



================================================
FILE: lib/binding_rust/ffi.rs
================================================
#![allow(dead_code)]
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(clippy::missing_const_for_fn)]

#[cfg(feature = "bindgen")]
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

#[cfg(not(feature = "bindgen"))]
include!("./bindings.rs");

// Unix declaration of the external C function `_ts_dup`; only available with
// the `std` feature. Takes a C `int` and returns a C `int`.
// NOTE(review): presumably duplicates the given file descriptor; confirm
// against the C sources.
#[cfg(unix)]
#[cfg(feature = "std")]
extern "C" {
    pub(crate) fn _ts_dup(fd: std::os::raw::c_int) -> std::os::raw::c_int;
}

// Windows declaration of the same symbol; here the argument is an opaque
// handle pointer rather than an integer descriptor.
#[cfg(windows)]
#[cfg(feature = "std")]
extern "C" {
    pub(crate) fn _ts_dup(handle: *mut std::os::raw::c_void) -> std::os::raw::c_int;
}

use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str};

use crate::{
    Language, LookaheadIterator, Node, ParseState, Parser, Query, QueryCursor, QueryCursorState,
    QueryError, Tree, TreeCursor,
};

impl Language {
    /// Wraps a raw `TSLanguage` pointer in a [`Language`] without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *const TSLanguage) -> Self {
        Self(ptr)
    }

    /// Gives up the [`Language`] and hands back the pointer to the underlying
    /// C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *const TSLanguage {
        let this = ManuallyDrop::new(self);
        this.0
    }
}

impl Parser {
    /// Wraps a raw `TSParser` pointer in a [`Parser`] without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSParser) -> Self {
        let non_null = NonNull::new_unchecked(ptr);
        Self(non_null)
    }

    /// Gives up the [`Parser`] and hands back the pointer to the underlying
    /// C structure. The wrapper's destructor is not run.
    ///
    /// # Safety
    ///
    /// The caller is responsible for adjusting the parser's state, such as
    /// disabling logging or dot-graph printing, if leaving it enabled
    /// could lead to problems like a use after free.
    #[must_use]
    pub fn into_raw(self) -> *mut TSParser {
        let this = ManuallyDrop::new(self);
        this.0.as_ptr()
    }
}

impl ParseState {
    /// Wraps a raw `TSParseState` pointer in a [`ParseState`] without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSParseState) -> Self {
        let non_null = NonNull::new_unchecked(ptr);
        Self(non_null)
    }

    /// Gives up the [`ParseState`] and hands back the pointer to the
    /// underlying C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSParseState {
        let this = ManuallyDrop::new(self);
        this.0.as_ptr()
    }
}

impl Tree {
    /// Wraps a raw `TSTree` pointer in a [`Tree`] without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSTree) -> Self {
        let non_null = NonNull::new_unchecked(ptr);
        Self(non_null)
    }

    /// Gives up the [`Tree`] and hands back the pointer to the underlying
    /// C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSTree {
        let this = ManuallyDrop::new(self);
        this.0.as_ptr()
    }
}

impl Node<'_> {
    /// Reconstructs a [`Node`] from a raw `TSNode` value.
    ///
    /// # Safety
    ///
    /// `raw` is wrapped as-is, without any validation, and the lifetime of the
    /// returned [`Node`] is chosen by the caller rather than derived from `raw`.
    /// NOTE(review): presumably `raw` must belong to a tree that stays alive
    /// for that whole lifetime; confirm against the C API.
    #[must_use]
    pub const unsafe fn from_raw(raw: TSNode) -> Self {
        Self(raw, PhantomData)
    }

    /// Consumes the [`Node`], returning the underlying `TSNode` value
    /// (a C struct passed by value, not a pointer).
    #[must_use]
    pub fn into_raw(self) -> TSNode {
        let this = ManuallyDrop::new(self);
        this.0
    }
}

impl TreeCursor<'_> {
    /// Reconstructs a [`TreeCursor`] from a raw `TSTreeCursor` value.
    ///
    /// # Safety
    ///
    /// `raw` is wrapped as-is, without any validation, and the lifetime of the
    /// returned [`TreeCursor`] is chosen by the caller rather than derived
    /// from `raw`.
    /// NOTE(review): presumably `raw` must refer to a tree that stays alive
    /// for that whole lifetime; confirm against the C API.
    #[must_use]
    pub const unsafe fn from_raw(raw: TSTreeCursor) -> Self {
        Self(raw, PhantomData)
    }

    /// Consumes the [`TreeCursor`], returning the underlying `TSTreeCursor`
    /// value (a C struct passed by value, not a pointer). The wrapper's
    /// destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> TSTreeCursor {
        let this = ManuallyDrop::new(self);
        this.0
    }
}

impl Query {
    /// Rebuilds a [`Query`] from a raw `TSQuery` pointer and the query's
    /// source text by delegating to `Self::from_raw_parts`; any error produced
    /// there is returned unchanged.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result<Self, QueryError> {
        let rebuilt = Self::from_raw_parts(ptr, source);
        rebuilt
    }

    /// Gives up the [`Query`] and hands back the pointer to the underlying
    /// C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSQuery {
        let this = ManuallyDrop::new(self);
        this.ptr.as_ptr()
    }
}

impl QueryCursor {
    /// Wraps a raw `TSQueryCursor` pointer in a [`QueryCursor`] without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSQueryCursor) -> Self {
        let ptr = NonNull::new_unchecked(ptr);
        Self { ptr }
    }

    /// Gives up the [`QueryCursor`] and hands back the pointer to the
    /// underlying C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSQueryCursor {
        let this = ManuallyDrop::new(self);
        this.ptr.as_ptr()
    }
}

impl QueryCursorState {
    /// Wraps a raw `TSQueryCursorState` pointer in a [`QueryCursorState`]
    /// without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSQueryCursorState) -> Self {
        let non_null = NonNull::new_unchecked(ptr);
        Self(non_null)
    }

    /// Gives up the [`QueryCursorState`] and hands back the pointer to the
    /// underlying C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSQueryCursorState {
        let this = ManuallyDrop::new(self);
        this.0.as_ptr()
    }
}

impl LookaheadIterator {
    /// Wraps a raw `TSLookaheadIterator` pointer in a [`LookaheadIterator`]
    /// without checking it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `ptr` is non-null.
    #[must_use]
    pub const unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> Self {
        let non_null = NonNull::new_unchecked(ptr);
        Self(non_null)
    }

    /// Gives up the [`LookaheadIterator`] and hands back the pointer to the
    /// underlying C structure. The wrapper's destructor is not run.
    #[must_use]
    pub fn into_raw(self) -> *mut TSLookaheadIterator {
        let this = ManuallyDrop::new(self);
        this.0.as_ptr()
    }
}



================================================
FILE: lib/binding_rust/util.rs
================================================
use core::ffi::c_void;

use super::FREE_FN;

/// A raw pointer and a length, exposed as an iterator.
///
/// Elements are yielded by copy. When the iterator is dropped, a non-null
/// buffer pointer is released through `FREE_FN`.
pub struct CBufferIter<T> {
    // Start of the buffer; passed to `FREE_FN` on drop unless null.
    ptr: *mut T,
    // Number of elements the iterator will yield in total.
    count: usize,
    // Index of the next element to yield.
    i: usize,
}

impl<T> CBufferIter<T> {
    /// Creates an iterator over `count` elements starting at `ptr`.
    ///
    /// # Safety
    ///
    /// `ptr` must be valid for reading `count` consecutive values of `T`,
    /// because iteration reads `ptr.add(i)` for every `i < count`. The
    /// iterator takes ownership of the buffer: a non-null `ptr` is handed to
    /// `FREE_FN` when the iterator is dropped.
    pub const unsafe fn new(ptr: *mut T, count: usize) -> Self {
        Self { ptr, count, i: 0 }
    }
}

impl<T: Copy> Iterator for CBufferIter<T> {
    type Item = T;

    /// Yields a copy of the next element, or `None` once `count` elements
    /// have been produced.
    fn next(&mut self) -> Option<Self::Item> {
        if self.i < self.count {
            let index = self.i;
            self.i = index + 1;
            let item = unsafe { *self.ptr.add(index) };
            Some(item)
        } else {
            None
        }
    }

    /// Reports the exact number of elements still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.count - self.i;
        (left, Some(left))
    }
}

// `size_hint` is exact, so the default `len()` is accurate.
impl<T: Copy> ExactSizeIterator for CBufferIter<T> {}

impl<T> Drop for CBufferIter<T> {
    /// Releases the underlying buffer through `FREE_FN`; a null pointer is
    /// left untouched.
    fn drop(&mut self) {
        if self.ptr.is_null() {
            return;
        }
        let buffer = self.ptr.cast::<c_void>();
        unsafe { (FREE_FN)(buffer) };
    }
}



================================================
FILE: lib/binding_rust/wasm_language.rs
================================================
use std::{
    error,
    ffi::{CStr, CString},
    fmt,
    mem::{self, MaybeUninit},
    os::raw::c_char,
};

pub use wasmtime_c_api::wasmtime;

use crate::{ffi, Language, LanguageError, Parser, FREE_FN};

// Never called: this function exists only so that Cargo keeps
// `wasmtime-c-api` as a dependency of this crate, since the crate is
// otherwise used solely from the C code.
#[allow(unused)]
fn _use_wasmtime() {
    let _ = wasmtime_c_api::wasm_engine_new();
}

/// C-layout wrapper around a [`wasmtime::Engine`].
///
/// NOTE(review): presumably mirrors the `wasm_engine_t` type of the wasmtime
/// C API so that the engine can be shared with the C code; confirm.
#[repr(C)]
#[derive(Clone)]
#[allow(non_camel_case_types)]
pub struct wasm_engine_t {
    pub(crate) engine: wasmtime::Engine,
}

/// Owning handle to a C `TSWasmStore`; the store is deleted with
/// `ts_wasm_store_delete` when the handle is dropped.
pub struct WasmStore(*mut ffi::TSWasmStore);

// NOTE(review): these impls assert that the underlying C store may be moved
// to and shared between threads; that guarantee comes from the C
// implementation, not from anything visible here. Confirm.
unsafe impl Send for WasmStore {}
unsafe impl Sync for WasmStore {}

/// An error reported by the Wasm store, consisting of a category and the
/// message text copied from the C library.
#[derive(Debug, PartialEq, Eq)]
pub struct WasmError {
    pub kind: WasmErrorKind,
    pub message: String,
}

/// Category of a [`WasmError`].
///
/// `Parse`, `Compile` and `Instantiate` correspond to the matching
/// `TSWasmErrorKind*` values of the C API; every other value is reported as
/// `Other`.
#[derive(Debug, PartialEq, Eq)]
pub enum WasmErrorKind {
    Parse,
    Compile,
    Instantiate,
    Other,
}

impl WasmStore {
    pub fn new(engine: &wasmtime::Engine) -> Result<Self, WasmError> {
        unsafe {
            let mut error = MaybeUninit::<ffi::TSWasmError>::uninit();
            let store = ffi::ts_wasm_store_new(
                std::ptr::from_ref::<wasmtime::Engine>(engine)
                    .cast_mut()
                    .cast(),
                error.as_mut_ptr(),
            );
            if store.is_null() {
                Err(WasmError::new(error.assume_init()))
            } else {
                Ok(Self(store))
            }
        }
    }

    pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Result<Language, WasmError> {
        let name = CString::new(name).unwrap();
        unsafe {
            let mut error = MaybeUninit::<ffi::TSWasmError>::uninit();
            let language = ffi::ts_wasm_store_load_language(
                self.0,
                name.as_ptr(),
                bytes.as_ptr().cast::<c_char>(),
                bytes.len() as u32,
                error.as_mut_ptr(),
            );
            if language.is_null() {
                Err(WasmError::new(error.assume_init()))
            } else {
                Ok(Language(language))
            }
        }
    }

    #[must_use]
    pub fn language_count(&self) -> usize {
        unsafe { ffi::ts_wasm_store_language_count(self.0) }
    }
}

impl WasmError {
    /// Converts a C `TSWasmError` into a [`WasmError`], taking ownership of
    /// its message buffer and releasing it through `FREE_FN`.
    ///
    /// The message is converted lossily, so bytes that are not valid UTF-8
    /// become U+FFFD instead of causing a panic (which would also have
    /// skipped freeing the buffer). A null message yields an empty string.
    ///
    /// # Safety
    ///
    /// `error` must be a fully initialized value. A non-null `error.message`
    /// must point to a NUL-terminated string that can be freed with
    /// `FREE_FN`, and it must not be used again after this call.
    unsafe fn new(error: ffi::TSWasmError) -> Self {
        let message = if error.message.is_null() {
            String::new()
        } else {
            // Copy the text out before the buffer is released.
            let text = CStr::from_ptr(error.message)
                .to_string_lossy()
                .into_owned();
            (FREE_FN)(error.message.cast());
            text
        };
        let kind = match error.kind {
            ffi::TSWasmErrorKindParse => WasmErrorKind::Parse,
            ffi::TSWasmErrorKindCompile => WasmErrorKind::Compile,
            ffi::TSWasmErrorKindInstantiate => WasmErrorKind::Instantiate,
            _ => WasmErrorKind::Other,
        };
        Self { kind, message }
    }
}

impl Language {
    /// Returns the result of `ts_language_is_wasm` for this language.
    #[must_use]
    pub fn is_wasm(&self) -> bool {
        let raw = self.0;
        unsafe { ffi::ts_language_is_wasm(raw) }
    }
}

impl Parser {
    /// Hands `store` over to the parser via `ts_parser_set_wasm_store`.
    ///
    /// The Rust handle is forgotten rather than dropped, so the store is not
    /// deleted here. This function always returns `Ok(())`.
    pub fn set_wasm_store(&mut self, store: WasmStore) -> Result<(), LanguageError> {
        let raw_store = store.0;
        unsafe { ffi::ts_parser_set_wasm_store(self.0.as_ptr(), raw_store) };
        mem::forget(store);
        Ok(())
    }

    /// Takes the Wasm store back from the parser via
    /// `ts_parser_take_wasm_store`, returning `None` when the C library
    /// hands back a null pointer.
    pub fn take_wasm_store(&mut self) -> Option<WasmStore> {
        let raw_store = unsafe { ffi::ts_parser_take_wasm_store(self.0.as_ptr()) };
        if raw_store.is_null() {
            return None;
        }
        Some(WasmStore(raw_store))
    }
}

impl Drop for WasmStore {
    /// Deletes the underlying C store with `ts_wasm_store_delete`.
    fn drop(&mut self) {
        let raw_store = self.0;
        unsafe { ffi::ts_wasm_store_delete(raw_store) };
    }
}

impl fmt::Display for WasmError {
    /// Writes a description of the error kind, then `": "`, then the message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = match self.kind {
            WasmErrorKind::Parse => "Failed to parse wasm",
            WasmErrorKind::Compile => "Failed to compile wasm",
            WasmErrorKind::Instantiate => "Failed to instantiate wasm module",
            WasmErrorKind::Other => "Unknown error",
        };
        f.write_str(description)?;
        f.write_str(": ")?;
        f.write_str(&self.message)
    }
}

// The default `Error` methods are sufficient; there is no source error to expose.
impl error::Error for WasmError {}



================================================
FILE: lib/binding_web/README.md
================================================
# Web Tree-sitter

[![npmjs.com badge]][npmjs.com]

[npmjs.com]: https://www.npmjs.org/package/web-tree-sitter
[npmjs.com badge]: https://img.shields.io/npm/v/web-tree-sitter.svg?color=%23BF4A4A

WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library.

## Setup

You can download the `web-tree-sitter.js` and `web-tree-sitter.wasm` files from [the latest GitHub release][gh release] and load
them using a standalone script:

```html
<script src="/the/path/to/web-tree-sitter.js"></script>

<script>
  const { Parser } = window.TreeSitter;
  Parser.init().then(() => { /* the library is ready */ });
</script>
```

You can also install [the `web-tree-sitter` module][npm module] from NPM and load it using a system like Webpack:

```js
const { Parser } = require('web-tree-sitter');
Parser.init().then(() => { /* the library is ready */ });
```

or Vite:

```js
import { Parser }  from 'web-tree-sitter';
Parser.init().then(() => { /* the library is ready */ });
```

With Vite, you also need to make sure your server provides the `tree-sitter.wasm`
file to your `public` directory. You can do this automatically with a `postinstall`
[script](https://docs.npmjs.com/cli/v10/using-npm/scripts) in your `package.json`:

```js
"postinstall": "cp node_modules/web-tree-sitter/tree-sitter.wasm public"
```

You can also use this module with [deno](https://deno.land/):

```js
import { Parser } from "npm:web-tree-sitter";
await Parser.init();
// the library is ready
```

To use the debug version of the library, replace your import of `web-tree-sitter` with `web-tree-sitter/debug`:

```js
import { Parser } from 'web-tree-sitter/debug'; // or require('web-tree-sitter/debug')

Parser.init().then(() => { /* the library is ready */ });
```

This will load the debug version of the `.js` and `.wasm` file, which includes debug symbols and assertions.

> [!NOTE]
> The `web-tree-sitter.js` file on GH releases is an ES6 module. If you are interested in using a pure CommonJS library, such
> as for Electron, you should use the `web-tree-sitter.cjs` file instead.

### Basic Usage

First, create a parser:

```js
const parser = new Parser();
```

Then assign a language to the parser. Tree-sitter languages are packaged as individual `.wasm` files (more on this below):

```js
const { Language } = require('web-tree-sitter');
const JavaScript = await Language.load('/path/to/tree-sitter-javascript.wasm');
parser.setLanguage(JavaScript);
```

Now you can parse source code:

```js
const sourceCode = 'let x = 1; console.log(x);';
const tree = parser.parse(sourceCode);
```

and inspect the syntax tree.

```javascript
console.log(tree.rootNode.toString());

// (program
//   (lexical_declaration
//     (variable_declarator (identifier) (number)))
//   (expression_statement
//     (call_expression
//       (member_expression (identifier) (property_identifier))
//       (arguments (identifier)))))

const callExpression = tree.rootNode.child(1).firstChild;
console.log(callExpression);

// { type: 'call_expression',
//   startPosition: {row: 0, column: 11},
//   endPosition: {row: 0, column: 25},
//   startIndex: 11,
//   endIndex: 25 }
```

### Editing

If your source code *changes*, you can update the syntax tree. This will take less time than the first parse.

```javascript
// Replace 'let' with 'const'
const newSourceCode = 'const x = 1; console.log(x);';

tree.edit({
  startIndex: 0,
  oldEndIndex: 3,
  newEndIndex: 5,
  startPosition: {row: 0, column: 0},
  oldEndPosition: {row: 0, column: 3},
  newEndPosition: {row: 0, column: 5},
});

const newTree = parser.parse(newSourceCode, tree);
```

### Parsing Text From a Custom Data Structure

If your text is stored in a data structure other than a single string, you can parse it by supplying a callback to `parse`
instead of a string:

```javascript
const sourceLines = [
  'let x = 1;',
  'console.log(x);'
];

const tree = parser.parse((index, position) => {
  let line = sourceLines[position.row];
  if (line) return line.slice(position.column);
});
```

### Getting the `.wasm` language files

There are several options on how to get the `.wasm` files for the languages you want to parse.

#### From npmjs.com

The recommended way is to just install the package from npm. For example, to parse JavaScript, you can install the `tree-sitter-javascript`
package:

```sh
npm install tree-sitter-javascript
```

Then you can find the `.wasm` file in the `node_modules/tree-sitter-javascript` directory.

#### From GitHub

You can also download the `.wasm` files from GitHub releases, so long as the repository uses our reusable workflow to publish
them.
For example, you can download the JavaScript `.wasm` file from the tree-sitter-javascript [releases page][gh release js].

#### Generating `.wasm` files

You can also generate the `.wasm` file for your desired grammar. Shown below is an example of how to generate the `.wasm`
file for the JavaScript grammar.

**IMPORTANT**: [Emscripten][emscripten], [Docker][docker], or [Podman][podman] need to be installed.

First install `tree-sitter-cli`, and the tree-sitter language for which to generate `.wasm`
(`tree-sitter-javascript` in this example):

```sh
npm install --save-dev tree-sitter-cli tree-sitter-javascript
```

Then use the `tree-sitter` CLI tool to generate the `.wasm` file:

```sh
npx tree-sitter build --wasm node_modules/tree-sitter-javascript
```

If everything went well, the file `tree-sitter-javascript.wasm` should have been generated in the current directory.

### Running .wasm in Node.js

Notice that executing `.wasm` files in Node.js is considerably slower than running [Node.js bindings][node bindings].
However, this could be useful for testing purposes:

```javascript
const { Parser, Language } = require('web-tree-sitter');

(async () => {
  await Parser.init();
  const parser = new Parser();
  const Lang = await Language.load('tree-sitter-javascript.wasm');
  parser.setLanguage(Lang);
  const tree = parser.parse('let x = 1;');
  console.log(tree.rootNode.toString());
})();
```

### Running .wasm in browser

`web-tree-sitter` can run in the browser, but there are some common pitfalls.

#### Loading the .wasm file

`web-tree-sitter` needs to load the `web-tree-sitter.wasm` file. By default, it assumes that this file is available in the
same path as the JavaScript code. Therefore, if the code is being served from `http://localhost:3000/bundle.js`, then
the wasm file should be at `http://localhost:3000/web-tree-sitter.wasm`.

For server-side frameworks like Next.js, this can be tricky as pages are often served from a path such as
`http://localhost:3000/_next/static/chunks/pages/index.js`. The loader will therefore look for the wasm file at
`http://localhost:3000/_next/static/chunks/pages/web-tree-sitter.wasm`. The solution is to pass a `locateFile` function in
the `moduleOptions` argument to `Parser.init()`:

```javascript
await Parser.init({
  locateFile(scriptName, scriptDirectory) {
    return scriptName;
  },
});
```

`locateFile` takes two parameters: `scriptName`, i.e. the wasm file name, and `scriptDirectory`, i.e. the directory
where the loader expects the script to be. It returns the path where the loader will look for the wasm file. In the Next.js
case, we want to return just the `scriptName` so that the loader will look at `http://localhost:3000/web-tree-sitter.wasm`
and not `http://localhost:3000/_next/static/chunks/pages/web-tree-sitter.wasm`.

For more information on the module options you can pass in, see the [emscripten documentation][emscripten-module-options].

#### "Can't resolve 'fs' in 'node_modules/web-tree-sitter'"

Most bundlers will notice that the `web-tree-sitter.js` file is attempting to import `fs`, i.e. node's file system library.
Since this doesn't exist in the browser, the bundlers will get confused. For Webpack, you can fix this by adding the
following to your webpack config:

```javascript
{
  resolve: {
    fallback: {
      fs: false
    }
  }
}
```

[docker]: https://www.docker.com
[emscripten]: https://emscripten.org
[emscripten-module-options]: https://emscripten.org/docs/api_reference/module.html#affecting-execution
[gh release]: https://github.com/tree-sitter/tree-sitter/releases/latest
[gh release js]: https://github.com/tree-sitter/tree-sitter-javascript/releases/latest
[node bindings]: https://github.com/tree-sitter/node-tree-sitter
[npm module]: https://www.npmjs.com/package/web-tree-sitter
[podman]: https://podman.io



================================================
FILE: lib/binding_web/CONTRIBUTING.md
================================================
# Contributing

## Code of Conduct

Contributors to Tree-sitter should abide by the [Contributor Covenant][covenant].

## Developing Web-tree-sitter

### Prerequisites

To make changes to Web-tree-sitter, you should have:

1. A [Rust toolchain][rust], for running the xtasks necessary to build the library.
2. Node.js and NPM (or an equivalent package manager).
3. Either [Emscripten][emscripten], [Docker][docker], or [Podman][podman] for
   compiling the library to WASM.

### Building

Clone the repository:

```sh
git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter/lib/binding_web
```

Install the necessary dependencies:

```sh
npm install
```

Build the library:

```sh
npm run build
```

Note that the build process requires a Rust toolchain to be installed. If you don't have one installed, you can install it
by visiting the [Rust website][rust] and following the instructions there.

> [!NOTE]
> By default, the build process will emit an ES6 module. If you need a CommonJS module, set the `CJS` environment variable
> to `true`, e.g. by running `CJS=true npm run build` (or the equivalent command for Windows).

> [!TIP]
> To build the library with debug information, you can run `npm run build:debug`. The `CJS` environment variable is still
> taken into account.

### Putting it together

#### The C side

There are several components that come together to build the final JS and WASM files. First, we use `emscripten` in our
xtask located at `xtask/src/build_wasm.rs` from the root directory to compile the WASM files. This WASM module is output into the
local `lib` folder, and is used only in [`src/bindings.ts`][bindings.ts] to handle loading the WASM module. The C code that
is compiled into the WASM module is located at [`lib/tree-sitter.c`][tree-sitter.c], and contains all the necessary
glue code to interact with the JS environment. If you need to update the imported functions from the tree-sitter library,
or anywhere else, you must update [`lib/exports.txt`][exports.txt]. Lastly, the type information for the WASM module is
located at [`lib/tree-sitter.d.ts`][tree-sitter.d.ts], and can be updated by running `cargo xtask build-wasm --emit-tsd`
from the root directory.

#### The TypeScript side

The TypeScript library is a higher level abstraction over the WASM module, and is located in `src`. This is where the
public API is defined, and where the WASM module is loaded and initialized. The TypeScript library is built into a single
ES6 (or CommonJS) module, and is output into the same directory as `package.json`. If you need to update the public API,
you can do so by editing the files in `src`.

If you make changes to the library that require updating the type definitions, such as adding a new public API method,
you should run:

```sh
npm run build:dts
```

This uses [`dts-buddy`][dts-buddy] to generate `web-tree-sitter.d.ts` from the public types in `src`. Additionally, a sourcemap
is generated for the `.d.ts` file, which enables `go-to definition` and other editor integrations to take you straight
to the TypeScript source code.

This TypeScript code is then compiled into a single JavaScript file with `esbuild`. The build configuration for this can
be found in [`script/build.js`][build.js], but this shouldn't need to be updated. This step is responsible for emitting
the final JS and WASM files that are shipped with the library, as well as their sourcemaps.

### Testing

Before you can run the tests, you need to fetch and build some upstream grammars that are used for testing.
Run this in the root of the repository:

```sh
cargo xtask fetch-fixtures
```

Optionally, to update the generated parser.c files:

```sh
cargo xtask generate-fixtures
```

Then you can build the WASM modules:

```sh
cargo xtask generate-fixtures --wasm
```

Now, you can run the tests. In the `lib/binding_web` directory, run:

```sh
npm test
```

> [!NOTE]
> We use `vitest` to run the tests. If you want to run a specific test, you can use the `-t` flag to pass in a pattern.
> If you want to run a specific file, you can just pass the name of the file as is. For example, to run the `parser` tests
> in `test/parser.test.ts`, you can run `npm test parser`. To run tests that have the name `descendant` somewhere, run
> `npm test -- -t descendant`.
>
> For coverage information, you can run `npm test -- --coverage`.

### Debugging

You might have noticed that when you ran `npm run build`, the build process generated a couple of [sourcemaps][sourcemap]:
`web-tree-sitter.js.map` and `web-tree-sitter.wasm.map`. These sourcemaps can be used to debug the library in the browser, and are
shipped with the library on both NPM and the GitHub releases.

#### Tweaking the Emscripten build

If you're trying to tweak the Emscripten build, or are trying to debug an issue, the code for this lies in the
`xtask/src/build_wasm.rs` file mentioned earlier, namely in the `run_wasm` function.

[bindings.ts]: src/bindings.ts
[build.js]: script/build.js
[covenant]: https://www.contributor-covenant.org/version/1/4/code-of-conduct
[docker]: https://www.docker.com
[dts-buddy]: https://github.com/Rich-Harris/dts-buddy
[emscripten]: https://emscripten.org
[exports.txt]: lib/exports.txt
[podman]: https://podman.io
[rust]: https://www.rust-lang.org/tools/install
[sourcemap]: https://developer.mozilla.org/en-US/docs/Glossary/Source_map
[tree-sitter.c]: lib/tree-sitter.c
[tree-sitter.d.ts]: lib/tree-sitter.d.ts



================================================
FILE: lib/binding_web/eslint.config.mjs
================================================
import eslint from '@eslint/js';
import tseslint from 'typescript-eslint';

// Parser settings shared by every linted file: type-aware rules are enabled
// through the project service, with tsconfig lookup rooted at this directory.
const typeAwareParserOptions = {
  projectService: true,
  tsconfigRootDir: import.meta.dirname,
};

// Project-specific adjustments layered on top of the preset configs below.
const ruleOverrides = {
  // Switch-case fallthrough and non-null assertions are permitted.
  'no-fallthrough': 'off',
  '@typescript-eslint/no-non-null-assertion': 'off',
  // Constant loop conditions such as `while (true)` are not reported.
  '@typescript-eslint/no-unnecessary-condition': [
    'error',
    { allowConstantLoopConditions: true },
  ],
  // Numbers may be interpolated into template literals.
  '@typescript-eslint/restrict-template-expressions': [
    'error',
    { allowNumber: true },
  ],
};

// Flat config: presets first, then the local overrides so that they win.
export default tseslint.config(
  eslint.configs.recommended,
  tseslint.configs.recommendedTypeChecked,
  tseslint.configs.strictTypeChecked,
  tseslint.configs.stylisticTypeChecked,
  {
    languageOptions: { parserOptions: typeAwareParserOptions },
    rules: ruleOverrides,
  },
);



================================================
FILE: lib/binding_web/package.json
================================================
{
  "name": "web-tree-sitter",
  "version": "0.26.0",
  "description": "Tree-sitter bindings for the web",
  "repository": "https://github.com/tree-sitter/tree-sitter",
  "homepage": "https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web",
  "license": "MIT",
  "author": {
    "name": "Max Brunsfeld",
    "email": "maxbrunsfeld@gmail.com"
  },
  "maintainers": [
    {
      "name": "Amaan Qureshi",
      "email": "amaanq12@gmail.com"
    }
  ],
  "type": "module",
  "exports": {
    ".": {
      "import": {
        "types": "./web-tree-sitter.d.ts",
        "default": "./web-tree-sitter.js"
      },
      "require": {
        "types": "./web-tree-sitter.d.cts",
        "default": "./web-tree-sitter.cjs"
      }
    },
    "./web-tree-sitter.wasm": "./web-tree-sitter.wasm",
    "./debug": {
      "import": {
        "types": "./web-tree-sitter.d.ts",
        "default": "./debug/web-tree-sitter.js"
      },
      "require": {
        "types": "./web-tree-sitter.d.cts",
        "default": "./debug/web-tree-sitter.cjs"
      }
    },
    "./debug/web-tree-sitter.wasm": "./debug/web-tree-sitter.wasm"
  },
  "types": "web-tree-sitter.d.ts",
  "keywords": [
    "incremental",
    "parsing",
    "tree-sitter",
    "wasm"
  ],
  "files": [
    "README.md",
    "web-tree-sitter.cjs",
    "web-tree-sitter.cjs.map",
    "web-tree-sitter.js",
    "web-tree-sitter.js.map",
    "web-tree-sitter.wasm",
    "web-tree-sitter.wasm.map",
    "debug/web-tree-sitter.cjs",
    "debug/web-tree-sitter.cjs.map",
    "debug/web-tree-sitter.js",
    "debug/web-tree-sitter.js.map",
    "debug/web-tree-sitter.wasm",
    "debug/web-tree-sitter.wasm.map",
    "web-tree-sitter.d.ts",
    "web-tree-sitter.d.ts.map",
    "web-tree-sitter.d.cts",
    "web-tree-sitter.d.cts.map",
    "src/**/*.ts",
    "lib/*.c",
    "lib/*.h"
  ],
  "devDependencies": {
    "@eslint/js": "^9.20.0",
    "@types/emscripten": "^1.40.0",
    "@types/node": "^22.13.1",
    "@vitest/coverage-v8": "^3.0.5",
    "dts-buddy": "^0.5.4",
    "esbuild": "^0.25.0",
    "eslint": "^9.20.0",
    "source-map": "^0.7.4",
    "tsx": "^4.19.2",
    "typescript": "^5.7.3",
    "typescript-eslint": "^8.23.0",
    "vitest": "^3.0.5"
  },
  "scripts": {
    "build:ts": "node script/build.js",
    "build:wasm": "cd ../../ && cargo xtask build-wasm",
    "build:wasm:debug": "cd ../../ && cargo xtask build-wasm --debug",
    "build": "npm run build:wasm && npm run build:ts",
    "build:debug": "npm run build:wasm:debug && npm run build:ts -- --debug",
    "build:dts": "node script/generate-dts.js",
    "lint": "eslint src/*.ts script/*.ts test/*.ts",
    "lint:fix": "eslint src/*.ts script/*.ts test/*.ts --fix",
    "test": "vitest run",
    "test:watch": "vitest",
    "prepack": "cp ../../LICENSE .",
    "prepublishOnly": "tsx script/check-artifacts-fresh.ts"
  }
}



================================================
FILE: lib/binding_web/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "es2022",
    "module": "es2022",
    "lib": [
      "es2022",
      "dom"
    ],
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "rootDir": "./",
    "outDir": "./dist",
    "strict": true,
    "noImplicitAny": true,
    "strictNullChecks": true,
    "strictFunctionTypes": true,
    "strictPropertyInitialization": true,
    "noImplicitThis": true,
    "alwaysStrict": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noImplicitReturns": true,
    "moduleResolution": "node",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "skipLibCheck": true,
  },
  "include": [
    "src/**/*",
    "script/**/*",
    "test/**/*",
  ],
  "exclude": [
    "node_modules",
    "dist",
  ]
}



================================================
FILE: lib/binding_web/vitest.config.ts
================================================
import { defineConfig } from 'vitest/config'

// Coverage is collected for the built bundle only; the listed directories are
// excluded from the report.
const coverage = {
  include: ['web-tree-sitter.js'],
  exclude: ['test/**', 'dist/**', 'lib/**', 'wasm/**'],
};

// Vitest configuration: global test APIs, Node environment.
export default defineConfig({
  test: {
    globals: true,
    environment: 'node',
    coverage,
  },
});



================================================
FILE: lib/binding_web/web-tree-sitter.d.cts
================================================
declare module 'web-tree-sitter' {
	/**
	 * A position in a multi-line text document, in terms of rows and columns.
	 *
	 * Rows and columns are zero-based.
	 */
	export interface Point {
		/** The zero-based row number. */
		row: number;
		/** The zero-based column number. */
		column: number;
	}
	/**
	 *  A range of positions in a multi-line text document, both in terms of bytes
	 *  and of rows and columns.
	 */
	export interface Range {
		/** The start position of the range. */
		startPosition: Point;
		/** The end position of the range. */
		endPosition: Point;
		/** The start index of the range. */
		startIndex: number;
		/** The end index of the range. */
		endIndex: number;
	}
	/**
	 * A summary of a change to a text document.
	 */
	export interface Edit {
		/** The start position of the change. */
		startPosition: Point;
		/** The end position of the change before the edit. */
		oldEndPosition: Point;
		/** The end position of the change after the edit. */
		newEndPosition: Point;
		/** The start index of the change. */
		startIndex: number;
		/** The end index of the change before the edit. */
		oldEndIndex: number;
		/** The end index of the change after the edit. */
		newEndIndex: number;
	}
	/**
	 * A callback that supplies the text to parse piece by piece.
	 *
	 * It receives an index and the matching {@link Point}, and should return the
	 * text that starts at that location, or `undefined` when there is no text to
	 * return (NOTE(review): presumably this signals the end of the input — confirm).
	 */
	export type ParseCallback = (index: number, position: Point) => string | undefined;
	/**
	 * A callback that receives the current {@link ParseState} during parsing and
	 * returns a boolean.
	 *
	 * NOTE(review): {@link ParseOptions#progressCallback} documents that returning
	 * `true` cancels parsing; presumably the same meaning applies here — confirm.
	 */
	export type ProgressCallback = (progress: ParseState) => boolean;
	/**
	 * A callback for logging messages.
	 *
	 * If `isLex` is `true`, the message is from the lexer, otherwise it's from the parser.
	 */
	export type LogCallback = (message: string, isLex: boolean) => void;
	/**
	 * Options for parsing
	 *
	 * The `includedRanges` property is an array of {@link Range} objects that
	 * represent the ranges of text that the parser should include when parsing.
	 *
	 * The `progressCallback` property is a function that is called periodically
	 * during parsing to check whether parsing should be cancelled.
	 *
	 * See {@link Parser#parse} for more information.
	 */
	export interface ParseOptions {
		/**
		 * An array of {@link Range} objects that
		 * represent the ranges of text that the parser should include when parsing.
		 *
		 * By default, the parser will always include entire documents. This
		 * option allows you to parse only a *portion* of a document but
		 * still return a syntax tree whose ranges match up with the document
		 * as a whole. You can also pass multiple disjoint ranges.
		 * If `includedRanges` is empty, then the entire document will be parsed.
		 * Otherwise, the given ranges must be ordered from earliest to latest
		 * in the document, and they must not overlap. That is, the following
		 * must hold for all `i` < `length - 1`:
		 * ```text
		 *     includedRanges[i].endIndex <= includedRanges[i + 1].startIndex
		 * ```
		 */
		includedRanges?: Range[];
		/**
		 * A function that is called periodically during parsing to check
		 * whether parsing should be cancelled. If the progress callback returns
		 * `true`, then parsing will be cancelled. You can also use this to instrument
		 * parsing and check where the parser is at in the document. The progress callback
		 * takes a single argument, which is a {@link ParseState} representing the current
		 * state of the parser.
		 *
		 * The return type admits `void` so that callbacks used purely for
		 * instrumentation (which return nothing) remain valid.
		 */
		progressCallback?: (state: ParseState) => boolean | void;
	}
	/**
	 * A stateful object that is passed into the progress callback {@link ParseOptions#progressCallback}
	 * to provide the current state of the parser.
	 */
	export interface ParseState {
		/** The byte offset in the document that the parser is at. */
		currentOffset: number;
		/** Indicates whether the parser has encountered an error during parsing. */
		hasError: boolean;
	}
	/**
	 * The latest ABI version that is supported by the current version of the
	 * library.
	 *
	 * When Languages are generated by the Tree-sitter CLI, they are
	 * assigned an ABI version number that corresponds to the current CLI version.
	 * The Tree-sitter library is generally backwards-compatible with languages
	 * generated using older CLI versions, but is not forwards-compatible.
	 */
	export let LANGUAGE_VERSION: number;
	/**
	 * The earliest ABI version that is supported by the current version of the
	 * library.
	 */
	export let MIN_COMPATIBLE_VERSION: number;
	/**
	 * A stateful object that is used to produce a {@link Tree} based on some
	 * source code.
	 */
	export class Parser {
		/** The parser's current language. */
		language: Language | null;
		/**
		 * This must always be called before creating a Parser.
		 *
		 * You can optionally pass in options to configure the WASM module, the most common
		 * one being `locateFile` to help the module find the `.wasm` file.
		 */
		static init(moduleOptions?: EmscriptenModule): Promise<void>;
		/**
		 * Create a new parser.
		 */
		constructor();
		/** Delete the parser, freeing its resources. */
		delete(): void;
		/**
		 * Set the language that the parser should use for parsing.
		 *
		 * If the language was not successfully assigned, an error will be thrown.
		 * This happens if the language was generated with an incompatible
		 * version of the Tree-sitter CLI. Check the language's ABI version using
		 * {@link Language#abiVersion} and compare it to this library's
		 * {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION} constants.
		 */
		setLanguage(language: Language | null): this;
		/**
		 * Parse a slice of UTF8 text.
		 *
		 * @param callback - The UTF8-encoded text to parse or a callback function.
		 *
		 * @param oldTree - A previous syntax tree parsed from the same document. If the text of the
		 *   document has changed since `oldTree` was created, then you must edit `oldTree` to match
		 *   the new text using {@link Tree#edit}.
		 *
		 * @param options - Options for parsing the text.
		 *  This can be used to set the included ranges, or a progress callback.
		 *
		 * @returns A {@link Tree} if parsing succeeded, or `null` if:
		 *  - The parser has not yet had a language assigned with {@link Parser#setLanguage}.
		 *  - The progress callback returned true.
		 */
		parse(callback: string | ParseCallback, oldTree?: Tree | null, options?: ParseOptions): Tree | null;
		/**
		 * Instruct the parser to start the next parse from the beginning.
		 *
		 * If the parser previously failed because of a timeout, cancellation,
		 * or callback, then by default, it will resume where it left off on the
		 * next call to {@link Parser#parse} or other parsing functions.
		 * If you don't want to resume, and instead intend to use this parser to
		 * parse some other document, you must call `reset` first.
		 */
		reset(): void;
		/** Get the ranges of text that the parser will include when parsing. */
		getIncludedRanges(): Range[];
		/**
		 * Get the duration in microseconds that parsing is allowed to take.
		 *
		 * This is set via {@link Parser#setTimeoutMicros}.
		 *
		 * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
		 */
		getTimeoutMicros(): number;
		/**
		 * Set the maximum duration in microseconds that parsing should be allowed
		 * to take before halting.
		 *
		 * If parsing takes longer than this, it will halt early, returning `null`.
		 * See {@link Parser#parse} for more information.
		 *
		 * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
		 */
		setTimeoutMicros(timeout: number): void;
		/**
		 * Set the logging callback that a parser should use during parsing.
		 *
		 * NOTE(review): besides a {@link LogCallback}, a boolean or `null` is accepted;
		 * presumably `null`/`false` disables logging — confirm against the implementation.
		 */
		setLogger(callback: LogCallback | boolean | null): this;
		/** Get the parser's current logger. */
		getLogger(): LogCallback | null;
	}
	/**
	 * Version metadata for a language, as returned by {@link Language#metadata}.
	 *
	 * NOTE(review): the three fields look like the components of a semantic
	 * version taken from the language's `tree-sitter.json` — confirm.
	 */
	class LanguageMetadata {
		/** The major version component. */
		readonly major_version: number;
		/** The minor version component. */
		readonly minor_version: number;
		/** The patch version component. */
		readonly patch_version: number;
	}
	/**
	 * An opaque object that defines how to parse a particular language.
	 * The code for each `Language` is generated by the Tree-sitter CLI.
	 */
	export class Language {
		/**
		 * A list of all node types in the language. The index of each type in this
		 * array is its node type id.
		 */
		types: string[];
		/**
		 * A list of all field names in the language. The index of each field name in
		 * this array is its field id.
		 */
		fields: (string | null)[];
		/**
		 * Gets the name of the language.
		 */
		get name(): string | null;
		/**
		 * Gets the version of the language.
		 *
		 * @deprecated since version 0.25.0, use {@link Language#abiVersion} instead
		 */
		get version(): number;
		/**
		 * Gets the ABI version of the language.
		 */
		get abiVersion(): number;
		/**
		 * Get the metadata for this language. This information is generated by the
		 * CLI, and relies on the language author providing the correct metadata in
		 * the language's `tree-sitter.json` file.
		 */
		get metadata(): LanguageMetadata | null;
		/**
		 * Gets the number of fields in the language.
		 */
		get fieldCount(): number;
		/**
		 * Gets the number of states in the language.
		 */
		get stateCount(): number;
		/**
		 * Get the field id for a field name.
		 */
		fieldIdForName(fieldName: string): number | null;
		/**
		 * Get the field name for a field id.
		 */
		fieldNameForId(fieldId: number): string | null;
		/**
		 * Get the node type id for a node type name.
		 */
		idForNodeType(type: string, named: boolean): number | null;
		/**
		 * Gets the number of node types in the language.
		 */
		get nodeTypeCount(): number;
		/**
		 * Get the node type name for a node type id.
		 */
		nodeTypeForId(typeId: number): string | null;
		/**
		 * Check if a node type is named.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/2-basic-parsing.html#named-vs-anonymous-nodes}
		 */
		nodeTypeIsNamed(typeId: number): boolean;
		/**
		 * Check if a node type is visible.
		 */
		nodeTypeIsVisible(typeId: number): boolean;
		/**
		 * Get the supertypes ids of this language.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.html?highlight=supertype#supertype-nodes}
		 */
		get supertypes(): number[];
		/**
		 * Get the subtype ids for a given supertype node id.
		 */
		subtypes(supertype: number): number[];
		/**
		 * Get the next state id for a given state id and node type id.
		 */
		nextState(stateId: number, typeId: number): number;
		/**
		 * Create a new lookahead iterator for this language and parse state.
		 *
		 * This returns `null` if state is invalid for this language.
		 *
		 * Iterating {@link LookaheadIterator} will yield valid symbols in the given
		 * parse state. Newly created lookahead iterators will return the `ERROR`
		 * symbol from {@link LookaheadIterator#currentType}.
		 *
		 * Lookahead iterators can be useful for generating suggestions and improving
		 * syntax error diagnostics. To get symbols valid in an `ERROR` node, use the
		 * lookahead iterator on its first leaf node state. For `MISSING` nodes, a
		 * lookahead iterator created on the previous non-extra leaf node may be
		 * appropriate.
		 */
		lookaheadIterator(stateId: number): LookaheadIterator | null;
		/**
		 * Create a new query from a string containing one or more S-expression
		 * patterns.
		 *
		 * The query is associated with a particular language, and can only be run
		 * on syntax nodes parsed with that language. References to Queries can be
		 * shared between multiple threads.
		 *
		 * @deprecated since version 0.25.0, call `new` on a {@link Query} instead
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
		 */
		query(source: string): Query;
		/**
		 * Load a language from a WebAssembly module.
		 * The module can be provided as a path to a file or as a buffer.
		 */
		static load(input: string | Uint8Array): Promise<Language>;
	}
	/** A tree that represents the syntactic structure of a source code file. */
	export class Tree {
		/** The language that was used to parse the syntax tree. */
		language: Language;
		/** Create a shallow copy of the syntax tree. This is very fast. */
		copy(): Tree;
		/** Delete the syntax tree, freeing its resources. */
		delete(): void;
		/** Get the root node of the syntax tree. */
		get rootNode(): Node;
		/**
		 * Get the root node of the syntax tree, but with its position shifted
		 * forward by the given offset.
		 */
		rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): Node;
		/**
		 * Edit the syntax tree to keep it in sync with source code that has been
		 * edited.
		 *
		 * You must describe the edit both in terms of byte offsets and in terms of
		 * row/column coordinates.
		 */
		edit(edit: Edit): void;
		/** Create a new {@link TreeCursor} starting from the root of the tree. */
		walk(): TreeCursor;
		/**
		 * Compare this old edited syntax tree to a new syntax tree representing
		 * the same document, returning a sequence of ranges whose syntactic
		 * structure has changed.
		 *
		 * For this to work correctly, this syntax tree must have been edited such
		 * that its ranges match up to the new tree. Generally, you'll want to
		 * call this method right after calling {@link Parser#parse}.
		 * Call it on the old tree that was passed to `parse`, and
		 * pass the new tree that was returned from `parse`.
		 */
		getChangedRanges(other: Tree): Range[];
		/** Get the included ranges that were used to parse the syntax tree. */
		getIncludedRanges(): Range[];
	}
	/** A single node within a syntax {@link Tree}. */
	export class Node {
		/**
		 * The numeric id for this node that is unique.
		 *
		 * Within a given syntax tree, no two nodes have the same id. However:
		 *
		 * * If a new tree is created based on an older tree, and a node from the old tree is reused in
		 *   the process, then that node will have the same id in both trees.
		 *
		 * * A node not marked as having changes does not guarantee it was reused.
		 *
		 * * If a node is marked as having changed in the old tree, it will not be reused.
		 */
		id: number;
		/** The byte index where this node starts. */
		startIndex: number;
		/** The position where this node starts. */
		startPosition: Point;
		/** The tree that this node belongs to. */
		tree: Tree;
		/** Get this node's type as a numerical id. */
		get typeId(): number;
		/**
		 * Get the node's type as a numerical id as it appears in the grammar,
		 * ignoring aliases.
		 */
		get grammarId(): number;
		/** Get this node's type as a string. */
		get type(): string;
		/**
		 * Get this node's symbol name as it appears in the grammar, ignoring
		 * aliases as a string.
		 */
		get grammarType(): string;
		/**
		 * Check if this node is *named*.
		 *
		 * Named nodes correspond to named rules in the grammar, whereas
		 * *anonymous* nodes correspond to string literals in the grammar.
		 */
		get isNamed(): boolean;
		/**
		 * Check if this node is *extra*.
		 *
		 * Extra nodes represent things like comments, which are not required
		 * by the grammar, but can appear anywhere.
		 */
		get isExtra(): boolean;
		/**
		 * Check if this node represents a syntax error.
		 *
		 * Syntax errors represent parts of the code that could not be incorporated
		 * into a valid syntax tree.
		 */
		get isError(): boolean;
		/**
		 * Check if this node is *missing*.
		 *
		 * Missing nodes are inserted by the parser in order to recover from
		 * certain kinds of syntax errors.
		 */
		get isMissing(): boolean;
		/** Check if this node has been edited. */
		get hasChanges(): boolean;
		/**
		 * Check if this node represents a syntax error or contains any syntax
		 * errors anywhere within it.
		 */
		get hasError(): boolean;
		/** Get the byte index where this node ends. */
		get endIndex(): number;
		/** Get the position where this node ends. */
		get endPosition(): Point;
		/** Get the string content of this node. */
		get text(): string;
		/** Get this node's parse state. */
		get parseState(): number;
		/** Get the parse state after this node. */
		get nextParseState(): number;
		/** Check if this node is equal to another node. */
		equals(other: Node): boolean;
		/**
		 * Get the node's child at the given index, where zero represents the first child.
		 *
		 * This method is fairly fast, but its cost is technically log(n), so if
		 * you might be iterating over a long list of children, you should use
		 * {@link Node#children} instead.
		 */
		child(index: number): Node | null;
		/**
		 * Get this node's *named* child at the given index.
		 *
		 * See also {@link Node#isNamed}.
		 * This method is fairly fast, but its cost is technically log(n), so if
		 * you might be iterating over a long list of children, you should use
		 * {@link Node#namedChildren} instead.
		 */
		namedChild(index: number): Node | null;
		/**
		 * Get this node's child with the given numerical field id.
		 *
		 * See also {@link Node#childForFieldName}. You can
		 * convert a field name to an id using {@link Language#fieldIdForName}.
		 */
		childForFieldId(fieldId: number): Node | null;
		/**
		 * Get the first child with the given field name.
		 *
		 * If multiple children may have the same field name, access them using
		 * {@link Node#childrenForFieldName}.
		 */
		childForFieldName(fieldName: string): Node | null;
		/** Get the field name of this node's child at the given index. */
		fieldNameForChild(index: number): string | null;
		/** Get the field name of this node's named child at the given index. */
		fieldNameForNamedChild(index: number): string | null;
		/**
		 * Get an array of this node's children with a given field name.
		 *
		 * See also {@link Node#children}.
		 */
		childrenForFieldName(fieldName: string): (Node | null)[];
		/**
		  * Get an array of this node's children with a given field id.
		  *
		  * See also {@link Node#childrenForFieldName}.
		  */
		childrenForFieldId(fieldId: number): (Node | null)[];
		/** Get the node's first child that contains or starts after the given byte offset. */
		firstChildForIndex(index: number): Node | null;
		/** Get the node's first named child that contains or starts after the given byte offset. */
		firstNamedChildForIndex(index: number): Node | null;
		/**
		 * Get this node's number of children.
		 *
		 * See also {@link Node#namedChildCount}.
		 */
		get childCount(): number;
		/**
		 * Get this node's number of *named* children.
		 *
		 * See also {@link Node#childCount} and {@link Node#isNamed}.
		 */
		get namedChildCount(): number;
		/**
		 * Get this node's first child.
		 *
		 * See also {@link Node#firstNamedChild} and {@link Node#lastChild}.
		 */
		get firstChild(): Node | null;
		/**
		 * Get this node's first *named* child.
		 *
		 * See also {@link Node#firstChild} and {@link Node#isNamed}.
		 */
		get firstNamedChild(): Node | null;
		/**
		 * Get this node's last child.
		 *
		 * See also {@link Node#lastNamedChild} and {@link Node#firstChild}.
		 */
		get lastChild(): Node | null;
		/**
		 * Get this node's last *named* child.
		 *
		 * See also {@link Node#lastChild} and {@link Node#isNamed}.
		 */
		get lastNamedChild(): Node | null;
		/**
		 * Get an array of this node's children.
		 *
		 * If you're walking the tree recursively, you may want to use the
		 * {@link TreeCursor} APIs directly instead.
		 *
		 * See also {@link Node#namedChildren}.
		 */
		get children(): (Node | null)[];
		/**
		 * Get an array of this node's *named* children.
		 *
		 * See also {@link Node#children} and {@link Node#isNamed}.
		 */
		get namedChildren(): (Node | null)[];
		/**
		 * Get the descendants of this node that are the given type, or in the given types array.
		 *
		 * The types array should contain node type strings, which can be retrieved from {@link Language#types}.
		 *
		 * Additionally, a `startPosition` and `endPosition` can be passed in to restrict the search to
		 * the range between those two {@link Point}s.
		 *
		 * @param types - A single node type string, or an array of node type strings.
		 *
		 * @param startPosition - Optional position where the searched range starts.
		 *
		 * @param endPosition - Optional position where the searched range ends.
		 */
		descendantsOfType(types: string | string[], startPosition?: Point, endPosition?: Point): (Node | null)[];
		/**
		 * Get this node's next sibling.
		 *
		 * See also {@link Node#nextNamedSibling} and {@link Node#previousSibling}.
		 */
		get nextSibling(): Node | null;
		/**
		 * Get this node's previous sibling.
		 *
		 * See also {@link Node#previousNamedSibling} and {@link Node#nextSibling}.
		 */
		get previousSibling(): Node | null;
		/**
		 * Get this node's next *named* sibling.
		 *
		 * See also {@link Node#nextSibling} and {@link Node#isNamed}.
		 */
		get nextNamedSibling(): Node | null;
		/**
		 * Get this node's previous *named* sibling.
		 *
		 * See also {@link Node#previousSibling} and {@link Node#isNamed}.
		 */
		get previousNamedSibling(): Node | null;
		/**
		 * Get the node's number of descendants.
		 *
		 * The count includes one for the node itself.
		 */
		get descendantCount(): number;
		/**
		 * Get this node's immediate parent.
		 *
		 * Prefer {@link Node#childWithDescendant} for iterating over this node's ancestors.
		 */
		get parent(): Node | null;
		/**
		 * Get the node that contains `descendant`.
		 *
		 * Note that this can return `descendant` itself.
		 *
		 * @param descendant - The node that the returned node must contain.
		 */
		childWithDescendant(descendant: Node): Node | null;
		/**
		 * Get the smallest node within this node that spans the given byte range.
		 *
		 * @param start - The byte offset where the range starts.
		 *
		 * @param end - Optional byte offset where the range ends.
		 */
		descendantForIndex(start: number, end?: number): Node | null;
		/**
		 * Get the smallest *named* node within this node that spans the given byte range.
		 *
		 * @param start - The byte offset where the range starts.
		 *
		 * @param end - Optional byte offset where the range ends.
		 */
		namedDescendantForIndex(start: number, end?: number): Node | null;
		/**
		 * Get the smallest node within this node that spans the given point range.
		 *
		 * @param start - The {@link Point} where the range starts.
		 *
		 * @param end - Optional {@link Point} where the range ends.
		 */
		descendantForPosition(start: Point, end?: Point): Node | null;
		/**
		 * Get the smallest *named* node within this node that spans the given point range.
		 *
		 * @param start - The {@link Point} where the range starts.
		 *
		 * @param end - Optional {@link Point} where the range ends.
		 */
		namedDescendantForPosition(start: Point, end?: Point): Node | null;
		/**
		 * Create a new {@link TreeCursor} starting from this node.
		 *
		 * Note that this node is considered the root of the cursor,
		 * and the cursor cannot walk outside this node.
		 */
		walk(): TreeCursor;
		/**
		 * Edit this node to keep it in-sync with source code that has been edited.
		 *
		 * This function is only rarely needed. When you edit a syntax tree with
		 * the {@link Tree#edit} method, all of the nodes that you retrieve from
		 * the tree afterward will already reflect the edit. You only need to
		 * use {@link Node#edit} when you have a specific {@link Node} instance that
		 * you want to keep and continue to use after an edit.
		 *
		 * @param edit - The {@link Edit} describing the change to the source code.
		 */
		edit(edit: Edit): void;
		/** Get the S-expression representation of this node, as a string. */
		toString(): string;
	}
	/**
	 * A stateful object for walking a syntax {@link Tree} efficiently.
	 *
	 * A cursor can be created with {@link Node#walk}. The node it was constructed
	 * with is considered its root, and the cursor cannot walk outside that node.
	 */
	export class TreeCursor {
		/** Creates a deep copy of the tree cursor. This allocates new memory. */
		copy(): TreeCursor;
		/** Delete the tree cursor, freeing its resources. */
		delete(): void;
		/** Get the tree cursor's current {@link Node}. */
		get currentNode(): Node;
		/**
		 * Get the numerical field id of this tree cursor's current node.
		 *
		 * See also {@link TreeCursor#currentFieldName}.
		 */
		get currentFieldId(): number;
		/**
		 * Get the field name of this tree cursor's current node.
		 *
		 * See also {@link TreeCursor#currentFieldId}.
		 */
		get currentFieldName(): string | null;
		/**
		 * Get the depth of the cursor's current node relative to the original
		 * node that the cursor was constructed with.
		 */
		get currentDepth(): number;
		/**
		 * Get the index of the cursor's current node out of all of the
		 * descendants of the original node that the cursor was constructed with.
		 *
		 * See also {@link TreeCursor#gotoDescendant}.
		 */
		get currentDescendantIndex(): number;
		/** Get the type of the cursor's current node. */
		get nodeType(): string;
		/** Get the type id of the cursor's current node. */
		get nodeTypeId(): number;
		/** Get the state id of the cursor's current node. */
		get nodeStateId(): number;
		/** Get the id of the cursor's current node. */
		get nodeId(): number;
		/**
		 * Check if the cursor's current node is *named*.
		 *
		 * Named nodes correspond to named rules in the grammar, whereas
		 * *anonymous* nodes correspond to string literals in the grammar.
		 */
		get nodeIsNamed(): boolean;
		/**
		 * Check if the cursor's current node is *missing*.
		 *
		 * Missing nodes are inserted by the parser in order to recover from
		 * certain kinds of syntax errors.
		 */
		get nodeIsMissing(): boolean;
		/** Get the string content of the cursor's current node. */
		get nodeText(): string;
		/** Get the start position of the cursor's current node. */
		get startPosition(): Point;
		/** Get the end position of the cursor's current node. */
		get endPosition(): Point;
		/** Get the start index of the cursor's current node. */
		get startIndex(): number;
		/** Get the end index of the cursor's current node. */
		get endIndex(): number;
		/**
		 * Move this cursor to the first child of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there were no children.
		 */
		gotoFirstChild(): boolean;
		/**
		 * Move this cursor to the last child of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there were no children.
		 *
		 * Note that this function may be slower than
		 * {@link TreeCursor#gotoFirstChild} because it needs to
		 * iterate through all the children to compute the child's position.
		 */
		gotoLastChild(): boolean;
		/**
		 * Move this cursor to the parent of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no parent node (the cursor was already on the
		 * root node).
		 *
		 * Note that the node the cursor was constructed with is considered the root
		 * of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoParent(): boolean;
		/**
		 * Move this cursor to the next sibling of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no next sibling node.
		 *
		 * Note that the node the cursor was constructed with is considered the root
		 * of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoNextSibling(): boolean;
		/**
		 * Move this cursor to the previous sibling of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no previous sibling node.
		 *
		 * Note that this function may be slower than
		 * {@link TreeCursor#gotoNextSibling} due to how node
		 * positions are stored. In the worst case, this will need to iterate
		 * through all the children up to the previous sibling node to recalculate
		 * its position. Also note that the node the cursor was constructed with is
		 * considered the root of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoPreviousSibling(): boolean;
		/**
		 * Move the cursor to the node that is the nth descendant of
		 * the original node that the cursor was constructed with, where
		 * zero represents the original node itself.
		 *
		 * See also {@link TreeCursor#currentDescendantIndex}.
		 */
		gotoDescendant(goalDescendantIndex: number): void;
		/**
		 * Move this cursor to the first child of its current node that contains or
		 * starts after the given byte offset.
		 *
		 * This returns `true` if the cursor successfully moved to a child node, and returns
		 * `false` if no such child was found.
		 */
		gotoFirstChildForIndex(goalIndex: number): boolean;
		/**
		 * Move this cursor to the first child of its current node that contains or
		 * starts after the given position.
		 *
		 * This returns `true` if the cursor successfully moved to a child node, and returns
		 * `false` if no such child was found.
		 */
		gotoFirstChildForPosition(goalPosition: Point): boolean;
		/**
		 * Re-initialize this tree cursor to start at the given node.
		 *
		 * See also {@link TreeCursor#resetTo}.
		 */
		reset(node: Node): void;
		/**
		 * Re-initialize a tree cursor to the same position as another cursor.
		 *
		 * Unlike {@link TreeCursor#reset}, this will not lose parent
		 * information and allows reusing already created cursors.
		 */
		resetTo(cursor: TreeCursor): void;
	}
	/**
	 * Options for query execution.
	 *
	 * These are accepted by {@link Query#matches} and {@link Query#captures}.
	 */
	export interface QueryOptions {
		/** The start position of the range to query */
		startPosition?: Point;
		/** The end position of the range to query */
		endPosition?: Point;
		/** The start index of the range to query */
		startIndex?: number;
		/** The end index of the range to query */
		endIndex?: number;
		/**
		 * The maximum number of in-progress matches for this query.
		 * The limit must be > 0 and <= 65536.
		 */
		matchLimit?: number;
		/**
		 * The maximum start depth for a query cursor.
		 *
		 * This prevents cursors from exploring children nodes at a certain depth.
		 * Note if a pattern includes many children, then they will still be
		 * checked.
		 *
		 * A max start depth of zero is a special case: it helps to destructure a
		 * subtree by staying on a node and using captures for the parts of
		 * interest. Note that a zero max start depth only limits the search depth
		 * for a pattern's root node; other nodes that are part of the pattern may
		 * be searched at any depth, as defined by the pattern structure.
		 *
		 * NOTE(review): earlier wording said to set this to `null` to remove the
		 * maximum start depth, but the declared type only accepts a number or
		 * `undefined`. Presumably omitting the option leaves the start depth
		 * unrestricted; confirm against the implementation.
		 */
		maxStartDepth?: number;
		/**
		 * The maximum duration in microseconds that query execution should be allowed to
		 * take before halting.
		 *
		 * If query execution takes longer than this, it will halt early, returning an empty array.
		 */
		timeoutMicros?: number;
		/**
		 * A function that will be called periodically during the execution of the query to check
		 * if query execution should be cancelled. You can also use this to instrument query execution
		 * and check where the query is at in the document. The progress callback takes a single argument,
		 * which is a {@link QueryState} representing the current state of the query.
		 *
		 * NOTE(review): the declared return type is `void`, so this signature does not
		 * show how the callback signals cancellation; confirm against the implementation.
		 */
		progressCallback?: (state: QueryState) => void;
	}
	/**
	 * A stateful object that is passed into the progress callback
	 * ({@link QueryOptions#progressCallback}) to provide the current state of the query.
	 */
	export interface QueryState {
		/** The byte offset in the document that the query is at. */
		currentOffset: number;
	}
	/**
	 * A record of key-value pairs associated with a particular pattern in a {@link Query}.
	 *
	 * Each key maps to either a string value or `null`.
	 */
	export type QueryProperties = Record<string, string | null>;
	/**
	 * A predicate that contains an operator and list of operands.
	 *
	 * See also {@link Query#predicates} and {@link Query#predicatesForPattern}.
	 */
	export interface QueryPredicate {
		/** The operator of the predicate, like `match?`, `eq?`, `set!`, etc. */
		operator: string;
		/** The operands of the predicate, which are either captures or strings. */
		operands: PredicateStep[];
	}
	/**
	 * A particular {@link Node} that has been captured with a particular name within a
	 * {@link Query}.
	 *
	 * Captures are returned by {@link Query#captures} and are also listed in
	 * {@link QueryMatch#captures}.
	 */
	export interface QueryCapture {
		/** The index of the pattern that matched. */
		patternIndex: number;
		/** The name of the capture. */
		name: string;
		/** The captured node. */
		node: Node;
		/** The properties for predicates declared with the operator `set!`. */
		setProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is?`. */
		assertedProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is-not?`. */
		refutedProperties?: QueryProperties;
	}
	/**
	 * A match of a {@link Query} to a particular set of {@link Node}s.
	 *
	 * Matches are returned by {@link Query#matches}.
	 */
	export interface QueryMatch {
		/**
		 * The index of the pattern that matched.
		 *
		 * @deprecated since version 0.25.0, use `patternIndex` instead.
		 */
		pattern: number;
		/** The index of the pattern that matched. */
		patternIndex: number;
		/** The captures associated with the match. */
		captures: QueryCapture[];
		/** The properties for predicates declared with the operator `set!`. */
		setProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is?`. */
		assertedProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is-not?`. */
		refutedProperties?: QueryProperties;
	}
	/**
	 * A quantifier for captures.
	 *
	 * This is the runtime object holding the numeric quantifier values; the type of
	 * the same name is the union of those values.
	 */
	export const CaptureQuantifier: {
		readonly Zero: 0;
		readonly ZeroOrOne: 1;
		readonly ZeroOrMore: 2;
		readonly One: 3;
		readonly OneOrMore: 4;
	};
	/**
	 * A quantifier for captures.
	 *
	 * This is the union of the numeric values of the `CaptureQuantifier` object
	 * (`0 | 1 | 2 | 3 | 4`).
	 */
	export type CaptureQuantifier = typeof CaptureQuantifier[keyof typeof CaptureQuantifier];
	/**
	 * Predicates are represented as a single array of steps. There are two
	 * types of steps, which correspond to the two legal values for
	 * the `type` field:
	 *
	 * - `CapturePredicateStep` (`type: 'capture'`) - Steps with this type
	 *    represent names of captures.
	 *
	 * - `StringPredicateStep` (`type: 'string'`) - Steps with this type
	 *    represent literal strings.
	 */
	export type PredicateStep = CapturePredicateStep | StringPredicateStep;
	/**
	 * A step in a predicate that refers to a capture.
	 *
	 * The `name` field is the name of the capture.
	 */
	interface CapturePredicateStep {
		/** Discriminant marking this step as a capture reference. */
		type: 'capture';
		/** The name of the capture. */
		name: string;
	}
	/**
	 * A step in a predicate that refers to a string.
	 *
	 * The `value` field is the string value.
	 */
	interface StringPredicateStep {
		/** Discriminant marking this step as a literal string. */
		type: 'string';
		/** The string value. */
		value: string;
	}
	/**
	 * A set of one or more S-expression patterns that can be run against the
	 * syntax {@link Node}s of a particular {@link Language}.
	 */
	export class Query {
		/** The names of the captures used in the query. */
		readonly captureNames: string[];
		/** The quantifiers of the captures used in the query. */
		readonly captureQuantifiers: CaptureQuantifier[][];
		/**
		 * The other user-defined predicates associated with the given index.
		 *
		 * This includes predicates with operators other than:
		 * - `match?`
		 * - `eq?` and `not-eq?`
		 * - `any-of?` and `not-any-of?`
		 * - `is?` and `is-not?`
		 * - `set!`
		 */
		readonly predicates: QueryPredicate[][];
		/** The properties for predicates with the operator `set!`. */
		readonly setProperties: QueryProperties[];
		/** The properties for predicates with the operator `is?`. */
		readonly assertedProperties: QueryProperties[];
		/** The properties for predicates with the operator `is-not?`. */
		readonly refutedProperties: QueryProperties[];
		/** The maximum number of in-progress matches for this query. */
		matchLimit?: number;
		/**
		 * Create a new query from a string containing one or more S-expression
		 * patterns.
		 *
		 * The query is associated with a particular language, and can only be run
		 * on syntax nodes parsed with that language. References to Queries can be
		 * shared between multiple threads.
		 *
		 * @param language - The language that the query is associated with.
		 *
		 * @param source - The query source, containing one or more S-expression patterns.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
		 */
		constructor(language: Language, source: string);
		/** Delete the query, freeing its resources. */
		delete(): void;
		/**
		 * Iterate over all of the matches in the order that they were found.
		 *
		 * Each match contains the index of the pattern that matched, and a list of
		 * captures. Because multiple patterns can match the same set of nodes,
		 * one match may contain captures that appear *before* some of the
		 * captures from a previous match.
		 *
		 * @param node - The node to execute the query on.
		 *
		 * @param options - Options for query execution.
		 */
		matches(node: Node, options?: QueryOptions): QueryMatch[];
		/**
		 * Iterate over all of the individual captures in the order that they
		 * appear.
		 *
		 * This is useful if you don't care about which pattern matched, and just
		 * want a single, ordered sequence of captures.
		 *
		 * @param node - The node to execute the query on.
		 *
		 * @param options - Options for query execution.
		 */
		captures(node: Node, options?: QueryOptions): QueryCapture[];
		/** Get the predicates for a given pattern. */
		predicatesForPattern(patternIndex: number): QueryPredicate[];
		/**
		 * Disable a certain capture within a query.
		 *
		 * This prevents the capture from being returned in matches, and also
		 * avoids any resource usage associated with recording the capture.
		 */
		disableCapture(captureName: string): void;
		/**
		 * Disable a certain pattern within a query.
		 *
		 * This prevents the pattern from matching, and also avoids any resource
		 * usage associated with the pattern. This throws an error if the pattern
		 * index is out of bounds.
		 */
		disablePattern(patternIndex: number): void;
		/**
		 * Check if, on its last execution, this query exceeded its maximum number
		 * of in-progress matches.
		 *
		 * See also {@link Query#matchLimit}.
		 */
		didExceedMatchLimit(): boolean;
		/** Get the byte offset where the given pattern starts in the query's source. */
		startIndexForPattern(patternIndex: number): number;
		/** Get the byte offset where the given pattern ends in the query's source. */
		endIndexForPattern(patternIndex: number): number;
		/** Get the number of patterns in the query. */
		patternCount(): number;
		/** Get the index for a given capture name. */
		captureIndexForName(captureName: string): number;
		/** Check if a given pattern within a query has a single root node. */
		isPatternRooted(patternIndex: number): boolean;
		/**
		 * Check if a given pattern within a query is non-local.
		 *
		 * See also {@link Query#isPatternRooted}.
		 */
		isPatternNonLocal(patternIndex: number): boolean;
		/**
		 * Check if a given step in a query is 'definite'.
		 *
		 * A query step is 'definite' if its parent pattern will be guaranteed to
		 * match successfully once it reaches the step.
		 */
		isPatternGuaranteedAtStep(byteIndex: number): boolean;
	}
	/**
	 * An iterable over the symbols of a lookahead iterator, yielding each
	 * symbol name as a string.
	 *
	 * The iterator is positioned with a {@link Language} and a parse state id;
	 * see {@link LookaheadIterator#reset} and {@link LookaheadIterator#resetState}.
	 */
	export class LookaheadIterator implements Iterable<string> {
		/** Get the current symbol of the lookahead iterator. */
		get currentTypeId(): number;
		/** Get the current symbol name of the lookahead iterator. */
		get currentType(): string;
		/** Delete the lookahead iterator, freeing its resources. */
		delete(): void;
		/**
		 * Reset the lookahead iterator to the given language and state.
		 *
		 * This returns `true` if the language was set successfully and `false`
		 * otherwise.
		 */
		reset(language: Language, stateId: number): boolean;
		/**
		 * Reset the lookahead iterator to another state.
		 *
		 * This returns `true` if the iterator was reset to the given state and
		 * `false` otherwise.
		 */
		resetState(stateId: number): boolean;
		/**
		 * Returns an iterator that iterates over the symbols of the lookahead iterator.
		 *
		 * The iterator will yield the current symbol name as a string for each step
		 * until there are no more symbols to iterate over.
		 */
		[Symbol.iterator](): Iterator<string>;
	}

	export {};
}

//# sourceMappingURL=web-tree-sitter.d.cts.map


================================================
FILE: lib/binding_web/web-tree-sitter.d.ts
================================================
declare module 'web-tree-sitter' {
	/**
	 * A position in a multi-line text document, in terms of rows and columns.
	 *
	 * Both `row` and `column` are zero-based.
	 */
	export interface Point {
		/** The zero-based row number. */
		row: number;
		/** The zero-based column number. */
		column: number;
	}
	/**
	 * A range of positions in a multi-line text document, both in terms of bytes
	 * and of rows and columns.
	 */
	export interface Range {
		/** The start position (row and column) of the range. */
		startPosition: Point;
		/** The end position (row and column) of the range. */
		endPosition: Point;
		/** The start byte index of the range. */
		startIndex: number;
		/** The end byte index of the range. */
		endIndex: number;
	}
	/**
	 * A summary of a change to a text document.
	 *
	 * The change is described both in terms of row/column positions and in terms
	 * of byte offsets; see {@link Tree#edit}.
	 */
	export interface Edit {
		/** The start position of the change. */
		startPosition: Point;
		/** The end position of the change before the edit. */
		oldEndPosition: Point;
		/** The end position of the change after the edit. */
		newEndPosition: Point;
		/** The start byte index of the change. */
		startIndex: number;
		/** The end byte index of the change before the edit. */
		oldEndIndex: number;
		/** The end byte index of the change after the edit. */
		newEndIndex: number;
	}
	/**
	 * A callback for parsing that takes an index and point, and should return a string.
	 *
	 * It can be passed to {@link Parser#parse} in place of the text itself.
	 *
	 * NOTE(review): the declared return type also allows `undefined`; presumably
	 * this signals that no more text is available. Confirm against the implementation.
	 */
	export type ParseCallback = (index: number, position: Point) => string | undefined;
	/**
	 * A callback that receives the parse state during parsing.
	 *
	 * NOTE(review): presumably a `true` return value cancels parsing, as described
	 * for {@link ParseOptions#progressCallback}; confirm against the implementation.
	 */
	export type ProgressCallback = (progress: ParseState) => boolean;
	/**
	 * A callback for logging messages.
	 *
	 * If `isLex` is `true`, the message is from the lexer, otherwise it's from the parser.
	 *
	 * See also {@link Parser#setLogger}.
	 */
	export type LogCallback = (message: string, isLex: boolean) => void;
	/**
	 * Options for parsing
	 *
	 * The `includedRanges` property is an array of {@link Range} objects that
	 * represent the ranges of text that the parser should include when parsing.
	 *
	 * The `progressCallback` property is a function that is called periodically
	 * during parsing to check whether parsing should be cancelled.
	 *
	 * See {@link Parser#parse} for more information.
	 */
	export interface ParseOptions {
		/**
		 * An array of {@link Range} objects that
		 * represent the ranges of text that the parser should include when parsing.
		 *
		 * By default, the parser will always include entire documents. This
		 * option allows you to parse only a *portion* of a document but
		 * still return a syntax tree whose ranges match up with the document
		 * as a whole. You can also pass multiple disjoint ranges.
		 * If `includedRanges` is empty, then the entire document will be parsed.
		 * Otherwise, the given ranges must be ordered from earliest to latest
		 * in the document, and they must not overlap. That is, the following
		 * must hold for all `i` < `length - 1`:
		 * ```text
		 *     includedRanges[i].endIndex <= includedRanges[i + 1].startIndex
		 * ```
		 */
		includedRanges?: Range[];
		/**
		 * A function that is called periodically during parsing to check
		 * whether parsing should be cancelled. If the progress callback returns
		 * `true`, then parsing will be cancelled. You can also use this to instrument
		 * parsing and check where the parser is at in the document. The progress callback
		 * takes a single argument, which is a {@link ParseState} representing the current
		 * state of the parser.
		 *
		 * NOTE(review): the declared return type is `void` even though the return
		 * value is described as meaningful (compare {@link ProgressCallback}, which
		 * returns `boolean`); confirm which is intended.
		 */
		progressCallback?: (state: ParseState) => void;
	}
	/**
	 * A stateful object that is passed into the progress callback
	 * ({@link ParseOptions#progressCallback}) to provide the current state of the parser.
	 */
	export interface ParseState {
		/** The byte offset in the document that the parser is at. */
		currentOffset: number;
		/** Indicates whether the parser has encountered an error during parsing. */
		hasError: boolean;
	}
	/**
	 * The latest ABI version that is supported by the current version of the
	 * library.
	 *
	 * When Languages are generated by the Tree-sitter CLI, they are
	 * assigned an ABI version number that corresponds to the current CLI version.
	 * The Tree-sitter library is generally backwards-compatible with languages
	 * generated using older CLI versions, but is not forwards-compatible.
	 *
	 * See also {@link MIN_COMPATIBLE_VERSION} and {@link Language#abiVersion}.
	 */
	export let LANGUAGE_VERSION: number;
	/**
	 * The earliest ABI version that is supported by the current version of the
	 * library.
	 *
	 * See also {@link LANGUAGE_VERSION} and {@link Language#abiVersion}.
	 */
	export let MIN_COMPATIBLE_VERSION: number;
	/**
	 * A stateful object that is used to produce a {@link Tree} based on some
	 * source code.
	 */
	export class Parser {
		/** The parser's current language. */
		language: Language | null;
		/**
		 * This must always be called before creating a Parser.
		 *
		 * You can optionally pass in options to configure the WASM module, the most common
		 * one being `locateFile` to help the module find the `.wasm` file.
		 */
		static init(moduleOptions?: EmscriptenModule): Promise<void>;
		/**
		 * Create a new parser.
		 *
		 * {@link Parser.init} must have been called first.
		 */
		constructor();
		/** Delete the parser, freeing its resources. */
		delete(): void;
		/**
		 * Set the language that the parser should use for parsing.
		 *
		 * If the language was not successfully assigned, an error will be thrown.
		 * This happens if the language was generated with an incompatible
		 * version of the Tree-sitter CLI. Check the language's ABI version using
		 * {@link Language#abiVersion} and compare it to this library's
		 * {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION} constants.
		 */
		setLanguage(language: Language | null): this;
		/**
		 * Parse a slice of UTF8 text.
		 *
		 * @param callback - The UTF8-encoded text to parse or a callback function.
		 *
		 * @param oldTree - A previous syntax tree parsed from the same document. If the text of the
		 *   document has changed since `oldTree` was created, then you must edit `oldTree` to match
		 *   the new text using {@link Tree#edit}.
		 *
		 * @param options - Options for parsing the text.
		 *  This can be used to set the included ranges, or a progress callback.
		 *
		 * @returns A {@link Tree} if parsing succeeded, or `null` if:
		 *  - The parser has not yet had a language assigned with {@link Parser#setLanguage}.
		 *  - The progress callback returned true.
		 *  - Parsing took longer than the timeout set with {@link Parser#setTimeoutMicros}.
		 */
		parse(callback: string | ParseCallback, oldTree?: Tree | null, options?: ParseOptions): Tree | null;
		/**
		 * Instruct the parser to start the next parse from the beginning.
		 *
		 * If the parser previously failed because of a timeout, cancellation,
		 * or callback, then by default, it will resume where it left off on the
		 * next call to {@link Parser#parse} or other parsing functions.
		 * If you don't want to resume, and instead intend to use this parser to
		 * parse some other document, you must call `reset` first.
		 */
		reset(): void;
		/** Get the ranges of text that the parser will include when parsing. */
		getIncludedRanges(): Range[];
		/**
		 * Get the duration in microseconds that parsing is allowed to take.
		 *
		 * This is set via {@link Parser#setTimeoutMicros}.
		 *
		 * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
		 */
		getTimeoutMicros(): number;
		/**
		 * Set the maximum duration in microseconds that parsing should be allowed
		 * to take before halting.
		 *
		 * If parsing takes longer than this, it will halt early, returning `null`.
		 * See {@link Parser#parse} for more information.
		 *
		 * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
		 */
		setTimeoutMicros(timeout: number): void;
		/** Set the logging callback that a parser should use during parsing. */
		setLogger(callback: LogCallback | boolean | null): this;
		/** Get the parser's current logger. */
		getLogger(): LogCallback | null;
	}
	/**
	 * The metadata of a language, as returned by {@link Language#metadata}.
	 *
	 * It carries the three components of a version number.
	 */
	class LanguageMetadata {
		/** The major component of the version. */
		readonly major_version: number;
		/** The minor component of the version. */
		readonly minor_version: number;
		/** The patch component of the version. */
		readonly patch_version: number;
	}
	/**
	 * An opaque object that defines how to parse a particular language.
	 * The code for each `Language` is generated by the Tree-sitter CLI.
	 */
	export class Language {
		/**
		 * A list of all node types in the language. The index of each type in this
		 * array is its node type id.
		 */
		types: string[];
		/**
		 * A list of all field names in the language. The index of each field name in
		 * this array is its field id.
		 */
		fields: (string | null)[];
		/**
		 * Gets the name of the language.
		 */
		get name(): string | null;
		/**
		 * Gets the version of the language.
		 *
		 * @deprecated since version 0.25.0, use {@link Language#abiVersion} instead
		 */
		get version(): number;
		/**
		 * Gets the ABI version of the language.
		 *
		 * See also {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION}.
		 */
		get abiVersion(): number;
		/**
		 * Get the metadata for this language. This information is generated by the
		 * CLI, and relies on the language author providing the correct metadata in
		 * the language's `tree-sitter.json` file.
		 */
		get metadata(): LanguageMetadata | null;
		/**
		 * Gets the number of fields in the language.
		 */
		get fieldCount(): number;
		/**
		 * Gets the number of states in the language.
		 */
		get stateCount(): number;
		/**
		 * Get the field id for a field name.
		 *
		 * See also {@link Language#fieldNameForId}.
		 */
		fieldIdForName(fieldName: string): number | null;
		/**
		 * Get the field name for a field id.
		 *
		 * See also {@link Language#fieldIdForName}.
		 */
		fieldNameForId(fieldId: number): string | null;
		/**
		 * Get the node type id for a node type name.
		 *
		 * See also {@link Language#nodeTypeForId}.
		 */
		idForNodeType(type: string, named: boolean): number | null;
		/**
		 * Gets the number of node types in the language.
		 */
		get nodeTypeCount(): number;
		/**
		 * Get the node type name for a node type id.
		 *
		 * See also {@link Language#idForNodeType}.
		 */
		nodeTypeForId(typeId: number): string | null;
		/**
		 * Check if a node type is named.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/2-basic-parsing.html#named-vs-anonymous-nodes}
		 */
		nodeTypeIsNamed(typeId: number): boolean;
		/**
		 * Check if a node type is visible.
		 */
		nodeTypeIsVisible(typeId: number): boolean;
		/**
		 * Get the supertypes ids of this language.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.html?highlight=supertype#supertype-nodes}
		 */
		get supertypes(): number[];
		/**
		 * Get the subtype ids for a given supertype node id.
		 *
		 * See also {@link Language#supertypes}.
		 */
		subtypes(supertype: number): number[];
		/**
		 * Get the next state id for a given state id and node type id.
		 */
		nextState(stateId: number, typeId: number): number;
		/**
		 * Create a new lookahead iterator for this language and parse state.
		 *
		 * This returns `null` if state is invalid for this language.
		 *
		 * Iterating {@link LookaheadIterator} will yield valid symbols in the given
		 * parse state. Newly created lookahead iterators will return the `ERROR`
		 * symbol from {@link LookaheadIterator#currentType}.
		 *
		 * Lookahead iterators can be useful for generating suggestions and improving
		 * syntax error diagnostics. To get symbols valid in an `ERROR` node, use the
		 * lookahead iterator on its first leaf node state. For `MISSING` nodes, a
		 * lookahead iterator created on the previous non-extra leaf node may be
		 * appropriate.
		 */
		lookaheadIterator(stateId: number): LookaheadIterator | null;
		/**
		 * Create a new query from a string containing one or more S-expression
		 * patterns.
		 *
		 * The query is associated with a particular language, and can only be run
		 * on syntax nodes parsed with that language. References to Queries can be
		 * shared between multiple threads.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
		 *
		 * @deprecated since version 0.25.0, call `new` on a {@link Query} instead
		 */
		query(source: string): Query;
		/**
		 * Load a language from a WebAssembly module.
		 * The module can be provided as a path to a file or as a buffer.
		 */
		static load(input: string | Uint8Array): Promise<Language>;
	}
	/** A tree that represents the syntactic structure of a source code file. */
	export class Tree {
		/** The language that was used to parse the syntax tree. */
		language: Language;
		/** Create a shallow copy of the syntax tree. This is very fast. */
		copy(): Tree;
		/** Delete the syntax tree, freeing its resources. */
		delete(): void;
		/** Get the root node of the syntax tree. */
		get rootNode(): Node;
		/**
		 * Get the root node of the syntax tree, but with its position shifted
		 * forward by the given offset.
		 *
		 * @param offsetBytes - The offset, in bytes.
		 *
		 * @param offsetExtent - The offset, as a row/column {@link Point}.
		 */
		rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): Node;
		/**
		 * Edit the syntax tree to keep it in sync with source code that has been
		 * edited.
		 *
		 * You must describe the edit both in terms of byte offsets and in terms of
		 * row/column coordinates.
		 *
		 * @param edit - The {@link Edit} describing the change to the source code.
		 */
		edit(edit: Edit): void;
		/** Create a new {@link TreeCursor} starting from the root of the tree. */
		walk(): TreeCursor;
		/**
		 * Compare this old edited syntax tree to a new syntax tree representing
		 * the same document, returning a sequence of ranges whose syntactic
		 * structure has changed.
		 *
		 * For this to work correctly, this syntax tree must have been edited such
		 * that its ranges match up to the new tree. Generally, you'll want to
		 * call this method right after calling {@link Parser#parse}.
		 * Call it on the old tree that was passed to `parse`, and
		 * pass the new tree that was returned from `parse`.
		 *
		 * @param other - The new syntax tree to compare against.
		 */
		getChangedRanges(other: Tree): Range[];
		/** Get the included ranges that were used to parse the syntax tree. */
		getIncludedRanges(): Range[];
	}
	/** A single node within a syntax {@link Tree}. */
	export class Node {
		/**
		 * The unique numeric id for this node.
		 *
		 * Within a given syntax tree, no two nodes have the same id. However:
		 *
		 * * If a new tree is created based on an older tree, and a node from the old tree is reused in
		 *   the process, then that node will have the same id in both trees.
		 *
		 * * A node not marked as having changes does not guarantee it was reused.
		 *
		 * * If a node is marked as having changed in the old tree, it will not be reused.
		 */
		id: number;
		/** The byte index where this node starts. */
		startIndex: number;
		/** The position where this node starts. */
		startPosition: Point;
		/** The tree that this node belongs to. */
		tree: Tree;
		/** Get this node's type as a numerical id. */
		get typeId(): number;
		/**
		 * Get the node's type as a numerical id as it appears in the grammar,
		 * ignoring aliases.
		 */
		get grammarId(): number;
		/** Get this node's type as a string. */
		get type(): string;
		/**
		 * Get this node's symbol name as a string, as it appears in the grammar,
		 * ignoring aliases.
		 */
		get grammarType(): string;
		/**
		 * Check if this node is *named*.
		 *
		 * Named nodes correspond to named rules in the grammar, whereas
		 * *anonymous* nodes correspond to string literals in the grammar.
		 */
		get isNamed(): boolean;
		/**
		 * Check if this node is *extra*.
		 *
		 * Extra nodes represent things like comments, which are not required
		 * by the grammar, but can appear anywhere.
		 */
		get isExtra(): boolean;
		/**
		 * Check if this node represents a syntax error.
		 *
		 * Syntax errors represent parts of the code that could not be incorporated
		 * into a valid syntax tree.
		 */
		get isError(): boolean;
		/**
		 * Check if this node is *missing*.
		 *
		 * Missing nodes are inserted by the parser in order to recover from
		 * certain kinds of syntax errors.
		 */
		get isMissing(): boolean;
		/** Check if this node has been edited. */
		get hasChanges(): boolean;
		/**
		 * Check if this node represents a syntax error or contains any syntax
		 * errors anywhere within it.
		 */
		get hasError(): boolean;
		/** Get the byte index where this node ends. */
		get endIndex(): number;
		/** Get the position where this node ends. */
		get endPosition(): Point;
		/** Get the string content of this node. */
		get text(): string;
		/** Get this node's parse state. */
		get parseState(): number;
		/** Get the parse state after this node. */
		get nextParseState(): number;
		/** Check if this node is equal to another node. */
		equals(other: Node): boolean;
		/**
		 * Get the node's child at the given index, where zero represents the first child.
		 *
		 * This method is fairly fast, but its cost is technically log(n), so if
		 * you might be iterating over a long list of children, you should use
		 * {@link Node#children} instead.
		 */
		child(index: number): Node | null;
		/**
		 * Get this node's *named* child at the given index.
		 *
		 * See also {@link Node#isNamed}.
		 * This method is fairly fast, but its cost is technically log(n), so if
		 * you might be iterating over a long list of children, you should use
		 * {@link Node#namedChildren} instead.
		 */
		namedChild(index: number): Node | null;
		/**
		 * Get this node's child with the given numerical field id.
		 *
		 * See also {@link Node#childForFieldName}. You can
		 * convert a field name to an id using {@link Language#fieldIdForName}.
		 */
		childForFieldId(fieldId: number): Node | null;
		/**
		 * Get the first child with the given field name.
		 *
		 * If multiple children may have the same field name, access them using
		 * {@link Node#childrenForFieldName}.
		 */
		childForFieldName(fieldName: string): Node | null;
		/** Get the field name of this node's child at the given index. */
		fieldNameForChild(index: number): string | null;
		/** Get the field name of this node's named child at the given index. */
		fieldNameForNamedChild(index: number): string | null;
		/**
		 * Get an array of this node's children with a given field name.
		 *
		 * See also {@link Node#children}.
		 */
		childrenForFieldName(fieldName: string): Node[];
		/**
		 * Get an array of this node's children with a given field id.
		 *
		 * See also {@link Node#childrenForFieldName}.
		 */
		childrenForFieldId(fieldId: number): Node[];
		/** Get the node's first child that contains or starts after the given byte offset. */
		firstChildForIndex(index: number): Node | null;
		/** Get the node's first named child that contains or starts after the given byte offset. */
		firstNamedChildForIndex(index: number): Node | null;
		/** Get this node's number of children. */
		get childCount(): number;
		/**
		 * Get this node's number of *named* children.
		 *
		 * See also {@link Node#isNamed}.
		 */
		get namedChildCount(): number;
		/** Get this node's first child. */
		get firstChild(): Node | null;
		/**
		 * Get this node's first named child.
		 *
		 * See also {@link Node#isNamed}.
		 */
		get firstNamedChild(): Node | null;
		/** Get this node's last child. */
		get lastChild(): Node | null;
		/**
		 * Get this node's last named child.
		 *
		 * See also {@link Node#isNamed}.
		 */
		get lastNamedChild(): Node | null;
		/**
		 * Get an array of this node's children.
		 *
		 * If you're walking the tree recursively, you may want to use the
		 * {@link TreeCursor} APIs directly instead.
		 */
		get children(): Node[];
		/**
		 * Get an array of this node's named children.
		 *
		 * See also {@link Node#children}.
		 */
		get namedChildren(): Node[];
		/**
		 * Get the descendants of this node that are the given type, or in the given types array.
		 *
		 * The types array should contain node type strings, which can be retrieved from {@link Language#types}.
		 *
		 * Additionally, a `startPosition` and `endPosition` can be passed in to restrict the search
		 * to the range between those two points.
		 */
		descendantsOfType(types: string | string[], startPosition?: Point, endPosition?: Point): Node[];
		/** Get this node's next sibling. */
		get nextSibling(): Node | null;
		/** Get this node's previous sibling. */
		get previousSibling(): Node | null;
		/**
		 * Get this node's next *named* sibling.
		 *
		 * See also {@link Node#isNamed}.
		 */
		get nextNamedSibling(): Node | null;
		/**
		 * Get this node's previous *named* sibling.
		 *
		 * See also {@link Node#isNamed}.
		 */
		get previousNamedSibling(): Node | null;
		/** Get the node's number of descendants, including one for the node itself. */
		get descendantCount(): number;
		/**
		 * Get this node's immediate parent.
		 *
		 * Prefer {@link Node#childWithDescendant} for iterating over this node's ancestors.
		 */
		get parent(): Node | null;
		/**
		 * Get the node that contains `descendant`.
		 *
		 * Note that this can return `descendant` itself.
		 */
		childWithDescendant(descendant: Node): Node | null;
		/**
		 * Get the smallest node within this node that spans the given byte range.
		 *
		 * `end` is optional in the signature; how an omitted `end` is treated is
		 * not stated here.
		 */
		descendantForIndex(start: number, end?: number): Node | null;
		/** Get the smallest named node within this node that spans the given byte range. */
		namedDescendantForIndex(start: number, end?: number): Node | null;
		/** Get the smallest node within this node that spans the given point range. */
		descendantForPosition(start: Point, end?: Point): Node | null;
		/** Get the smallest named node within this node that spans the given point range. */
		namedDescendantForPosition(start: Point, end?: Point): Node | null;
		/**
		 * Create a new {@link TreeCursor} starting from this node.
		 *
		 * Note that the given node is considered the root of the cursor,
		 * and the cursor cannot walk outside this node.
		 */
		walk(): TreeCursor;
		/**
		 * Edit this node to keep it in-sync with source code that has been edited.
		 *
		 * This function is only rarely needed. When you edit a syntax tree with
		 * the {@link Tree#edit} method, all of the nodes that you retrieve from
		 * the tree afterward will already reflect the edit. You only need to
		 * use {@link Node#edit} when you have a specific {@link Node} instance that
		 * you want to keep and continue to use after an edit.
		 */
		edit(edit: Edit): void;
		/** Get the S-expression representation of this node. */
		toString(): string;
	}
	/** A stateful object for walking a syntax {@link Tree} efficiently. */
	export class TreeCursor {
		/** Creates a deep copy of the tree cursor. This allocates new memory. */
		copy(): TreeCursor;
		/** Delete the tree cursor, freeing its resources. */
		delete(): void;
		/** Get the tree cursor's current {@link Node}. */
		get currentNode(): Node;
		/**
		 * Get the numerical field id of this tree cursor's current node.
		 *
		 * See also {@link TreeCursor#currentFieldName}.
		 */
		get currentFieldId(): number;
		/** Get the field name of this tree cursor's current node. */
		get currentFieldName(): string | null;
		/**
		 * Get the depth of the cursor's current node relative to the original
		 * node that the cursor was constructed with.
		 */
		get currentDepth(): number;
		/**
		 * Get the index of the cursor's current node out of all of the
		 * descendants of the original node that the cursor was constructed with.
		 */
		get currentDescendantIndex(): number;
		/** Get the type of the cursor's current node. */
		get nodeType(): string;
		/** Get the type id of the cursor's current node. */
		get nodeTypeId(): number;
		/** Get the state id of the cursor's current node. */
		get nodeStateId(): number;
		/** Get the id of the cursor's current node. */
		get nodeId(): number;
		/**
		 * Check if the cursor's current node is *named*.
		 *
		 * Named nodes correspond to named rules in the grammar, whereas
		 * *anonymous* nodes correspond to string literals in the grammar.
		 */
		get nodeIsNamed(): boolean;
		/**
		 * Check if the cursor's current node is *missing*.
		 *
		 * Missing nodes are inserted by the parser in order to recover from
		 * certain kinds of syntax errors.
		 */
		get nodeIsMissing(): boolean;
		/** Get the string content of the cursor's current node. */
		get nodeText(): string;
		/** Get the start position of the cursor's current node. */
		get startPosition(): Point;
		/** Get the end position of the cursor's current node. */
		get endPosition(): Point;
		/** Get the start index of the cursor's current node. */
		get startIndex(): number;
		/** Get the end index of the cursor's current node. */
		get endIndex(): number;
		/**
		 * Move this cursor to the first child of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there were no children.
		 */
		gotoFirstChild(): boolean;
		/**
		 * Move this cursor to the last child of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there were no children.
		 *
		 * Note that this function may be slower than
		 * {@link TreeCursor#gotoFirstChild} because it needs to
		 * iterate through all the children to compute the child's position.
		 */
		gotoLastChild(): boolean;
		/**
		 * Move this cursor to the parent of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no parent node (the cursor was already on the
		 * root node).
		 *
		 * Note that the node the cursor was constructed with is considered the root
		 * of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoParent(): boolean;
		/**
		 * Move this cursor to the next sibling of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no next sibling node.
		 *
		 * Note that the node the cursor was constructed with is considered the root
		 * of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoNextSibling(): boolean;
		/**
		 * Move this cursor to the previous sibling of its current node.
		 *
		 * This returns `true` if the cursor successfully moved, and returns
		 * `false` if there was no previous sibling node.
		 *
		 * Note that this function may be slower than
		 * {@link TreeCursor#gotoNextSibling} due to how node
		 * positions are stored. In the worst case, this will need to iterate
		 * through all the children up to the previous sibling node to recalculate
		 * its position. Also note that the node the cursor was constructed with is
		 * considered the root of the cursor, and the cursor cannot walk outside this node.
		 */
		gotoPreviousSibling(): boolean;
		/**
		 * Move the cursor to the node that is the nth descendant of
		 * the original node that the cursor was constructed with, where
		 * zero represents the original node itself.
		 */
		gotoDescendant(goalDescendantIndex: number): void;
		/**
		 * Move this cursor to the first child of its current node that contains or
		 * starts after the given byte offset.
		 *
		 * This returns `true` if the cursor successfully moved to a child node, and returns
		 * `false` if no such child was found.
		 */
		gotoFirstChildForIndex(goalIndex: number): boolean;
		/**
		 * Move this cursor to the first child of its current node that contains or
		 * starts after the given point.
		 *
		 * This returns `true` if the cursor successfully moved to a child node, and returns
		 * `false` if no such child was found.
		 */
		gotoFirstChildForPosition(goalPosition: Point): boolean;
		/**
		 * Re-initialize this tree cursor to start at the given node.
		 *
		 * NOTE(review): earlier wording said the cursor restarts at "the original
		 * node that the cursor was constructed with", but the method takes a
		 * `node` argument — presumably that node becomes the cursor's new root;
		 * confirm against the implementation.
		 */
		reset(node: Node): void;
		/**
		 * Re-initialize a tree cursor to the same position as another cursor.
		 *
		 * Unlike {@link TreeCursor#reset}, this will not lose parent
		 * information and allows reusing already created cursors.
		 */
		resetTo(cursor: TreeCursor): void;
	}
	/**
	 * Options for query execution.
	 *
	 * Every field is optional.
	 */
	export interface QueryOptions {
		/** The start position of the range to query */
		startPosition?: Point;
		/** The end position of the range to query */
		endPosition?: Point;
		/** The start index of the range to query */
		startIndex?: number;
		/** The end index of the range to query */
		endIndex?: number;
		/**
		 * The maximum number of in-progress matches for this query.
		 * The limit must be > 0 and <= 65536.
		 */
		matchLimit?: number;
		/**
		 * The maximum start depth for a query cursor.
		 *
		 * This prevents cursors from exploring children nodes at a certain depth.
		 * Note if a pattern includes many children, then they will still be
		 * checked.
		 *
		 * A max start depth of zero has special behavior: it helps to
		 * destructure a subtree by staying on a node and using captures for
		 * the parts of interest. Note that a zero max start depth only limits
		 * the search depth for a pattern's root node; other nodes that are
		 * part of the pattern may still be searched at any depth, as defined
		 * by the pattern structure.
		 *
		 * Set to `null` to remove the maximum start depth.
		 *
		 * NOTE(review): the declared type is `number | undefined` and does not
		 * admit `null` — confirm whether the type or the sentence above is stale.
		 */
		maxStartDepth?: number;
		/**
		 * The maximum duration in microseconds that query execution should be allowed to
		 * take before halting.
		 *
		 * If query execution takes longer than this, it will halt early, returning an empty array.
		 */
		timeoutMicros?: number;
		/**
		 * A function that will be called periodically during the execution of the query to check
		 * if query execution should be cancelled. You can also use this to instrument query execution
		 * and check where the query is at in the document. The progress callback takes a single argument,
		 * which is a {@link QueryState} representing the current state of the query.
		 *
		 * NOTE(review): the return type is declared `void`, yet the description says the
		 * callback decides whether to cancel — presumably a truthy return value requests
		 * cancellation; confirm and tighten the type if so.
		 */
		progressCallback?: (state: QueryState) => void;
	}
	/**
	 * The state object passed into the progress callback
	 * ({@link QueryOptions#progressCallback}) to report how far query
	 * execution has advanced.
	 */
	export interface QueryState {
		/** The byte offset in the document that the query is at. */
		currentOffset: number;
	}
	/**
	 * A record of key-value pairs associated with a particular pattern in a {@link Query}.
	 *
	 * Keys are strings; a value is either a string or `null`.
	 */
	export type QueryProperties = Record<string, string | null>;
	/**
	 * A predicate that contains an operator and a list of operands.
	 */
	export interface QueryPredicate {
		/** The operator of the predicate, like `match?`, `eq?`, `set!`, etc. */
		operator: string;
		/**
		 * The operands of the predicate, each of which is either a capture
		 * reference or a literal string (see {@link PredicateStep}).
		 */
		operands: PredicateStep[];
	}
	/**
	 * A particular {@link Node} that has been captured with a particular name within a
	 * {@link Query}.
	 */
	export interface QueryCapture {
		/** The index of the pattern that matched. */
		patternIndex: number;
		/** The name of the capture. */
		name: string;
		/** The captured node. */
		node: Node;
		/** The properties for predicates declared with the operator `set!`. */
		setProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is?`. */
		assertedProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is-not?`. */
		refutedProperties?: QueryProperties;
	}
	/** A match of a {@link Query} to a particular set of {@link Node}s. */
	export interface QueryMatch {
		/** @deprecated since version 0.25.0, use {@link QueryMatch#patternIndex} instead. */
		pattern: number;
		/** The index of the pattern that matched. */
		patternIndex: number;
		/** The captures associated with the match. */
		captures: QueryCapture[];
		/** The properties for predicates declared with the operator `set!`. */
		setProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is?`. */
		assertedProperties?: QueryProperties;
		/** The properties for predicates declared with the operator `is-not?`. */
		refutedProperties?: QueryProperties;
	}
	/**
	 * A quantifier for captures.
	 *
	 * This constant object maps each quantifier name to its numeric value.
	 */
	export const CaptureQuantifier: {
		readonly Zero: 0;
		readonly ZeroOrOne: 1;
		readonly ZeroOrMore: 2;
		readonly One: 3;
		readonly OneOrMore: 4;
	};
	/**
	 * A quantifier for captures.
	 *
	 * This type is the union of the numeric values of the
	 * `CaptureQuantifier` constant object (`0 | 1 | 2 | 3 | 4`).
	 */
	export type CaptureQuantifier = typeof CaptureQuantifier[keyof typeof CaptureQuantifier];
	/**
	 * Predicates are represented as a single array of steps. There are two
	 * kinds of steps, distinguished by the value of the `type` field:
	 *
	 * - `CapturePredicateStep` (`type: 'capture'`) - Steps of this kind
	 *    represent names of captures.
	 *
	 * - `StringPredicateStep` (`type: 'string'`) - Steps of this kind
	 *    represent literal strings.
	 */
	export type PredicateStep = CapturePredicateStep | StringPredicateStep;
	/**
	 * A step in a predicate that refers to a capture.
	 *
	 * The `type` field is always `'capture'`; the `name` field is the name of
	 * the capture.
	 */
	interface CapturePredicateStep {
		type: 'capture';
		name: string;
	}
	/**
	 * A step in a predicate that refers to a string.
	 *
	 * The `type` field is always `'string'`; the `value` field is the string
	 * value.
	 */
	interface StringPredicateStep {
		type: 'string';
		value: string;
	}
	/**
	 * A set of S-expression patterns, compiled for one {@link Language}, that
	 * can be run against syntax nodes to produce matches and captures.
	 */
	export class Query {
		/** The names of the captures used in the query. */
		readonly captureNames: string[];
		/** The quantifiers of the captures used in the query. */
		readonly captureQuantifiers: CaptureQuantifier[][];
		/**
		 * The other user-defined predicates associated with the given index.
		 *
		 * This includes predicates with operators other than:
		 * - `match?`
		 * - `eq?` and `not-eq?`
		 * - `any-of?` and `not-any-of?`
		 * - `is?` and `is-not?`
		 * - `set!`
		 */
		readonly predicates: QueryPredicate[][];
		/** The properties for predicates with the operator `set!`. */
		readonly setProperties: QueryProperties[];
		/** The properties for predicates with the operator `is?`. */
		readonly assertedProperties: QueryProperties[];
		/** The properties for predicates with the operator `is-not?`. */
		readonly refutedProperties: QueryProperties[];
		/** The maximum number of in-progress matches for this query. */
		matchLimit?: number;
		/**
		 * Create a new query from a string containing one or more S-expression
		 * patterns.
		 *
		 * The query is associated with a particular language, and can only be run
		 * on syntax nodes parsed with that language. References to Queries can be
		 * shared between multiple threads.
		 *
		 * @param language - The language the query is compiled for.
		 *
		 * @param source - The query source: one or more S-expression patterns.
		 *
		 * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
		 */
		constructor(language: Language, source: string);
		/** Delete the query, freeing its resources. */
		delete(): void;
		/**
		 * Get all of the matches in the order that they were found.
		 *
		 * Each match contains the index of the pattern that matched, and a list of
		 * captures. Because multiple patterns can match the same set of nodes,
		 * one match may contain captures that appear *before* some of the
		 * captures from a previous match.
		 *
		 * @param node - The node to execute the query on.
		 *
		 * @param options - Options for query execution.
		 */
		matches(node: Node, options?: QueryOptions): QueryMatch[];
		/**
		 * Get all of the individual captures in the order that they
		 * appear.
		 *
		 * This is useful if you don't care about which pattern matched, and just
		 * want a single, ordered sequence of captures.
		 *
		 * @param node - The node to execute the query on.
		 *
		 * @param options - Options for query execution.
		 */
		captures(node: Node, options?: QueryOptions): QueryCapture[];
		/** Get the predicates for a given pattern. */
		predicatesForPattern(patternIndex: number): QueryPredicate[];
		/**
		 * Disable a certain capture within a query.
		 *
		 * This prevents the capture from being returned in matches, and also
		 * avoids any resource usage associated with recording the capture.
		 */
		disableCapture(captureName: string): void;
		/**
		 * Disable a certain pattern within a query.
		 *
		 * This prevents the pattern from matching, and also avoids any resource
		 * usage associated with the pattern. This throws an error if the pattern
		 * index is out of bounds.
		 */
		disablePattern(patternIndex: number): void;
		/**
		 * Check if, on its last execution, this query exceeded its maximum number
		 * of in-progress matches.
		 */
		didExceedMatchLimit(): boolean;
		/** Get the byte offset where the given pattern starts in the query's source. */
		startIndexForPattern(patternIndex: number): number;
		/** Get the byte offset where the given pattern ends in the query's source. */
		endIndexForPattern(patternIndex: number): number;
		/** Get the number of patterns in the query. */
		patternCount(): number;
		/** Get the index for a given capture name. */
		captureIndexForName(captureName: string): number;
		/** Check if a given pattern within a query has a single root node. */
		isPatternRooted(patternIndex: number): boolean;
		/**
		 * Check if a given pattern within a query is *non-local*.
		 *
		 * NOTE(review): the previous text here was a verbatim copy of the
		 * {@link Query#isPatternRooted} description. Presumably a non-local
		 * pattern is one with multiple root nodes that can match within a
		 * repeating sequence of nodes — confirm against the core library docs.
		 */
		isPatternNonLocal(patternIndex: number): boolean;
		/**
		 * Check if a given step in a query is 'definite'.
		 *
		 * A query step is 'definite' if its parent pattern will be guaranteed to
		 * match successfully once it reaches the step.
		 *
		 * @param byteIndex - Offset into the query's source that identifies the step.
		 */
		isPatternGuaranteedAtStep(byteIndex: number): boolean;
	}
	/**
	 * An iterator over the symbols that are valid in a particular parse state
	 * of a {@link Language}. Instances are created with
	 * {@link Language#lookaheadIterator}.
	 */
	export class LookaheadIterator implements Iterable<string> {
		/** Get the current symbol of the lookahead iterator, as a numerical id. */
		get currentTypeId(): number;
		/** Get the current symbol name of the lookahead iterator. */
		get currentType(): string;
		/** Delete the lookahead iterator, freeing its resources. */
		delete(): void;
		/**
		 * Reset the lookahead iterator to the given language and parse state.
		 *
		 * This returns `true` if the language was set successfully and `false`
		 * otherwise.
		 */
		reset(language: Language, stateId: number): boolean;
		/**
		 * Reset the lookahead iterator to another state.
		 *
		 * This returns `true` if the iterator was reset to the given state and
		 * `false` otherwise.
		 */
		resetState(stateId: number): boolean;
		/**
		 * Returns an iterator that iterates over the symbols of the lookahead iterator.
		 *
		 * The iterator will yield the current symbol name as a string for each step
		 * until there are no more symbols to iterate over.
		 */
		[Symbol.iterator](): Iterator<string>;
	}

	export {};
}

//# sourceMappingURL=web-tree-sitter.d.ts.map


================================================
FILE: lib/binding_web/lib/exports.txt
================================================
"ts_init",
"ts_language_field_count",
"ts_language_field_name_for_id",
"ts_language_type_is_named_wasm",
"ts_language_type_is_visible_wasm",
"ts_language_symbol_count",
"ts_language_state_count",
"ts_language_supertypes_wasm",
"ts_language_subtypes_wasm",
"ts_language_symbol_for_name",
"ts_language_symbol_name",
"ts_language_symbol_type",
"ts_language_name",
"ts_language_version",
"ts_language_abi_version",
"ts_language_metadata",
"ts_language_next_state",
"ts_node_field_name_for_child_wasm",
"ts_node_field_name_for_named_child_wasm",
"ts_node_children_by_field_id_wasm",
"ts_node_first_child_for_byte_wasm",
"ts_node_first_named_child_for_byte_wasm",
"ts_node_child_by_field_id_wasm",
"ts_node_child_count_wasm",
"ts_node_child_wasm",
"ts_node_children_wasm",
"ts_node_descendant_for_index_wasm",
"ts_node_descendant_for_position_wasm",
"ts_node_descendants_of_type_wasm",
"ts_node_end_index_wasm",
"ts_node_end_point_wasm",
"ts_node_has_changes_wasm",
"ts_node_has_error_wasm",
"ts_node_is_error_wasm",
"ts_node_is_missing_wasm",
"ts_node_is_extra_wasm",
"ts_node_is_named_wasm",
"ts_node_parse_state_wasm",
"ts_node_next_parse_state_wasm",
"ts_node_named_child_count_wasm",
"ts_node_named_child_wasm",
"ts_node_named_children_wasm",
"ts_node_named_descendant_for_index_wasm",
"ts_node_named_descendant_for_position_wasm",
"ts_node_next_named_sibling_wasm",
"ts_node_next_sibling_wasm",
"ts_node_parent_wasm",
"ts_node_child_with_descendant_wasm",
"ts_node_prev_named_sibling_wasm",
"ts_node_prev_sibling_wasm",
"ts_node_descendant_count_wasm",
"ts_node_start_index_wasm",
"ts_node_start_point_wasm",
"ts_node_symbol_wasm",
"ts_node_grammar_symbol_wasm",
"ts_node_to_string_wasm",
"ts_parser_delete",
"ts_parser_enable_logger_wasm",
"ts_parser_new_wasm",
"ts_parser_parse_wasm",
"ts_parser_reset",
"ts_parser_set_language",
"ts_parser_set_included_ranges",
"ts_parser_included_ranges_wasm",
"ts_parser_set_timeout_micros",
"ts_parser_timeout_micros",
"ts_query_capture_count",
"ts_query_capture_name_for_id",
"ts_query_captures_wasm",
"ts_query_delete",
"ts_query_matches_wasm",
"ts_query_new",
"ts_query_pattern_count",
"ts_query_predicates_for_pattern",
"ts_query_disable_capture",
"ts_query_start_byte_for_pattern",
"ts_query_end_byte_for_pattern",
"ts_query_string_count",
"ts_query_string_value_for_id",
"ts_query_disable_pattern",
"ts_query_capture_quantifier_for_id",
"ts_query_is_pattern_non_local",
"ts_query_is_pattern_rooted",
"ts_query_is_pattern_guaranteed_at_step",
"ts_tree_copy",
"ts_tree_cursor_current_field_id_wasm",
"ts_tree_cursor_current_depth_wasm",
"ts_tree_cursor_current_descendant_index_wasm",
"ts_tree_cursor_current_node_id_wasm",
"ts_tree_cursor_current_node_is_missing_wasm",
"ts_tree_cursor_current_node_is_named_wasm",
"ts_tree_cursor_current_node_type_id_wasm",
"ts_tree_cursor_current_node_state_id_wasm",
"ts_tree_cursor_current_node_wasm",
"ts_tree_cursor_delete_wasm",
"ts_tree_cursor_end_index_wasm",
"ts_tree_cursor_end_position_wasm",
"ts_tree_cursor_goto_first_child_wasm",
"ts_tree_cursor_goto_last_child_wasm",
"ts_tree_cursor_goto_first_child_for_index_wasm",
"ts_tree_cursor_goto_first_child_for_position_wasm",
"ts_tree_cursor_goto_next_sibling_wasm",
"ts_tree_cursor_goto_previous_sibling_wasm",
"ts_tree_cursor_goto_descendant_wasm",
"ts_tree_cursor_goto_parent_wasm",
"ts_tree_cursor_new_wasm",
"ts_tree_cursor_reset_wasm",
"ts_tree_cursor_reset_to_wasm",
"ts_tree_cursor_start_index_wasm",
"ts_tree_cursor_start_position_wasm",
"ts_tree_cursor_copy_wasm",
"ts_tree_delete",
"ts_tree_included_ranges_wasm",
"ts_tree_edit_wasm",
"ts_tree_get_changed_ranges_wasm",
"ts_tree_root_node_wasm",
"ts_tree_root_node_with_offset_wasm",
"ts_lookahead_iterator_new",
"ts_lookahead_iterator_delete",
"ts_lookahead_iterator_reset_state",
"ts_lookahead_iterator_reset",
"ts_lookahead_iterator_next",
"ts_lookahead_iterator_current_symbol",



================================================
FILE: lib/binding_web/lib/imports.js
================================================
// JavaScript functions imported by the compiled module. Each one forwards to
// the callback currently stored on `Module` (the slots are `currentParseCallback`,
// `currentLogCallback`, `currentProgressCallback`, `currentQueryProgressCallback`).
mergeInto(LibraryManager.library, {
  /**
   * Ask the current parse callback for the text at (`index`, `row`, `column`)
   * and hand it to the native side.
   *
   * The text is written as UTF-16 into the buffer at `inputBufferAddress`,
   * and its length (in UTF-16 code units, i.e. `String.prototype.length`) is
   * stored as an i32 at `lengthAddress`. A non-string result is reported as
   * length 0 and nothing is written to the buffer.
   */
  tree_sitter_parse_callback(
    inputBufferAddress,
    index,
    row,
    column,
    lengthAddress,
  ) {
    // Upper bound, in bytes, passed to stringToUTF16. The native side declares
    // the same value for its own INPUT_BUFFER_SIZE constant.
    const maxBytesToWrite = 10 * 1024;
    const chunk = Module.currentParseCallback(index, { row, column });

    if (typeof chunk !== 'string') {
      setValue(lengthAddress, 0, 'i32');
      return;
    }

    // The reported length is the full string length, even when stringToUTF16
    // truncates the copy to fit `maxBytesToWrite`.
    setValue(lengthAddress, chunk.length, 'i32');
    stringToUTF16(chunk, inputBufferAddress, maxBytesToWrite);
  },

  /**
   * Forward a log message from the native side to the current log callback,
   * if one is installed. `isLexMessage` arrives as an integer flag and is
   * passed on as a boolean.
   */
  tree_sitter_log_callback(isLexMessage, messageAddress) {
    if (!Module.currentLogCallback) {
      return;
    }
    const text = UTF8ToString(messageAddress);
    Module.currentLogCallback(text, isLexMessage !== 0);
  },

  /**
   * Report parse progress to the current progress callback and return its
   * result to the native side; returns `false` when no callback is installed.
   */
  tree_sitter_progress_callback(currentOffset, hasError) {
    if (!Module.currentProgressCallback) {
      return false;
    }
    return Module.currentProgressCallback({ currentOffset, hasError });
  },

  /**
   * Report query progress to the current query progress callback and return
   * its result to the native side; returns `false` when no callback is
   * installed.
   */
  tree_sitter_query_progress_callback(currentOffset) {
    return Module.currentQueryProgressCallback
      ? Module.currentQueryProgressCallback({ currentOffset })
      : false;
  },
});



================================================
FILE: lib/binding_web/lib/prefix.js
================================================
// Callback slots on `Module`. They start out as `null`, meaning "no callback
// installed"; the imported glue functions look these properties up by name
// each time the native side calls out to JavaScript.

// Called during query execution with `{ currentOffset }`.
Module.currentQueryProgressCallback = null;
// Called during parsing with `{ currentOffset, hasError }`.
Module.currentProgressCallback = null;
// Called with `(message, isLexMessage)` for each log message.
Module.currentLogCallback = null;
// Called with `(index, { row, column })` to fetch the next chunk of input text.
Module.currentParseCallback = null;



================================================
FILE: lib/binding_web/lib/tree-sitter.c
================================================
#include "array.h"
#include "point.h"

#include <emscripten.h>
#include <tree_sitter/api.h>

/*****************************/
/* Section - Data marshaling */
/*****************************/

/*
 * Size of the parse input buffer. The JavaScript parse callback glue uses the
 * same value (10 * 1024) as the maximum number of bytes it writes per call.
 */
static const uint32_t INPUT_BUFFER_SIZE = 10 * 1024;

/*
 * Scratch area of pointer-sized slots used to pass values across the wasm
 * boundary. `ts_init` returns its address, and the marshal/unmarshal helpers
 * below write to and read from it.
 */
const void *TRANSFER_BUFFER[12] = {
  NULL, NULL, NULL, NULL,
  NULL, NULL, NULL, NULL,
  NULL, NULL, NULL, NULL,
};

/*
 * Sizes of marshaled values, measured in TRANSFER_BUFFER slots (not bytes).
 * NOTE(review): the JavaScript side presumably mirrors these sizes; confirm
 * before changing any of them.
 */
static const int SIZE_OF_CURSOR = 4;   /* id + 3 context words */
static const int SIZE_OF_NODE = 5;     /* id + 4 context words */
static const int SIZE_OF_POINT = 2;    /* row + column */
static const int SIZE_OF_RANGE = 2 + (2 * SIZE_OF_POINT);
static const int SIZE_OF_CAPTURE = 1 + SIZE_OF_NODE;

/*
 * Store the current language version and the minimum compatible language
 * version in the first two transfer-buffer slots, then return the address of
 * the transfer buffer.
 */
void *ts_init() {
  const void **slots = TRANSFER_BUFFER;
  slots[1] = (const void *)TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION;
  slots[0] = (const void *)TREE_SITTER_LANGUAGE_VERSION;
  return (void *)slots;
}

/*
 * Convert a count of 16-bit code units into a byte count (two bytes per
 * unit). The result wraps modulo 2^32, like any uint32_t arithmetic.
 */
static uint32_t code_unit_to_byte(uint32_t unit) {
  const uint32_t bytes_per_unit = 2;
  return unit * bytes_per_unit;
}

// Converts a count of bytes into a count of 16-bit code units, rounding down.
static uint32_t byte_to_code_unit(uint32_t byte) {
  return byte / 2u;
}

// Writes `node` into five consecutive slots starting at `buffer`: the node id
// followed by the four context words, with context[0] and context[2]
// converted from bytes to code units.
static inline void marshal_node(const void **buffer, TSNode node) {
  const uint32_t *context = node.context;
  buffer[0] = node.id;
  buffer[1] = (const void *)byte_to_code_unit(context[0]);
  buffer[2] = (const void *)context[1];
  buffer[3] = (const void *)byte_to_code_unit(context[2]);
  buffer[4] = (const void *)context[3];
}

// Rebuilds the `index`-th node stored in the transfer buffer (each node takes
// SIZE_OF_NODE slots), converting context[0] and context[2] from code units
// back to bytes and attaching `tree`.
static inline TSNode unmarshal_node_at(const TSTree *tree, uint32_t index) {
  const void **slots = &TRANSFER_BUFFER[index * SIZE_OF_NODE];
  TSNode result;
  result.tree = tree;
  result.id = slots[0];
  result.context[0] = code_unit_to_byte((uint32_t)slots[1]);
  result.context[1] = (uint32_t)slots[2];
  result.context[2] = code_unit_to_byte((uint32_t)slots[3]);
  result.context[3] = (uint32_t)slots[4];
  return result;
}

// Rebuilds the node stored at the very start of the transfer buffer.
static inline TSNode unmarshal_node(const TSTree *tree) {
  const uint32_t first_position = 0;
  return unmarshal_node_at(tree, first_position);
}

// Writes `cursor` into the first four transfer-buffer slots: its id followed
// by its three context words.
static inline void marshal_cursor(const TSTreeCursor *cursor) {
  const void **out = TRANSFER_BUFFER;
  out[0] = cursor->id;
  for (unsigned i = 0; i < 3; i++) {
    out[1 + i] = (const void *)cursor->context[i];
  }
}

// Rebuilds a cursor from four consecutive slots at `buffer` (id, then three
// context words) and attaches `tree` to it.
static inline TSTreeCursor unmarshal_cursor(const void **buffer, const TSTree *tree) {
  TSTreeCursor result;
  result.tree = tree;
  result.id = buffer[0];
  for (unsigned i = 0; i < 3; i++) {
    result.context[i] = (uint32_t)buffer[1 + i];
  }
  return result;
}

// Writes `point` into transfer-buffer slots 0 (row) and 1 (column, converted
// from bytes to code units).
static void marshal_point(TSPoint point) {
  const uint32_t column_units = byte_to_code_unit(point.column);
  TRANSFER_BUFFER[0] = (const void *)point.row;
  TRANSFER_BUFFER[1] = (const void *)column_units;
}

// Reads a point from two slots at `address`: the row, then the column in code
// units (converted back to bytes).
static TSPoint unmarshal_point(const void **address) {
  const uint32_t row = (uint32_t)address[0];
  const uint32_t column_units = (uint32_t)address[1];
  TSPoint result;
  result.row = row;
  result.column = code_unit_to_byte(column_units);
  return result;
}

// Converts, in place, the byte offsets and point columns of `range` from bytes
// to code units. Rows are left untouched.
static void marshal_range(TSRange *range) {
  uint32_t *fields[] = {
    &range->start_byte,
    &range->end_byte,
    &range->start_point.column,
    &range->end_point.column,
  };
  for (unsigned i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
    *fields[i] = byte_to_code_unit(*fields[i]);
  }
}

// Converts, in place, the offsets and point columns of `range` from code units
// back to bytes. Rows are left untouched.
static void unmarshal_range(TSRange *range) {
  uint32_t *fields[] = {
    &range->start_byte,
    &range->end_byte,
    &range->start_point.column,
    &range->end_point.column,
  };
  for (unsigned i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
    *fields[i] = code_unit_to_byte(*fields[i]);
  }
}

// Reads an edit from the start of the transfer buffer. Layout: three points
// (start, old end, new end) followed by three offsets in code units (start,
// old end, new end), which are converted to bytes.
static TSInputEdit unmarshal_edit() {
  const void **points = TRANSFER_BUFFER;
  const void **offsets = TRANSFER_BUFFER + 3 * SIZE_OF_POINT;

  TSInputEdit edit;
  edit.start_point = unmarshal_point(points + 0 * SIZE_OF_POINT);
  edit.old_end_point = unmarshal_point(points + 1 * SIZE_OF_POINT);
  edit.new_end_point = unmarshal_point(points + 2 * SIZE_OF_POINT);
  edit.start_byte = code_unit_to_byte((uint32_t)offsets[0]);
  edit.old_end_byte = code_unit_to_byte((uint32_t)offsets[1]);
  edit.new_end_byte = code_unit_to_byte((uint32_t)offsets[2]);
  return edit;
}

// Writes `metadata` into the transfer buffer: slot 0 holds 3 followed by the
// major, minor and patch versions in slots 1-3. When `metadata` is NULL only
// slot 0 is written, with 0.
static void marshal_language_metadata(const TSLanguageMetadata *metadata) {
  if (metadata != NULL) {
    TRANSFER_BUFFER[0] = (const void*)3;
    TRANSFER_BUFFER[1] = (const void*)(uint32_t)metadata->major_version;
    TRANSFER_BUFFER[2] = (const void*)(uint32_t)metadata->minor_version;
    TRANSFER_BUFFER[3] = (const void*)(uint32_t)metadata->patch_version;
  } else {
    TRANSFER_BUFFER[0] = 0;
  }
}

/********************/
/* Section - Parser */
/********************/

// Hooks that are declared here but defined outside this file.
// NOTE(review): presumably provided by the JavaScript library that defines
// functions of the same names — confirm against the build setup.

// Asks for input text at the given position. `call_parse_callback` passes the
// index and column in code units and reads the produced length back through
// `length_read`.
extern void tree_sitter_parse_callback(
  char *input_buffer,
  uint32_t index,
  uint32_t row,
  uint32_t column,
  uint32_t *length_read
);

// Receives one log message; `is_lex_message` is true for lexer messages.
extern void tree_sitter_log_callback(
  bool is_lex_message,
  const char *message
);

// Receives parse progress; its return value is handed back to the parser by
// `progress_callback`.
extern bool tree_sitter_progress_callback(
  uint32_t current_offset,
  bool has_error
);

// Receives query progress; its return value is handed back to the query
// cursor by `query_progress_callback`.
extern bool tree_sitter_query_progress_callback(
  uint32_t current_offset
);

// Input-read adapter: `payload` is the input buffer. Forwards the request to
// `tree_sitter_parse_callback` with the index and column in code units, then
// converts the reported length to bytes, clamping it to INPUT_BUFFER_SIZE - 2
// when it would reach or exceed the buffer size.
static const char *call_parse_callback(
  void *payload,
  uint32_t byte,
  TSPoint position,
  uint32_t *bytes_read
) {
  char *input_buffer = (char *)payload;
  const uint32_t index_units = byte_to_code_unit(byte);
  const uint32_t column_units = byte_to_code_unit(position.column);
  tree_sitter_parse_callback(
    input_buffer,
    index_units,
    position.row,
    column_units,
    bytes_read
  );

  uint32_t length = code_unit_to_byte(*bytes_read);
  if (length >= INPUT_BUFFER_SIZE) {
    length = INPUT_BUFFER_SIZE - 2;
  }
  *bytes_read = length;
  return input_buffer;
}

// Logger adapter: forwards `message` to `tree_sitter_log_callback`, flagging
// whether it came from the lexer. `payload` is not used.
static void call_log_callback(
  void *payload,
  TSLogType log_type,
  const char *message
) {
  const bool is_lex_message = (log_type == TSLogTypeLex);
  tree_sitter_log_callback(is_lex_message, message);
}

// Parse-progress adapter: passes the current byte offset and error flag from
// `state` to `tree_sitter_progress_callback` and returns its answer.
static bool progress_callback(
  TSParseState *state
) {
  const uint32_t offset = state->current_byte_offset;
  const bool has_error = state->has_error;
  return tree_sitter_progress_callback(offset, has_error);
}

// Query-progress adapter: passes the current byte offset from `state` to
// `tree_sitter_query_progress_callback` and returns its answer.
static bool query_progress_callback(
  TSQueryCursorState *state
) {
  const uint32_t offset = state->current_byte_offset;
  return tree_sitter_query_progress_callback(offset);
}

// Creates a parser plus a zeroed input buffer of INPUT_BUFFER_SIZE bytes and
// returns both through the transfer buffer (slot 0: parser, slot 1: buffer).
void ts_parser_new_wasm() {
  TRANSFER_BUFFER[0] = ts_parser_new();
  TRANSFER_BUFFER[1] = calloc(INPUT_BUFFER_SIZE, sizeof(char));
}

// Installs `call_log_callback` as the parser's logger when `should_log` is
// set, or a logger with no function otherwise. The parser itself is the
// logger payload.
void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) {
  TSLogger logger;
  logger.payload = self;
  logger.log = NULL;
  if (should_log) {
    logger.log = call_log_callback;
  }
  ts_parser_set_logger(self, logger);
}

// Parses UTF-16LE input that is pulled through `call_parse_callback`, with
// `input_buffer` as the callback payload.
//
// When `range_count` is non-zero, `ranges` (given in code units) is converted
// to bytes in place, installed as the included ranges and then freed;
// otherwise the included ranges are cleared. Progress is reported through
// `progress_callback`. Returns whatever the parser returns.
TSTree *ts_parser_parse_wasm(
  TSParser *self,
  char *input_buffer,
  const TSTree *old_tree,
  TSRange *ranges,
  uint32_t range_count
) {
  if (range_count == 0) {
    ts_parser_set_included_ranges(self, NULL, 0);
  } else {
    for (uint32_t i = 0; i < range_count; i++) {
      unmarshal_range(ranges + i);
    }
    ts_parser_set_included_ranges(self, ranges, range_count);
    free(ranges);
  }

  TSInput input = {
    input_buffer,
    call_parse_callback,
    TSInputEncodingUTF16LE,
    NULL,
  };
  TSParseOptions options = {.payload = NULL, .progress_callback = progress_callback};

  return ts_parser_parse_with_options(self, old_tree, input, options);
}

// Returns a heap copy of the parser's included ranges, converted to code
// units, through the transfer buffer (slot 0: count, slot 1: copied array).
void ts_parser_included_ranges_wasm(TSParser *self) {
  uint32_t count = 0;
  const TSRange *source = ts_parser_included_ranges(self, &count);

  // Work on a copy so the parser's own ranges stay in bytes.
  TSRange *copy = malloc(sizeof(TSRange) * count);
  memcpy(copy, source, sizeof(TSRange) * count);
  for (uint32_t i = 0; i < count; i++) {
    marshal_range(copy + i);
  }

  if (count) {
    TRANSFER_BUFFER[0] = (const void *)count;
  } else {
    TRANSFER_BUFFER[0] = NULL;
  }
  TRANSFER_BUFFER[1] = copy;
}

/**********************/
/* Section - Language */
/**********************/

// Returns non-zero when the symbol's type is TSSymbolTypeRegular.
int ts_language_type_is_named_wasm(const TSLanguage *self, TSSymbol typeId) {
  return ts_language_symbol_type(self, typeId) == TSSymbolTypeRegular;
}

// Returns non-zero when the symbol's type is at most TSSymbolTypeAnonymous.
int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) {
  return ts_language_symbol_type(self, typeId) <= TSSymbolTypeAnonymous;
}

// Writes the language's metadata into the transfer buffer using
// `marshal_language_metadata`.
void ts_language_metadata_wasm(const TSLanguage *self) {
  marshal_language_metadata(ts_language_metadata(self));
}

// Returns the language's supertype symbols through the transfer buffer
// (slot 0: count, slot 1: pointer to the symbol array).
void ts_language_supertypes_wasm(const TSLanguage *self) {
  uint32_t count;
  const TSSymbol *symbols = ts_language_supertypes(self, &count);
  TRANSFER_BUFFER[1] = symbols;
  TRANSFER_BUFFER[0] = (const void *)count;
}

// Returns the subtype symbols of `supertype` through the transfer buffer
// (slot 0: count, slot 1: pointer to the symbol array).
void ts_language_subtypes_wasm(const TSLanguage *self, TSSymbol supertype) {
  uint32_t count;
  const TSSymbol *symbols = ts_language_subtypes(self, supertype, &count);
  TRANSFER_BUFFER[1] = symbols;
  TRANSFER_BUFFER[0] = (const void *)count;
}

/******************/
/* Section - Tree */
/******************/

void ts_tree_root_node_wasm(const TSTree *tree) {
  marshal_node(TRANSFER_BUFFER, ts_tree_root_node(tree));
}

// Writes the tree's root node, shifted by an offset, into the start of the
// transfer buffer. The offset (in code units) and the extent point are read
// from the slots that follow the node-sized area.
void ts_tree_root_node_with_offset_wasm(const TSTree *tree) {
  const void **args = &TRANSFER_BUFFER[SIZE_OF_NODE];
  const uint32_t offset_bytes = code_unit_to_byte((uint32_t)args[0]);
  const TSPoint offset_extent = unmarshal_point(&args[1]);
  TSNode root = ts_tree_root_node_with_offset(tree, offset_bytes, offset_extent);
  marshal_node(TRANSFER_BUFFER, root);
}

// Applies the edit stored in the transfer buffer to `tree`.
void ts_tree_edit_wasm(TSTree *tree) {
  const TSInputEdit pending = unmarshal_edit();
  ts_tree_edit(tree, &pending);
}

// Returns the tree's included ranges, converted to code units, through the
// transfer buffer (slot 0: count, slot 1: range array).
void ts_tree_included_ranges_wasm(const TSTree *tree) {
  uint32_t count;
  TSRange *list = ts_tree_included_ranges(tree, &count);
  for (uint32_t i = 0; i < count; i++) {
    marshal_range(list + i);
  }
  if (count) {
    TRANSFER_BUFFER[0] = (const void *)count;
  } else {
    TRANSFER_BUFFER[0] = NULL;
  }
  TRANSFER_BUFFER[1] = (const void *)list;
}

// Returns the ranges that differ between `tree` and `other`, converted to code
// units, through the transfer buffer (slot 0: count, slot 1: range array).
void ts_tree_get_changed_ranges_wasm(TSTree *tree, TSTree *other) {
  unsigned count;
  TSRange *changed = ts_tree_get_changed_ranges(tree, other, &count);
  for (unsigned i = 0; i < count; i++) {
    marshal_range(changed + i);
  }
  TRANSFER_BUFFER[1] = (const void *)changed;
  TRANSFER_BUFFER[0] = (const void *)count;
}

/************************/
/* Section - TreeCursor */
/************************/

// Creates a cursor on the node held in the transfer buffer and writes the new
// cursor back into the buffer.
void ts_tree_cursor_new_wasm(const TSTree *tree) {
  TSTreeCursor created = ts_tree_cursor_new(unmarshal_node(tree));
  marshal_cursor(&created);
}

// Copies the cursor held in the transfer buffer; the copy replaces it there.
void ts_tree_cursor_copy_wasm(const TSTree *tree) {
  TSTreeCursor original = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSTreeCursor duplicate = ts_tree_cursor_copy(&original);
  marshal_cursor(&duplicate);
}

// Deletes the cursor held in the transfer buffer.
void ts_tree_cursor_delete_wasm(const TSTree *tree) {
  TSTreeCursor doomed = unmarshal_cursor(TRANSFER_BUFFER, tree);
  ts_tree_cursor_delete(&doomed);
}

// Resets a cursor onto a node. The node is read from the start of the transfer
// buffer and the cursor from the slots right after it; the updated cursor is
// written back to the start of the buffer.
void ts_tree_cursor_reset_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER + SIZE_OF_NODE, tree);
  TSNode target = unmarshal_node(tree);
  ts_tree_cursor_reset(&cursor, target);
  marshal_cursor(&cursor);
}

// Resets one cursor to the state of another. The destination cursor is read
// from the start of the transfer buffer and the source cursor from the slots
// right after it; the updated destination is written back to the start.
void ts_tree_cursor_reset_to_wasm(const TSTree *_dst, const TSTree *_src) {
  TSTreeCursor source = unmarshal_cursor(TRANSFER_BUFFER + SIZE_OF_CURSOR, _src);
  TSTreeCursor destination = unmarshal_cursor(TRANSFER_BUFFER, _dst);
  ts_tree_cursor_reset_to(&destination, &source);
  marshal_cursor(&destination);
}

// Moves the cursor held in the transfer buffer to its first child, writes the
// cursor back and returns what the move reported.
bool ts_tree_cursor_goto_first_child_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const bool moved = ts_tree_cursor_goto_first_child(&walker);
  marshal_cursor(&walker);
  return moved;
}

// Moves the cursor held in the transfer buffer to its last child, writes the
// cursor back and returns what the move reported.
bool ts_tree_cursor_goto_last_child_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const bool moved = ts_tree_cursor_goto_last_child(&walker);
  marshal_cursor(&walker);
  return moved;
}

bool ts_tree_cursor_goto_first_child_for_index_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const void **address = TRANSFER_BUFFER + 3;
  uint32_t index = code_unit_to_byte((uint32_t)address[0]);
  bool result = ts_tree_cursor_goto_first_child_for_byte(&cursor, index);
  marshal_cursor(&cursor);
  return result;
}

bool ts_tree_cursor_goto_first_child_for_position_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const void **address = TRANSFER_BUFFER + 3;
  TSPoint point = unmarshal_point(address);
  bool result = ts_tree_cursor_goto_first_child_for_point(&cursor, point);
  marshal_cursor(&cursor);
  return result;
}

// Moves the cursor held in the transfer buffer to its next sibling, writes the
// cursor back and returns what the move reported.
bool ts_tree_cursor_goto_next_sibling_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const bool moved = ts_tree_cursor_goto_next_sibling(&walker);
  marshal_cursor(&walker);
  return moved;
}

// Moves the cursor held in the transfer buffer to its previous sibling, writes
// the cursor back and returns what the move reported.
bool ts_tree_cursor_goto_previous_sibling_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const bool moved = ts_tree_cursor_goto_previous_sibling(&walker);
  marshal_cursor(&walker);
  return moved;
}

// Moves the cursor held in the transfer buffer to the descendant with the
// given index and writes the cursor back.
void ts_tree_cursor_goto_descendant_wasm(const TSTree *tree, uint32_t goal_descendant_index) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  ts_tree_cursor_goto_descendant(&walker, goal_descendant_index);
  marshal_cursor(&walker);
}

// Moves the cursor held in the transfer buffer to its parent, writes the
// cursor back and returns what the move reported.
bool ts_tree_cursor_goto_parent_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  const bool moved = ts_tree_cursor_goto_parent(&walker);
  marshal_cursor(&walker);
  return moved;
}

// Returns the symbol of the node under the cursor held in the transfer buffer.
uint16_t ts_tree_cursor_current_node_type_id_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_node_symbol(ts_tree_cursor_current_node(&walker));
}

// Returns the parse state of the node under the cursor.
uint16_t ts_tree_cursor_current_node_state_id_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_node_parse_state(ts_tree_cursor_current_node(&walker));
}

// Returns whether the node under the cursor is named.
bool ts_tree_cursor_current_node_is_named_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_node_is_named(ts_tree_cursor_current_node(&walker));
}

// Returns whether the node under the cursor is missing.
bool ts_tree_cursor_current_node_is_missing_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_node_is_missing(ts_tree_cursor_current_node(&walker));
}

// Returns the id of the node under the cursor, narrowed to 32 bits.
uint32_t ts_tree_cursor_current_node_id_wasm(const TSTree *tree) {
  TSTreeCursor walker = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSNode current = ts_tree_cursor_current_node(&walker);
  return (uint32_t)current.id;
}

void ts_tree_cursor_start_position_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSNode node = ts_tree_cursor_current_node(&cursor);
  marshal_point(ts_node_start_point(node));
}

void ts_tree_cursor_end_position_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSNode node = ts_tree_cursor_current_node(&cursor);
  marshal_point(ts_node_end_point(node));
}

uint32_t ts_tree_cursor_start_index_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSNode node = ts_tree_cursor_current_node(&cursor);
  return byte_to_code_unit(ts_node_start_byte(node));
}

uint32_t ts_tree_cursor_end_index_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  TSNode node = ts_tree_cursor_current_node(&cursor);
  return byte_to_code_unit(ts_node_end_byte(node));
}

uint32_t ts_tree_cursor_current_field_id_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_tree_cursor_current_field_id(&cursor);
}

uint32_t ts_tree_cursor_current_depth_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_tree_cursor_current_depth(&cursor);
}

uint32_t ts_tree_cursor_current_descendant_index_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  return ts_tree_cursor_current_descendant_index(&cursor);
}

void ts_tree_cursor_current_node_wasm(const TSTree *tree) {
  TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree);
  marshal_node(TRANSFER_BUFFER, ts_tree_cursor_current_node(&cursor));
}

/******************/
/* Section - Node */
/******************/

// Tree cursor reused across calls by the child-listing and descendant-search
// functions below; each use resets it onto a node first.
static TSTreeCursor scratch_cursor = {0};
// Query cursor shared by the query functions; created on first use.
static TSQueryCursor *scratch_query_cursor = NULL;

// Returns the symbol of the node held in the transfer buffer.
uint16_t ts_node_symbol_wasm(const TSTree *tree) {
  return ts_node_symbol(unmarshal_node(tree));
}

// Returns the field name of the transferred node's child at `index`.
const char *ts_node_field_name_for_child_wasm(const TSTree *tree, uint32_t index) {
  return ts_node_field_name_for_child(unmarshal_node(tree), index);
}

// Returns the field name of the transferred node's named child at `index`.
const char *ts_node_field_name_for_named_child_wasm(const TSTree *tree, uint32_t index) {
  return ts_node_field_name_for_named_child(unmarshal_node(tree), index);
}

void ts_node_children_by_field_id_wasm(const TSTree *tree, uint32_t field_id) {
  TSNode node = unmarshal_node(tree);
  TSTreeCursor cursor = ts_tree_cursor_new(node);

  bool done = field_id == 0;
  if (!done) {
    ts_tree_cursor_reset(&cursor, node);
    ts_tree_cursor_goto_first_child(&cursor);
  }

  Array(const void*) result = array_new();

  while (!done) {
    while (ts_tree_cursor_current_field_id(&cursor) != field_id) {
      if (!ts_tree_cursor_goto_next_sibling(&cursor)) {
        done = true;
        break;
      }
    }
    if (done) {
      break;
    }
    TSNode result_node = ts_tree_cursor_current_node(&cursor);
    if (!ts_tree_cursor_goto_next_sibling(&cursor)) {
      done = true;
    }
    array_grow_by(&result, SIZE_OF_NODE);
    marshal_node(result.contents + result.size - SIZE_OF_NODE, result_node);
  }
  ts_tree_cursor_delete(&cursor);

  TRANSFER_BUFFER[0] = (const void*)(result.size / SIZE_OF_NODE);
  TRANSFER_BUFFER[1] = (const void*)result.contents;
}

// Looks up a child of the transferred node with `ts_node_first_child_for_byte`
// and writes it back. The offset (in code units) is read from the slot that
// follows the node.
void ts_node_first_child_for_byte_wasm(const TSTree *tree) {
  const uint32_t goal_units = (uint32_t)TRANSFER_BUFFER[SIZE_OF_NODE];
  TSNode parent = unmarshal_node(tree);
  TSNode found = ts_node_first_child_for_byte(parent, code_unit_to_byte(goal_units));
  marshal_node(TRANSFER_BUFFER, found);
}

// Same lookup using `ts_node_first_named_child_for_byte`.
void ts_node_first_named_child_for_byte_wasm(const TSTree *tree) {
  const uint32_t goal_units = (uint32_t)TRANSFER_BUFFER[SIZE_OF_NODE];
  TSNode parent = unmarshal_node(tree);
  TSNode found = ts_node_first_named_child_for_byte(parent, code_unit_to_byte(goal_units));
  marshal_node(TRANSFER_BUFFER, found);
}

// Returns the grammar symbol of the node held in the transfer buffer.
uint16_t ts_node_grammar_symbol_wasm(const TSTree *tree) {
  return ts_node_grammar_symbol(unmarshal_node(tree));
}

// Returns the number of children of the transferred node.
uint32_t ts_node_child_count_wasm(const TSTree *tree) {
  return ts_node_child_count(unmarshal_node(tree));
}

// Returns the number of named children of the transferred node.
uint32_t ts_node_named_child_count_wasm(const TSTree *tree) {
  return ts_node_named_child_count(unmarshal_node(tree));
}

// Replaces the transferred node with its child at `index`.
void ts_node_child_wasm(const TSTree *tree, uint32_t index) {
  TSNode parent = unmarshal_node(tree);
  TSNode child = ts_node_child(parent, index);
  marshal_node(TRANSFER_BUFFER, child);
}

// Replaces the transferred node with its named child at `index`.
void ts_node_named_child_wasm(const TSTree *tree, uint32_t index) {
  TSNode parent = unmarshal_node(tree);
  TSNode child = ts_node_named_child(parent, index);
  marshal_node(TRANSFER_BUFFER, child);
}

// Replaces the transferred node with its child for field `field_id`.
void ts_node_child_by_field_id_wasm(const TSTree *tree, uint32_t field_id) {
  TSNode parent = unmarshal_node(tree);
  TSNode child = ts_node_child_by_field_id(parent, field_id);
  marshal_node(TRANSFER_BUFFER, child);
}

// Replaces the transferred node with its next sibling.
void ts_node_next_sibling_wasm(const TSTree *tree) {
  TSNode sibling = ts_node_next_sibling(unmarshal_node(tree));
  marshal_node(TRANSFER_BUFFER, sibling);
}

// Replaces the transferred node with its previous sibling.
void ts_node_prev_sibling_wasm(const TSTree *tree) {
  TSNode sibling = ts_node_prev_sibling(unmarshal_node(tree));
  marshal_node(TRANSFER_BUFFER, sibling);
}

// Replaces the transferred node with its next named sibling.
void ts_node_next_named_sibling_wasm(const TSTree *tree) {
  TSNode sibling = ts_node_next_named_sibling(unmarshal_node(tree));
  marshal_node(TRANSFER_BUFFER, sibling);
}

// Replaces the transferred node with its previous named sibling.
void ts_node_prev_named_sibling_wasm(const TSTree *tree) {
  TSNode sibling = ts_node_prev_named_sibling(unmarshal_node(tree));
  marshal_node(TRANSFER_BUFFER, sibling);
}

// Returns the descendant count of the node held in the transfer buffer.
uint32_t ts_node_descendant_count_wasm(const TSTree *tree) {
  return ts_node_descendant_count(unmarshal_node(tree));
}

// Replaces the transferred node with its parent.
void ts_node_parent_wasm(const TSTree *tree) {
  TSNode parent = ts_node_parent(unmarshal_node(tree));
  marshal_node(TRANSFER_BUFFER, parent);
}

// Reads two nodes from the transfer buffer (a node, then a descendant) and
// writes back the result of `ts_node_child_with_descendant` for that pair.
void ts_node_child_with_descendant_wasm(const TSTree *tree) {
  TSNode ancestor = unmarshal_node_at(tree, 0);
  TSNode descendant = unmarshal_node_at(tree, 1);
  TSNode child = ts_node_child_with_descendant(ancestor, descendant);
  marshal_node(TRANSFER_BUFFER, child);
}

// Replaces the transferred node with its descendant for a byte range. The
// start and end offsets (in code units) are read from the two slots that
// follow the node.
void ts_node_descendant_for_index_wasm(const TSTree *tree) {
  const void **bounds = &TRANSFER_BUFFER[SIZE_OF_NODE];
  const uint32_t start_byte = code_unit_to_byte((uint32_t)bounds[0]);
  const uint32_t end_byte = code_unit_to_byte((uint32_t)bounds[1]);
  TSNode found = ts_node_descendant_for_byte_range(unmarshal_node(tree), start_byte, end_byte);
  marshal_node(TRANSFER_BUFFER, found);
}

// Same lookup, restricted to named descendants.
void ts_node_named_descendant_for_index_wasm(const TSTree *tree) {
  const void **bounds = &TRANSFER_BUFFER[SIZE_OF_NODE];
  const uint32_t start_byte = code_unit_to_byte((uint32_t)bounds[0]);
  const uint32_t end_byte = code_unit_to_byte((uint32_t)bounds[1]);
  TSNode found = ts_node_named_descendant_for_byte_range(unmarshal_node(tree), start_byte, end_byte);
  marshal_node(TRANSFER_BUFFER, found);
}

// Replaces the transferred node with its descendant for a point range. The
// start and end points are read from the slots that follow the node.
void ts_node_descendant_for_position_wasm(const TSTree *tree) {
  const void **bounds = &TRANSFER_BUFFER[SIZE_OF_NODE];
  const TSPoint range_start = unmarshal_point(bounds);
  const TSPoint range_end = unmarshal_point(bounds + SIZE_OF_POINT);
  TSNode found = ts_node_descendant_for_point_range(unmarshal_node(tree), range_start, range_end);
  marshal_node(TRANSFER_BUFFER, found);
}

// Same lookup, restricted to named descendants.
void ts_node_named_descendant_for_position_wasm(const TSTree *tree) {
  const void **bounds = &TRANSFER_BUFFER[SIZE_OF_NODE];
  const TSPoint range_start = unmarshal_point(bounds);
  const TSPoint range_end = unmarshal_point(bounds + SIZE_OF_POINT);
  TSNode found = ts_node_named_descendant_for_point_range(unmarshal_node(tree), range_start, range_end);
  marshal_node(TRANSFER_BUFFER, found);
}

void ts_node_start_point_wasm(const TSTree *tree) {
  TSNode node = unmarshal_node(tree);
  marshal_point(ts_node_start_point(node));
}

void ts_node_end_point_wasm(const TSTree *tree) {
  TSNode node = unmarshal_node(tree);
  marshal_point(ts_node_end_point(node));
}

uint32_t ts_node_start_index_wasm(const TSTree *tree) {
  TSNode node = unmarshal_node(tree);
  return byte_to_code_unit(ts_node_start_byte(node));
}

uint32_t ts_node_end_index_wasm(const TSTree *tree) {
  TSNode node = unmarshal_node(tree);
  return byte_to_code_unit(ts_node_end_byte(node));
}

char *ts_node_to_string_wasm(const TSTree *tree) {
  TSNode node = unmarshal_node(tree);
  return ts_node_string(node);
}

// Returns all children of the transferred node through the transfer buffer
// (slot 0: child count, slot 1: heap array of marshaled nodes, or NULL when
// there are no children).
void ts_node_children_wasm(const TSTree *tree) {
  TSNode parent = unmarshal_node(tree);
  uint32_t count = ts_node_child_count(parent);
  const void **result = NULL;
  if (count > 0) {
    result = (const void**)calloc(sizeof(void *), SIZE_OF_NODE * count);
    ts_tree_cursor_reset(&scratch_cursor, parent);
    ts_tree_cursor_goto_first_child(&scratch_cursor);
    for (uint32_t i = 0; i < count; i++) {
      // The cursor already sits on the first child; advance for the rest.
      if (i > 0) {
        ts_tree_cursor_goto_next_sibling(&scratch_cursor);
      }
      marshal_node(result + i * SIZE_OF_NODE, ts_tree_cursor_current_node(&scratch_cursor));
    }
  }
  TRANSFER_BUFFER[0] = (const void *)count;
  TRANSFER_BUFFER[1] = (const void *)result;
}

// Returns the named children of the transferred node through the transfer
// buffer (slot 0: named child count, slot 1: heap array of marshaled nodes, or
// NULL when there are none).
void ts_node_named_children_wasm(const TSTree *tree) {
  TSNode parent = unmarshal_node(tree);
  uint32_t count = ts_node_named_child_count(parent);
  const void **result = NULL;
  if (count > 0) {
    result = (const void**)calloc(sizeof(void *), SIZE_OF_NODE * count);
    ts_tree_cursor_reset(&scratch_cursor, parent);
    ts_tree_cursor_goto_first_child(&scratch_cursor);
    uint32_t written = 0;
    // Walk the siblings, keeping named ones, until `count` nodes have been
    // stored or the cursor runs out of siblings.
    do {
      TSNode child = ts_tree_cursor_current_node(&scratch_cursor);
      if (ts_node_is_named(child)) {
        marshal_node(result + written * SIZE_OF_NODE, child);
        written++;
      }
    } while (written < count && ts_tree_cursor_goto_next_sibling(&scratch_cursor));
  }
  TRANSFER_BUFFER[0] = (const void *)count;
  TRANSFER_BUFFER[1] = (const void *)result;
}

// Reports whether `value` occurs in the first `length` entries of `set`. The
// scan stops at the first entry that is greater than or equal to `value`, so
// entries placed after a larger one are never examined.
bool symbols_contain(const uint32_t *set, uint32_t length, uint32_t value) {
  uint32_t position = 0;
  while (position < length) {
    const uint32_t candidate = set[position++];
    if (candidate >= value) {
      return candidate == value;
    }
  }
  return false;
}

void ts_node_descendants_of_type_wasm(
  const TSTree *tree,
  const uint32_t *symbols,
  uint32_t symbol_count,
  uint32_t start_row,
  uint32_t start_column,
  uint32_t end_row,
  uint32_t end_column
) {
  TSNode node = unmarshal_node(tree);
  TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
  TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
  if (end_point.row == 0 && end_point.column == 0) {
    end_point = (TSPoint) {UINT32_MAX, UINT32_MAX};
  }

  Array(const void *) result = array_new();

  // Walk the tree depth first looking for matching nodes.
  ts_tree_cursor_reset(&scratch_cursor, node);
  bool already_visited_children = false;
  while (true) {
    TSNode descendant = ts_tree_cursor_current_node(&scratch_cursor);

    if (!already_visited_children) {
      // If this node is before the selected range, then avoid
      // descending into it.
      if (point_lte(ts_node_end_point(descendant), start_point)) {
        if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) {
          already_visited_children = false;
        } else {
          if (!ts_tree_cursor_goto_parent(&scratch_cursor)) {
            break;
          }
          already_visited_children = true;
        }
        continue;
      }

      // If this node is after the selected range, then stop walking.
      if (point_lte(end_point, ts_node_start_point(descendant))) {
        break;
      }

      // Add the node to the result if its type matches one of the given
      // node types.
      if (symbols_contain(symbols, symbol_count, ts_node_symbol(descendant))) {
        array_grow_by(&result, SIZE_OF_NODE);
        marshal_node(result.contents + result.size - SIZE_OF_NODE, descendant);
      }

      // Continue walking.
      if (ts_tree_cursor_goto_first_child(&scratch_cursor)) {
        already_visited_children = false;
      } else if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) {
        already_visited_children = false;
      } else {
        if (!ts_tree_cursor_goto_parent(&scratch_cursor)) {
          break;
        }
        already_visited_children = true;
      }
    } else {
      if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) {
        already_visited_children = false;
      } else {
        if (!ts_tree_cursor_goto_parent(&scratch_cursor)) {
          break;
        }
      }
    }
  }

  TRANSFER_BUFFER[0] = (const void *)(result.size / SIZE_OF_NODE);
  TRANSFER_BUFFER[1] = (const void *)result.contents;
}

// Returns non-zero when the node held in the transfer buffer is named.
int ts_node_is_named_wasm(const TSTree *tree) {
  return ts_node_is_named(unmarshal_node(tree));
}

// Returns non-zero when the transferred node has changes.
int ts_node_has_changes_wasm(const TSTree *tree) {
  return ts_node_has_changes(unmarshal_node(tree));
}

// Returns non-zero when the transferred node has an error.
int ts_node_has_error_wasm(const TSTree *tree) {
  return ts_node_has_error(unmarshal_node(tree));
}

// Returns non-zero when the transferred node is an error node.
int ts_node_is_error_wasm(const TSTree *tree) {
  return ts_node_is_error(unmarshal_node(tree));
}

// Returns non-zero when the transferred node is missing.
int ts_node_is_missing_wasm(const TSTree *tree) {
  return ts_node_is_missing(unmarshal_node(tree));
}

// Returns non-zero when the transferred node is an extra.
int ts_node_is_extra_wasm(const TSTree *tree) {
  return ts_node_is_extra(unmarshal_node(tree));
}

// Returns the parse state of the transferred node.
uint16_t ts_node_parse_state_wasm(const TSTree *tree) {
  return ts_node_parse_state(unmarshal_node(tree));
}

// Returns the next parse state of the transferred node.
uint16_t ts_node_next_parse_state_wasm(const TSTree *tree) {
  return ts_node_next_parse_state(unmarshal_node(tree));
}

/*******************/
/* Section - Query */
/*******************/

// Runs query `self` on the node held in the transfer buffer and returns all
// matches.
//
// Parameters:
//   start_row/start_column, end_row/end_column - point range; columns are in
//     code units and are converted to bytes here.
//   start_index/end_index - byte range, passed to the cursor unchanged.
//   match_limit - maximum number of in-progress matches; 0 means unlimited.
//   max_start_depth, timeout_micros - forwarded to the query cursor.
//
// Result, through the transfer buffer: slot 0 is the match count, slot 1 an
// array holding, per match, the pattern index, the capture count and then each
// capture as (capture index, marshaled node); slot 2 tells whether the match
// limit was exceeded.
void ts_query_matches_wasm(
  const TSQuery *self,
  const TSTree *tree,
  uint32_t start_row,
  uint32_t start_column,
  uint32_t end_row,
  uint32_t end_column,
  uint32_t start_index,
  uint32_t end_index,
  uint32_t match_limit,
  uint32_t max_start_depth,
  uint32_t timeout_micros
) {
  if (!scratch_query_cursor) {
    scratch_query_cursor = ts_query_cursor_new();
  }

  // Treat a limit of 0 as "unlimited". The limit is set exactly once, so this
  // substitution is not overwritten by a later raw `match_limit`.
  ts_query_cursor_set_match_limit(
    scratch_query_cursor,
    match_limit == 0 ? UINT32_MAX : match_limit
  );

  TSNode node = unmarshal_node(tree);
  TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
  TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
  ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
  // NOTE(review): unlike the columns, start_index/end_index are not run
  // through code_unit_to_byte — confirm the caller already supplies bytes.
  ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index);
  ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth);
  ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros);

  TSQueryCursorOptions options = {.payload = NULL, .progress_callback = query_progress_callback};

  ts_query_cursor_exec_with_options(scratch_query_cursor, self, node, &options);

  uint32_t index = 0;
  uint32_t match_count = 0;
  Array(const void *) result = array_new();

  TSQueryMatch match;
  while (ts_query_cursor_next_match(scratch_query_cursor, &match)) {
    match_count++;
    // Two header slots plus one capture record per capture.
    array_grow_by(&result, 2 + (SIZE_OF_CAPTURE * match.capture_count));
    result.contents[index++] = (const void *)(uint32_t)match.pattern_index;
    result.contents[index++] = (const void *)(uint32_t)match.capture_count;
    for (unsigned i = 0; i < match.capture_count; i++) {
      const TSQueryCapture *capture = &match.captures[i];
      result.contents[index++] = (const void *)capture->index;
      marshal_node(result.contents + index, capture->node);
      index += SIZE_OF_NODE;
    }
  }

  bool did_exceed_match_limit =
    ts_query_cursor_did_exceed_match_limit(scratch_query_cursor);
  TRANSFER_BUFFER[0] = (const void *)(match_count);
  TRANSFER_BUFFER[1] = (const void *)result.contents;
  TRANSFER_BUFFER[2] = (const void *)(did_exceed_match_limit);
}

void ts_query_captures_wasm(
  const TSQuery *self,
  const TSTree *tree,
  uint32_t start_row,
  uint32_t start_column,
  uint32_t end_row,
  uint32_t end_column,
  uint32_t start_index,
  uint32_t end_index,
  uint32_t match_limit,
  uint32_t max_start_depth,
  uint32_t timeout_micros
) {
  if (!scratch_query_cursor) {
    scratch_query_cursor = ts_query_cursor_new();
  }

  ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);

  TSNode node = unmarshal_node(tree);
  TSPoint start_point = {start_row, code_unit_to_byte(start_column)};
  TSPoint end_point = {end_row, code_unit_to_byte(end_column)};
  ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point);
  ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index);
  ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit);
  ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth);
  ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros);
  ts_query_cursor_exec(scratch_query_cursor, self, node);

  unsigned index = 0;
  unsigned capture_count = 0;
  Array(const void *) result = array_new();

  TSQueryMatch match;
  uint32_t capture_index;
  while (ts_query_cursor_next_capture(
    scratch_query_cursor,
    &match,
    &capture_index
  )) {
    capture_count++;

    array_grow_by(&result, 3 + (SIZE_OF_CAPTURE * match.capture_count));
    result.contents[index++] = (const void *)(uint32_t)match.pattern_index;
    result.contents[index++] = (const void *)(uint32_t)match.capture_count;
    result.contents[index++] = (const void *)capture_index;
    for (unsigned i = 0; i < match.capture_count; i++) {
      const TSQueryCapture *capture = &match.captures[i];
      result.contents[index++] = (const void *)capture->index;
      marshal_node(result.contents + index, capture->node);
      index += SIZE_OF_NODE;
    }
  }

  bool did_exceed_match_limit =
    ts_query_cursor_did_exceed_match_limit(scratch_query_cursor);
  TRANSFER_BUFFER[0] = (const void *)(capture_count);
  TRANSFER_BUFFER[1] = (const void *)result.contents;
  TRANSFER_BUFFER[2] = (const void *)(did_exceed_match_limit);
}



================================================
FILE: lib/binding_web/lib/web-tree-sitter.d.ts
================================================
// TypeScript bindings for emscripten-generated code.  Automatically generated at compile time.
/**
 * Emscripten runtime helpers and heap views. `MainModule` is declared as the
 * intersection of `WasmModule` with `typeof RuntimeExports`, so everything
 * declared here is reachable on the instantiated module object.
 */
declare namespace RuntimeExports {
    function AsciiToString(ptr: number): string;
    function stringToUTF8(str: string, outPtr: number, maxBytesToWrite: number): number;
    /**
     * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
     * emscripten HEAP, returns a copy of that string as a Javascript String object.
     *
     * @param {number} ptr
     * @param {number=} maxBytesToRead - An optional length that specifies the
     *   maximum number of bytes to read. You can omit this parameter to scan the
     *   string until the first 0 byte. If maxBytesToRead is passed, and the string
     *   at [ptr, ptr+maxBytesToRead[ contains a null byte in the middle, then the
     *   string will cut short at that byte index (i.e. maxBytesToRead will not
     *   produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
     *   frequent uses of UTF8ToString() with and without maxBytesToRead may throw
     *   JS JIT optimizations off, so it is worth to consider consistently using one
     * @return {string}
     */
    function UTF8ToString(ptr: number, maxBytesToRead?: number): string;
    function lengthBytesUTF8(str: string): number;
    function stringToUTF16(str: string, outPtr: number, maxBytesToWrite: number): number;
    /**
     * @param {string=} libName
     * @param {Object=} localScope
     * @param {number=} handle
     */
    function loadWebAssemblyModule(
      binary: Uint8Array,
      flags: {
        allowUndefined?: boolean,
        loadAsync?: boolean,
        global?: boolean,
        nodelete?: boolean;
      },
      libName?: string,
      localScope?: Record<string, any>,
      handle?: number
    ): Promise<Record<string, () => number>>;
    /**
     * @param {number} ptr
     * @param {string} type
     */
    function getValue(ptr: number, type?: string): number;
    /**
     * @param {number} ptr
     * @param {number} value
     * @param {string} type
     */
    function setValue(ptr: number, value: number, type?: string): void;
    // Callback slots; each is either a function or null.
    let currentParseCallback: ((index: number, position: {row: number, column: number}) => string | undefined) | null;
    let currentLogCallback: ((message: string, isLex: boolean) => void) | null;
    let currentProgressCallback: ((state: {currentOffset: number}) => void) | null;
    let currentQueryProgressCallback: ((state: {currentOffset: number}) => void) | null;
    // Typed-array views, one per element type.
    let HEAPF32: Float32Array;
    let HEAPF64: Float64Array;
    let HEAP_DATA_VIEW: DataView;
    let HEAP8: Int8Array
    let HEAPU8: Uint8Array;
    let HEAP16: Int16Array;
    let HEAPU16: Uint16Array;
    let HEAP32: Int32Array;
    let HEAPU32: Uint32Array;
    let HEAP64: BigInt64Array;
    let HEAPU64: BigUint64Array;
}
/**
 * Functions exported by the compiled WebAssembly module.
 *
 * Every parameter and result is a plain `number`, except for the two
 * `_orig$…` entries at the end, which use `bigint` for the timeout value.
 * Many of the `_…_wasm` functions return `void`; the TypeScript side reads
 * their results back out of the transfer buffer after the call (see e.g.
 * `Language.supertypes`).
 */
interface WasmModule {
  _malloc(_0: number): number;
  _calloc(_0: number, _1: number): number;
  _realloc(_0: number, _1: number): number;
  _free(_0: number): void;
  _ts_language_symbol_count(_0: number): number;
  _ts_language_state_count(_0: number): number;
  _ts_language_version(_0: number): number;
  _ts_language_abi_version(_0: number): number;
  _ts_language_metadata(_0: number): number;
  _ts_language_name(_0: number): number;
  _ts_language_field_count(_0: number): number;
  _ts_language_next_state(_0: number, _1: number, _2: number): number;
  _ts_language_symbol_name(_0: number, _1: number): number;
  _ts_language_symbol_for_name(_0: number, _1: number, _2: number, _3: number): number;
  _strncmp(_0: number, _1: number, _2: number): number;
  _ts_language_symbol_type(_0: number, _1: number): number;
  _ts_language_field_name_for_id(_0: number, _1: number): number;
  _ts_lookahead_iterator_new(_0: number, _1: number): number;
  _ts_lookahead_iterator_delete(_0: number): void;
  _ts_lookahead_iterator_reset_state(_0: number, _1: number): number;
  _ts_lookahead_iterator_reset(_0: number, _1: number, _2: number): number;
  _ts_lookahead_iterator_next(_0: number): number;
  _ts_lookahead_iterator_current_symbol(_0: number): number;
  _memset(_0: number, _1: number, _2: number): number;
  _memcpy(_0: number, _1: number, _2: number): number;
  _ts_parser_delete(_0: number): void;
  _ts_parser_reset(_0: number): void;
  _ts_parser_set_language(_0: number, _1: number): number;
  _ts_parser_timeout_micros(_0: number): number;
  _ts_parser_set_timeout_micros(_0: number, _1: number, _2: number): void;
  _ts_parser_set_included_ranges(_0: number, _1: number, _2: number): number;
  _memmove(_0: number, _1: number, _2: number): number;
  _memcmp(_0: number, _1: number, _2: number): number;
  _ts_query_new(_0: number, _1: number, _2: number, _3: number, _4: number): number;
  _ts_query_delete(_0: number): void;
  _iswspace(_0: number): number;
  _iswalnum(_0: number): number;
  _ts_query_pattern_count(_0: number): number;
  _ts_query_capture_count(_0: number): number;
  _ts_query_string_count(_0: number): number;
  _ts_query_capture_name_for_id(_0: number, _1: number, _2: number): number;
  _ts_query_capture_quantifier_for_id(_0: number, _1: number, _2: number): number;
  _ts_query_string_value_for_id(_0: number, _1: number, _2: number): number;
  _ts_query_predicates_for_pattern(_0: number, _1: number, _2: number): number;
  _ts_query_start_byte_for_pattern(_0: number, _1: number): number;
  _ts_query_end_byte_for_pattern(_0: number, _1: number): number;
  _ts_query_is_pattern_rooted(_0: number, _1: number): number;
  _ts_query_is_pattern_non_local(_0: number, _1: number): number;
  _ts_query_is_pattern_guaranteed_at_step(_0: number, _1: number): number;
  _ts_query_disable_capture(_0: number, _1: number, _2: number): void;
  _ts_query_disable_pattern(_0: number, _1: number): void;
  _ts_tree_copy(_0: number): number;
  _ts_tree_delete(_0: number): void;
  _ts_init(): number;
  _ts_parser_new_wasm(): void;
  _ts_parser_enable_logger_wasm(_0: number, _1: number): void;
  _ts_parser_parse_wasm(_0: number, _1: number, _2: number, _3: number, _4: number): number;
  _ts_parser_included_ranges_wasm(_0: number): void;
  _ts_language_type_is_named_wasm(_0: number, _1: number): number;
  _ts_language_type_is_visible_wasm(_0: number, _1: number): number;
  _ts_language_supertypes_wasm(_0: number): void;
  _ts_language_subtypes_wasm(_0: number, _1: number): void;
  _ts_tree_root_node_wasm(_0: number): void;
  _ts_tree_root_node_with_offset_wasm(_0: number): void;
  _ts_tree_edit_wasm(_0: number): void;
  _ts_tree_included_ranges_wasm(_0: number): void;
  _ts_tree_get_changed_ranges_wasm(_0: number, _1: number): void;
  _ts_tree_cursor_new_wasm(_0: number): void;
  _ts_tree_cursor_copy_wasm(_0: number): void;
  _ts_tree_cursor_delete_wasm(_0: number): void;
  _ts_tree_cursor_reset_wasm(_0: number): void;
  _ts_tree_cursor_reset_to_wasm(_0: number, _1: number): void;
  _ts_tree_cursor_goto_first_child_wasm(_0: number): number;
  _ts_tree_cursor_goto_last_child_wasm(_0: number): number;
  _ts_tree_cursor_goto_first_child_for_index_wasm(_0: number): number;
  _ts_tree_cursor_goto_first_child_for_position_wasm(_0: number): number;
  _ts_tree_cursor_goto_next_sibling_wasm(_0: number): number;
  _ts_tree_cursor_goto_previous_sibling_wasm(_0: number): number;
  _ts_tree_cursor_goto_descendant_wasm(_0: number, _1: number): void;
  _ts_tree_cursor_goto_parent_wasm(_0: number): number;
  _ts_tree_cursor_current_node_type_id_wasm(_0: number): number;
  _ts_tree_cursor_current_node_state_id_wasm(_0: number): number;
  _ts_tree_cursor_current_node_is_named_wasm(_0: number): number;
  _ts_tree_cursor_current_node_is_missing_wasm(_0: number): number;
  _ts_tree_cursor_current_node_id_wasm(_0: number): number;
  _ts_tree_cursor_start_position_wasm(_0: number): void;
  _ts_tree_cursor_end_position_wasm(_0: number): void;
  _ts_tree_cursor_start_index_wasm(_0: number): number;
  _ts_tree_cursor_end_index_wasm(_0: number): number;
  _ts_tree_cursor_current_field_id_wasm(_0: number): number;
  _ts_tree_cursor_current_depth_wasm(_0: number): number;
  _ts_tree_cursor_current_descendant_index_wasm(_0: number): number;
  _ts_tree_cursor_current_node_wasm(_0: number): void;
  _ts_node_symbol_wasm(_0: number): number;
  _ts_node_field_name_for_child_wasm(_0: number, _1: number): number;
  _ts_node_field_name_for_named_child_wasm(_0: number, _1: number): number;
  _ts_node_children_by_field_id_wasm(_0: number, _1: number): void;
  _ts_node_first_child_for_byte_wasm(_0: number): void;
  _ts_node_first_named_child_for_byte_wasm(_0: number): void;
  _ts_node_grammar_symbol_wasm(_0: number): number;
  _ts_node_child_count_wasm(_0: number): number;
  _ts_node_named_child_count_wasm(_0: number): number;
  _ts_node_child_wasm(_0: number, _1: number): void;
  _ts_node_named_child_wasm(_0: number, _1: number): void;
  _ts_node_child_by_field_id_wasm(_0: number, _1: number): void;
  _ts_node_next_sibling_wasm(_0: number): void;
  _ts_node_prev_sibling_wasm(_0: number): void;
  _ts_node_next_named_sibling_wasm(_0: number): void;
  _ts_node_prev_named_sibling_wasm(_0: number): void;
  _ts_node_descendant_count_wasm(_0: number): number;
  _ts_node_parent_wasm(_0: number): void;
  _ts_node_child_with_descendant_wasm(_0: number): void;
  _ts_node_descendant_for_index_wasm(_0: number): void;
  _ts_node_named_descendant_for_index_wasm(_0: number): void;
  _ts_node_descendant_for_position_wasm(_0: number): void;
  _ts_node_named_descendant_for_position_wasm(_0: number): void;
  _ts_node_start_point_wasm(_0: number): void;
  _ts_node_end_point_wasm(_0: number): void;
  _ts_node_start_index_wasm(_0: number): number;
  _ts_node_end_index_wasm(_0: number): number;
  _ts_node_to_string_wasm(_0: number): number;
  _ts_node_children_wasm(_0: number): void;
  _ts_node_named_children_wasm(_0: number): void;
  _ts_node_descendants_of_type_wasm(_0: number, _1: number, _2: number, _3: number, _4: number, _5: number, _6: number): void;
  _ts_node_is_named_wasm(_0: number): number;
  _ts_node_has_changes_wasm(_0: number): number;
  _ts_node_has_error_wasm(_0: number): number;
  _ts_node_is_error_wasm(_0: number): number;
  _ts_node_is_missing_wasm(_0: number): number;
  _ts_node_is_extra_wasm(_0: number): number;
  _ts_node_parse_state_wasm(_0: number): number;
  _ts_node_next_parse_state_wasm(_0: number): number;
  _ts_query_matches_wasm(_0: number, _1: number, _2: number, _3: number, _4: number, _5: number, _6: number, _7: number, _8: number, _9: number, _10: number): void;
  _ts_query_captures_wasm(_0: number, _1: number, _2: number, _3: number, _4: number, _5: number, _6: number, _7: number, _8: number, _9: number, _10: number): void;
  _iswalpha(_0: number): number;
  _iswblank(_0: number): number;
  _iswdigit(_0: number): number;
  _iswlower(_0: number): number;
  _iswupper(_0: number): number;
  _iswxdigit(_0: number): number;
  _memchr(_0: number, _1: number, _2: number): number;
  _strlen(_0: number): number;
  _strcmp(_0: number, _1: number): number;
  _strncat(_0: number, _1: number, _2: number): number;
  _strncpy(_0: number, _1: number, _2: number): number;
  _towlower(_0: number): number;
  _towupper(_0: number): number;
  // Same names as the timeout functions above, but with the timeout typed as a bigint.
  _orig$ts_parser_timeout_micros(_0: number): bigint;
  _orig$ts_parser_set_timeout_micros(_0: number, _1: bigint): void;
}

/** The instantiated module: the WASM exports combined with the runtime helpers. */
export type MainModule = WasmModule & typeof RuntimeExports;
/** Factory that resolves to a {@link MainModule}; `options` are optional module overrides. */
export default function MainModuleFactory(options?: Partial<EmscriptenModule>): Promise<MainModule>;



================================================
FILE: lib/binding_web/script/build.js
================================================
import esbuild from 'esbuild';
import fs from 'fs/promises';
import path from 'path';

// Output module format: CommonJS when the CJS environment variable is set, ES modules otherwise.
const format = process.env.CJS ? 'cjs' : 'esm';
// `--debug` on the command line redirects all outputs into the debug/ directory.
const debug = process.argv.includes('--debug');
// Bundle path: web-tree-sitter.js for ESM, web-tree-sitter.cjs for CommonJS.
const outfile = `${debug ? 'debug/' : ''}web-tree-sitter.${format === 'esm' ? 'js' : 'cjs'}`;

// Mirror the C sources and headers from ../src into lib/, one file at a time.
// The WASM source map is later rewritten to point at these copies.
async function copySourceFiles() {
  const sourceDir = '../src';
  const entries = await fs.readdir(sourceDir);
  const cSources = entries.filter(name => name.endsWith('.c') || name.endsWith('.h'));

  for (const name of cSources) {
    const from = path.join(sourceDir, name);
    const to = path.join('lib', name);
    await fs.copyFile(from, to);
  }
}

/**
 * Rewrites the `sources` list of the WASM source map at `inputPath` and writes
 * the result, pretty-printed with two-space indentation, to `outputPath`.
 *
 * Only tree-sitter sources are kept: entries containing `../../src/` have that
 * segment replaced by the lib directory (`../lib/` in debug builds, `lib/`
 * otherwise), and the bare `tree-sitter.c` entry is prefixed the same way.
 * Every other entry is dropped.
 *
 * NOTE(review): only `sources` is modified; `mappings` is written back as-is.
 * Confirm that consumers tolerate entries being removed from `sources`.
 */
async function processWasmSourceMap(inputPath, outputPath) {
  const srcSegment = '../../src/';
  const libPrefix = debug ? '../lib/' : 'lib/';

  const rawMap = await fs.readFile(inputPath, 'utf8');
  const sourceMap = JSON.parse(rawMap);

  const keptSources = [];
  for (const source of sourceMap.sources) {
    if (source.includes(srcSegment)) {
      keptSources.push(source.replace(srcSegment, libPrefix));
    } else if (source === 'tree-sitter.c') {
      keptSources.push(`${libPrefix}tree-sitter.c`);
    }
    // Anything else (emscripten internals) is filtered out.
  }
  sourceMap.sources = keptSources;

  const serialized = JSON.stringify(sourceMap, null, 2);
  await fs.writeFile(outputPath, serialized);
}


/**
 * Runs the full build: bundles `src/index.ts` with esbuild, copies the
 * prebuilt WASM binary next to the bundle, mirrors the C sources into lib/,
 * and writes the rewritten WASM source map beside the copied binary.
 */
async function build() {
  const outputDir = debug ? 'debug/' : '';
  const wasmTarget = `${outputDir}web-tree-sitter.wasm`;
  const moduleExtension = format === 'esm' ? '.mjs' : '.cjs';

  const bundleOptions = {
    entryPoints: ['src/index.ts'],
    bundle: true,
    platform: 'node',
    format,
    outfile,
    sourcemap: true,
    sourcesContent: true,
    keepNames: true,
    external: ['fs/*', 'fs/promises'],
    resolveExtensions: ['.ts', '.js', moduleExtension],
  };
  await esbuild.build(bundleOptions);

  // esbuild does not bundle WASM files, so the binary is copied by hand.
  await fs.copyFile('lib/web-tree-sitter.wasm', wasmTarget);

  await copySourceFiles();
  await processWasmSourceMap('lib/web-tree-sitter.wasm.map', `${wasmTarget}.map`);
}

// Log the failure and mark the process as failed: logging alone would let a
// broken build exit with status 0 and go unnoticed by npm scripts or CI.
build().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});



================================================
FILE: lib/binding_web/script/check-artifacts-fresh.ts
================================================
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'node:url';

// Every relative path below is resolved against the directory of this script.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));

// Explicitly listed binding sources and build inputs.
const bindingInputs = [
  '../lib/tree-sitter.c',
  '../src/constants.ts',
  '../src/index.ts',
  '../src/language.ts',
  '../src/lookahead_iterator.ts',
  '../src/marshal.ts',
  '../src/node.ts',
  '../src/parser.ts',
  '../src/query.ts',
  '../src/tree.ts',
  '../src/tree_cursor.ts',
  '../lib/exports.txt',
  '../lib/imports.js',
  '../lib/prefix.js',
];

// The listed inputs plus every non-hidden entry of the core include and src directories.
const inputFiles = [
  ...bindingInputs,
  ...listFiles('../../include/tree_sitter'),
  ...listFiles('../../src'),
];

const outputFiles = ['../web-tree-sitter.js', '../web-tree-sitter.wasm'];
// The oldest artifact sets the bar: any input modified after it is stale.
const outputMtime = Math.min(...outputFiles.map(getMtime));

const staleInput = inputFiles.find(inputFile => getMtime(inputFile) > outputMtime);
if (staleInput !== undefined) {
  console.log(`File '${staleInput}' has changed. Re-run 'npm run build:wasm'.`);
  process.exit(1);
}

/**
 * Lists the entries of `dir` (resolved against the script directory), skipping
 * names that start with a dot. Each result is `dir` joined with the entry name,
 * i.e. still relative to the script directory.
 */
function listFiles(dir: string): string[] {
  const absoluteDir = path.resolve(scriptDir, dir);
  const visible: string[] = [];
  for (const entry of fs.readdirSync(absoluteDir)) {
    if (entry.startsWith('.')) continue;
    visible.push(path.join(dir, entry));
  }
  return visible;
}

/** Returns the modification time of `p` (resolved against the script directory) in epoch milliseconds. */
function getMtime(p: string): number {
  const absolutePath = path.resolve(scriptDir, p);
  const { mtime } = fs.statSync(absolutePath);
  return mtime.getTime();
}



================================================
FILE: lib/binding_web/script/generate-dts.js
================================================
import { createBundle } from 'dts-buddy';

// Emit the bundled declarations twice: web-tree-sitter.d.ts and web-tree-sitter.d.cts.
const declarationExtensions = ['ts', 'cts'];

for (const ext of declarationExtensions) {
  const output = `web-tree-sitter.d.${ext}`;
  await createBundle({
    project: 'tsconfig.json',
    output,
    modules: { 'web-tree-sitter': 'src/index.ts' },
    // Drop declarations tagged @internal from the published typings.
    compilerOptions: { stripInternal: true },
  });
}



================================================
FILE: lib/binding_web/src/bindings.ts
================================================
import createModule, { type MainModule } from '../lib/web-tree-sitter';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { type Parser } from './parser';

/** The instantiated WASM module, or `null` until {@link initializeBinding} has completed once. */
export let Module: MainModule | null = null;

/**
 * @internal
 *
 * Instantiates the Tree-sitter WASM module on first use and caches it in
 * {@link Module}; later calls return the cached instance and ignore
 * `moduleOptions`. This should only be called by the {@link Parser} class via
 * {@link Parser.init}.
 */
export async function initializeBinding(moduleOptions?: Partial<EmscriptenModule>): Promise<MainModule> {
  if (Module) return Module;

  const instance = await createModule(moduleOptions);
  Module = instance;
  return instance;
}

/**
 * @internal
 *
 * Reports whether the Tree-sitter WASM module has already been instantiated.
 */
export function checkModule(): boolean {
  return Boolean(Module);
}



================================================
FILE: lib/binding_web/src/constants.ts
================================================
import { type MainModule } from '../lib/web-tree-sitter';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { ParseState, type Parser } from './parser';

/**
 * A position in a multi-line text document, in terms of rows and columns.
 *
 * Rows and columns are zero-based.
 *
 * NOTE(review): the C glue for queries passes columns through
 * `code_unit_to_byte`, which suggests columns on this side are counted in
 * code units rather than bytes — confirm.
 */
export interface Point {
  /** The zero-based row number. */
  row: number;

  /** The zero-based column number. */
  column: number;
}

/**
 * A range of positions in a multi-line text document, expressed both as
 * row/column positions and as indices.
 */
export interface Range {
  /** The start position of the range. */
  startPosition: Point;

  /** The end position of the range. */
  endPosition: Point;

  /** The start index of the range. */
  startIndex: number;

  /** The end index of the range. */
  endIndex: number;
}

/**
 * A summary of a change to a text document.
 *
 * The change is described by one shared start and two ends (before and after
 * the edit), each given both as a {@link Point} and as an index.
 */
export interface Edit {
  /** The start position of the change. */
  startPosition: Point;

  /** The end position of the change before the edit. */
  oldEndPosition: Point;

  /** The end position of the change after the edit. */
  newEndPosition: Point;

  /** The start index of the change. */
  startIndex: number;

  /** The end index of the change before the edit. */
  oldEndIndex: number;

  /** The end index of the change after the edit. */
  newEndIndex: number;
}

// Byte sizes used when stepping through WASM memory. They must stay in sync
// with the layouts the C glue code reads and writes.

/** @internal Byte width of the values read with `getValue(..., 'i16')`. */
export const SIZE_OF_SHORT = 2;

/** @internal Byte width of the values read and written as `'i32'`. */
export const SIZE_OF_INT = 4;

/** @internal Four ints. NOTE(review): the cursor layout is not visible here — confirm against the C side. */
export const SIZE_OF_CURSOR = 4 * SIZE_OF_INT;

/** @internal Five ints: `marshalNode` writes id, start index, start row, start column and the node's `[0]` handle. */
export const SIZE_OF_NODE = 5 * SIZE_OF_INT;

/** @internal Two ints; presumably row and column — confirm against the marshalling code. */
export const SIZE_OF_POINT = 2 * SIZE_OF_INT;

/** @internal Two ints plus two points; presumably mirrors the four fields of {@link Range} — confirm. */
export const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT;

/** @internal The origin position: row 0, column 0. */
export const ZERO_POINT: Point = { row: 0, column: 0 };

/**
 * A callback for parsing that takes an index and point, and should return a string.
 *
 * The signature also allows returning `undefined`.
 */
export type ParseCallback = (index: number, position: Point) => string | undefined;

/**
 * A callback that receives the parse state during parsing.
 *
 * NOTE(review): the meaning of the boolean return value is not visible in
 * this file — confirm against the parser implementation.
 */
export type ProgressCallback = (progress: ParseState) => boolean;

/**
 * A callback for logging messages.
 *
 * If `isLex` is `true`, the message is from the lexer, otherwise it's from the parser.
 */
export type LogCallback = (message: string, isLex: boolean) => void;

// Helper type for internal use.
// Binding classes take this symbol as their first constructor argument and
// pass it to assertInternal, which throws for any other value.
/** @internal */
export const INTERNAL = Symbol('INTERNAL');
/** @internal The type of the {@link INTERNAL} sentinel. */
export type Internal = typeof INTERNAL;

// Helper functions for type checking
/**
 * @internal
 *
 * Guards constructors that must not be called from outside the library.
 *
 * @throws {Error} 'Illegal constructor' unless `x` is the {@link INTERNAL} sentinel.
 */
export function assertInternal(x: unknown): asserts x is Internal {
  if (x === INTERNAL) return;
  throw new Error('Illegal constructor');
}

/**
 * @internal
 *
 * Type guard: true when `point` is truthy and both its `row` and `column`
 * are of type `number`.
 */
export function isPoint(point?: Point): point is Point {
  if (!point) return false;
  return typeof point.row === 'number' && typeof point.column === 'number';
}

/**
 * @internal
 *
 * Sets the Tree-sitter WASM module. This should only be called by the {@link Parser} class via {@link Parser.init}.
 *
 * Assigns the module-level `C` binding, which is what the rest of the
 * bindings call through.
 */
export function setModule(module: MainModule) {
  C = module;
}

/**
 * @internal
 *
 * `C` is a convenient shorthand for the Tree-sitter WASM module,
 * which allows us to call all of the exported functions.
 *
 * It is declared without an initializer and only receives a value when
 * `setModule` runs, so it must not be used before then.
 */
export let C: MainModule;



================================================
FILE: lib/binding_web/src/index.ts
================================================
export {
  Point,
  Range,
  Edit,
  ParseCallback,
  ProgressCallback,
  LogCallback,
} from './constants';
export {
  ParseOptions,
  ParseState,
  LANGUAGE_VERSION,
  MIN_COMPATIBLE_VERSION,
  Parser,
} from './parser';
export { Language } from './language';
export { Tree } from './tree';
export { Node } from './node';
export { TreeCursor } from './tree_cursor';
export {
  QueryOptions,
  QueryState,
  QueryProperties,
  QueryPredicate,
  QueryCapture,
  QueryMatch,
  CaptureQuantifier,
  PredicateStep,
  Query,
}  from './query';
export { LookaheadIterator } from './lookahead_iterator';



================================================
FILE: lib/binding_web/src/language.ts
================================================
import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants';
import { LookaheadIterator } from './lookahead_iterator';
import { unmarshalLanguageMetadata } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Query } from './query';

// Matches exported symbols named `tree_sitter_<word chars>`; `Language.load`
// uses it to pick the language function out of a loaded module.
const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/;

/**
 * Version information attached to a language, split into three numeric
 * components. Returned by `Language.metadata`.
 */
export class LanguageMetadata {
  /** The major version component. */
  readonly major_version: number;
  /** The minor version component. */
  readonly minor_version: number;
  /** The patch version component. */
  readonly patch_version: number;
}

/**
 * @internal
 *
 * Decodes the `(count, address)` pair found in the first two `i32` slots of
 * the transfer buffer into an array of `count` values, each read as an `i16`
 * from consecutive 2-byte slots starting at `address`.
 */
function readShortArrayFromTransferBuffer(): number[] {
  const count = C.getValue(TRANSFER_BUFFER, 'i32');
  const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
  const result = new Array<number>(count);

  for (let i = 0, address = buffer; i < count; i++, address += SIZE_OF_SHORT) {
    result[i] = C.getValue(address, 'i16');
  }

  return result;
}

/**
 * An opaque object that defines how to parse a particular language.
 * The code for each `Language` is generated by the Tree-sitter CLI.
 */
export class Language {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /**
   * A list of all node types in the language. The index of each type in this
   * array is its node type id.
   */
  types: string[];

  /**
   * A list of all field names in the language. The index of each field name in
   * this array is its field id.
   */
  fields: (string | null)[];

  /**
   * @internal
   *
   * Wraps the language at `address` and eagerly caches its node type and
   * field names.
   */
  constructor(internal: Internal, address: number) {
    assertInternal(internal);
    this[0] = address;

    // Only symbols whose symbol type is below 2 get a name; the remaining
    // slots of `types` are left empty.
    this.types = new Array<string>(C._ts_language_symbol_count(this[0]));
    for (let i = 0, n = this.types.length; i < n; i++) {
      if (C._ts_language_symbol_type(this[0], i) < 2) {
        this.types[i] = C.UTF8ToString(C._ts_language_symbol_name(this[0], i));
      }
    }

    // One more slot than the field count; ids without a name map to `null`.
    this.fields = new Array<string>(C._ts_language_field_count(this[0]) + 1);
    for (let i = 0, n = this.fields.length; i < n; i++) {
      const fieldName = C._ts_language_field_name_for_id(this[0], i);
      this.fields[i] = fieldName !== 0 ? C.UTF8ToString(fieldName) : null;
    }
  }

  /**
   * Gets the name of the language, or `null` if the language reports none.
   */
  get name(): string | null {
    const ptr = C._ts_language_name(this[0]);
    if (ptr === 0) return null;
    return C.UTF8ToString(ptr);
  }

  /**
   * Gets the version of the language.
   *
   * @deprecated since version 0.25.0, use {@link Language#abiVersion} instead
   */
  get version(): number {
    return C._ts_language_version(this[0]);
  }

  /**
   * Gets the ABI version of the language.
   */
  get abiVersion(): number {
    return C._ts_language_abi_version(this[0]);
  }

  /**
   * Get the metadata for this language. This information is generated by the
   * CLI, and relies on the language author providing the correct metadata in
   * the language's `tree-sitter.json` file.
   *
   * Returns `null` when the length read from the transfer buffer is zero.
   */
  get metadata(): LanguageMetadata | null {
    // NOTE(review): `_ts_language_metadata` is declared as returning a number,
    // but its result is discarded here and the transfer buffer is read
    // instead — confirm that the C function really fills the transfer buffer.
    C._ts_language_metadata(this[0]);
    const length = C.getValue(TRANSFER_BUFFER, 'i32');
    const address = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    if (length === 0) return null;
    return unmarshalLanguageMetadata(address);
  }

  /**
   * Gets the number of fields in the language.
   */
  get fieldCount(): number {
    // `fields` carries one extra slot (see the constructor).
    return this.fields.length - 1;
  }

  /**
   * Gets the number of states in the language.
   */
  get stateCount(): number {
    return C._ts_language_state_count(this[0]);
  }

  /**
   * Get the field id for a field name, or `null` if the name is unknown.
   */
  fieldIdForName(fieldName: string): number | null {
    const result = this.fields.indexOf(fieldName);
    return result !== -1 ? result : null;
  }

  /**
   * Get the field name for a field id, or `null` if there is none.
   */
  fieldNameForId(fieldId: number): string | null {
    return this.fields[fieldId] ?? null;
  }

  /**
   * Get the node type id for a node type name.
   *
   * Returns `null` when the lookup yields `0`.
   */
  idForNodeType(type: string, named: boolean): number | null {
    const typeLength = C.lengthBytesUTF8(type);
    const typeAddress = C._malloc(typeLength + 1);
    try {
      C.stringToUTF8(type, typeAddress, typeLength + 1);
      const result = C._ts_language_symbol_for_name(this[0], typeAddress, typeLength, named ? 1 : 0);
      return result || null;
    } finally {
      // Release the temporary string even if the WASM call throws.
      C._free(typeAddress);
    }
  }

  /**
   * Gets the number of node types in the language.
   */
  get nodeTypeCount(): number {
    return C._ts_language_symbol_count(this[0]);
  }

  /**
   * Get the node type name for a node type id, or `null` if there is none.
   */
  nodeTypeForId(typeId: number): string | null {
    const name = C._ts_language_symbol_name(this[0], typeId);
    return name ? C.UTF8ToString(name) : null;
  }

  /**
   * Check if a node type is named.
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/2-basic-parsing.html#named-vs-anonymous-nodes}
   */
  nodeTypeIsNamed(typeId: number): boolean {
    return Boolean(C._ts_language_type_is_named_wasm(this[0], typeId));
  }

  /**
   * Check if a node type is visible.
   */
  nodeTypeIsVisible(typeId: number): boolean {
    return Boolean(C._ts_language_type_is_visible_wasm(this[0], typeId));
  }

  /**
   * Get the supertypes ids of this language.
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.html?highlight=supertype#supertype-nodes}
   */
  get supertypes(): number[] {
    C._ts_language_supertypes_wasm(this[0]);
    return readShortArrayFromTransferBuffer();
  }

  /**
   * Get the subtype ids for a given supertype node id.
   */
  subtypes(supertype: number): number[] {
    C._ts_language_subtypes_wasm(this[0], supertype);
    return readShortArrayFromTransferBuffer();
  }

  /**
   * Get the next state id for a given state id and node type id.
   */
  nextState(stateId: number, typeId: number): number {
    return C._ts_language_next_state(this[0], stateId, typeId);
  }

  /**
   * Create a new lookahead iterator for this language and parse state.
   *
   * This returns `null` if state is invalid for this language.
   *
   * Iterating {@link LookaheadIterator} will yield valid symbols in the given
   * parse state. Newly created lookahead iterators will return the `ERROR`
   * symbol from {@link LookaheadIterator#currentType}.
   *
   * Lookahead iterators can be useful for generating suggestions and improving
   * syntax error diagnostics. To get symbols valid in an `ERROR` node, use the
   * lookahead iterator on its first leaf node state. For `MISSING` nodes, a
   * lookahead iterator created on the previous non-extra leaf node may be
   * appropriate.
   */
  lookaheadIterator(stateId: number): LookaheadIterator | null {
    const address = C._ts_lookahead_iterator_new(this[0], stateId);
    if (address) return new LookaheadIterator(INTERNAL, address, this);
    return null;
  }

  /**
   * Create a new query from a string containing one or more S-expression
   * patterns.
   *
   * The query is associated with a particular language, and can only be run
   * on syntax nodes parsed with that language. References to Queries can be
   * shared between multiple threads.
   *
   * @deprecated since version 0.25.0, call `new` on a {@link Query} instead
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
   */
  query(source: string): Query {
    console.warn('Language.query is deprecated. Use new Query(language, source) instead.');
    return new Query(this, source);
  }

  /**
   * Load a language from a WebAssembly module.
   * The module can be provided as a path to a file or as a buffer.
   *
   * A string input is read with `fs/promises` when `process.versions.node`
   * is present, and retrieved with `fetch` otherwise.
   *
   * @throws {Error} If the fetch response is not OK, or if the module exports
   * no `tree_sitter_*` language function.
   */
  static async load(input: string | Uint8Array): Promise<Language> {
    let bytes: Promise<Uint8Array>;
    if (input instanceof Uint8Array) {
      bytes = Promise.resolve(input);
    } else {
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
      if (globalThis.process?.versions.node) {
        const fs: typeof import('fs/promises') = await import('fs/promises');
        bytes = fs.readFile(input);
      } else {
        bytes = fetch(input)
          .then((response) => response.arrayBuffer()
            .then((buffer) => {
              if (response.ok) {
                return new Uint8Array(buffer);
              } else {
                // Surface the response body: it usually explains the failure.
                const body = new TextDecoder('utf-8').decode(buffer);
                throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`);
              }
            }));
      }
    }

    const mod = await C.loadWebAssemblyModule(await bytes, { loadAsync: true });
    const symbolNames = Object.keys(mod);
    // The language entry point is the `tree_sitter_*` export that is not an
    // external scanner function.
    const functionName = symbolNames.find((key) => LANGUAGE_FUNCTION_REGEX.test(key) &&
      !key.includes('external_scanner_'));
    if (!functionName) {
      console.log(`Couldn't find language function in WASM file. Symbols:\n${JSON.stringify(symbolNames, null, 2)}`);
      throw new Error('Language.load failed: no language function found in WASM file');
    }
    const languageAddress = mod[functionName]();
    return new Language(INTERNAL, languageAddress);
  }
}



================================================
FILE: lib/binding_web/src/lookahead_iterator.ts
================================================
import { C, Internal, assertInternal } from './constants';
import { Language } from './language';

/**
 * Iterates over the symbols of a lookahead iterator owned by the WASM module,
 * yielding each symbol's type name.
 */
export class LookaheadIterator implements Iterable<string> {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal */
  private language: Language;

  /** @internal */
  constructor(internal: Internal, address: number, language: Language) {
    assertInternal(internal);
    this[0] = address;
    this.language = language;
  }

  /** Get the current symbol of the lookahead iterator. */
  get currentTypeId(): number {
    return C._ts_lookahead_iterator_current_symbol(this[0]);
  }

  /**
   * Get the current symbol name of the lookahead iterator.
   *
   * Falls back to `'ERROR'` when the language has no name for the symbol.
   */
  get currentType(): string {
    const typeName = this.language.types[this.currentTypeId];
    return typeName || 'ERROR';
  }

  /** Delete the lookahead iterator, freeing its resources. */
  delete(): void {
    C._ts_lookahead_iterator_delete(this[0]);
    // Clear the handle so the freed address is not kept around.
    this[0] = 0;
  }

  /**
   * Reset the lookahead iterator.
   *
   * This returns `true` if the language was set successfully and `false`
   * otherwise. The stored language is only replaced on success.
   */
  reset(language: Language, stateId: number): boolean {
    const didReset = C._ts_lookahead_iterator_reset(this[0], language[0], stateId);
    if (!didReset) return false;

    this.language = language;
    return true;
  }

  /**
   * Reset the lookahead iterator to another state.
   *
   * This returns `true` if the iterator was reset to the given state and
   * `false` otherwise.
   */
  resetState(stateId: number): boolean {
    const didReset = C._ts_lookahead_iterator_reset_state(this[0], stateId);
    return Boolean(didReset);
  }

  /**
   * Returns an iterator that iterates over the symbols of the lookahead iterator.
   *
   * The iterator will yield the current symbol name as a string for each step
   * until there are no more symbols to iterate over.
   */
  [Symbol.iterator](): Iterator<string> {
    const advance = (): IteratorResult<string> => {
      const hasNext = C._ts_lookahead_iterator_next(this[0]);
      if (!hasNext) {
        return { done: true, value: '' };
      }
      return { done: false, value: this.currentType };
    };
    return { next: advance };
  }
}



================================================
FILE: lib/binding_web/src/marshal.ts
================================================
import { Edit, INTERNAL, Point, Range, SIZE_OF_INT, SIZE_OF_NODE, SIZE_OF_POINT, C } from "./constants";
import { Node } from "./node";
import { Tree } from "./tree";
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { Query, QueryCapture, type QueryMatch } from "./query";
import { TreeCursor } from "./tree_cursor";
import { TRANSFER_BUFFER } from "./parser";
import { LanguageMetadata } from "./language";

/**
 * @internal
 *
 * Unmarshals {@link QueryCapture} entries from WASM memory into `result`.
 *
 * Exactly `result.length` captures are read, starting at `address`. Each one
 * is laid out as a 32-bit capture index followed by a marshalled {@link Node}.
 * Every entry is tagged with the given `patternIndex`, and its name is taken
 * from `query.captureNames`.
 *
 * @returns The address immediately after the last capture that was read.
 */
export function unmarshalCaptures(
  query: Query,
  tree: Tree,
  address: number,
  patternIndex: number,
  result: QueryCapture[]
) {
  // Index loop on purpose: `result` is filled by position.
  const total = result.length;
  let cursor = address;
  for (let slot = 0; slot < total; slot += 1) {
    const captureIndex = C.getValue(cursor, 'i32');
    const node = unmarshalNode(tree, cursor + SIZE_OF_INT)!;
    cursor += SIZE_OF_INT + SIZE_OF_NODE;
    result[slot] = {patternIndex, name: query.captureNames[captureIndex], node};
  }
  return cursor;
}

/**
 * @internal
 *
 * Marshals a {@link Node} into slot `index` of the transfer buffer.
 *
 * Five consecutive 32-bit words are written: the node id, start byte index,
 * start row, start column, and the node's internal `[0]` handle.
 */
export function marshalNode(node: Node, index = 0) {
  const base = TRANSFER_BUFFER + index * SIZE_OF_NODE;
  const words = [
    node.id,
    node.startIndex,
    node.startPosition.row,
    node.startPosition.column,
    node[0],
  ];
  words.forEach((word, slot) => {
    C.setValue(base + slot * SIZE_OF_INT, word, 'i32');
  });
}

/**
 * @internal
 *
 * Unmarshals a {@link Node} from `address` (the transfer buffer by default).
 *
 * The layout mirrors {@link marshalNode}: id, start byte index, start row,
 * start column and internal handle, each a 32-bit word.
 *
 * @returns The node, or `null` when the stored id is `0`.
 */
export function unmarshalNode(tree: Tree, address = TRANSFER_BUFFER): Node | null {
  const wordAt = (slot: number): number => C.getValue(address + slot * SIZE_OF_INT, 'i32');

  const id = wordAt(0);
  if (id === 0) return null;

  return new Node(INTERNAL, {
    id,
    tree,
    startIndex: wordAt(1),
    startPosition: {row: wordAt(2), column: wordAt(3)},
    other: wordAt(4),
  });
}

/**
 * @internal
 *
 * Marshals a {@link TreeCursor} to `address` (the transfer buffer by default)
 * by writing its four internal words as consecutive 32-bit values.
 */
export function marshalTreeCursor(cursor: TreeCursor, address = TRANSFER_BUFFER) {
  const words = [cursor[0], cursor[1], cursor[2], cursor[3]];
  words.forEach((word, slot) => {
    C.setValue(address + slot * SIZE_OF_INT, word, 'i32');
  });
}

/**
 * @internal
 *
 * Unmarshals a {@link TreeCursor} from the transfer buffer by reading its
 * four internal words back from the start of the buffer.
 */
export function unmarshalTreeCursor(cursor: TreeCursor) {
  const wordAt = (slot: number): number =>
    C.getValue(TRANSFER_BUFFER + slot * SIZE_OF_INT, 'i32');

  cursor[0] = wordAt(0);
  cursor[1] = wordAt(1);
  cursor[2] = wordAt(2);
  cursor[3] = wordAt(3);
}

/**
 * @internal
 *
 * Marshals a {@link Point} to `address` as two consecutive 32-bit words:
 * the row, then the column.
 */
export function marshalPoint(address: number, point: Point): void {
  const {row, column} = point;
  C.setValue(address, row, 'i32');
  C.setValue(address + SIZE_OF_INT, column, 'i32');
}

/**
 * @internal
 *
 * Unmarshals a {@link Point} from `address`.
 *
 * Both words are read as `i32` and converted with `>>> 0`, so the resulting
 * row and column are non-negative.
 */
export function unmarshalPoint(address: number): Point {
  const row = C.getValue(address, 'i32') >>> 0;
  const column = C.getValue(address + SIZE_OF_INT, 'i32') >>> 0;
  return {row, column};
}

/**
 * @internal
 *
 * Marshals a {@link Range} to `address`.
 *
 * Layout: start point, end point, start byte index, end byte index.
 */
export function marshalRange(address: number, range: Range): void {
  const endPositionAddress = address + SIZE_OF_POINT;
  const startIndexAddress = endPositionAddress + SIZE_OF_POINT;
  const endIndexAddress = startIndexAddress + SIZE_OF_INT;

  marshalPoint(address, range.startPosition);
  marshalPoint(endPositionAddress, range.endPosition);
  C.setValue(startIndexAddress, range.startIndex, 'i32');
  C.setValue(endIndexAddress, range.endIndex, 'i32');
}

/**
 * @internal
 *
 * Unmarshals a {@link Range} from `address`.
 *
 * Layout: start point, end point, start byte index, end byte index. The two
 * indices are converted with `>>> 0`, so they are non-negative.
 */
export function unmarshalRange(address: number): Range {
  const endPositionAddress = address + SIZE_OF_POINT;
  const startIndexAddress = endPositionAddress + SIZE_OF_POINT;
  const endIndexAddress = startIndexAddress + SIZE_OF_INT;

  return {
    startPosition: unmarshalPoint(address),
    endPosition: unmarshalPoint(endPositionAddress),
    startIndex: C.getValue(startIndexAddress, 'i32') >>> 0,
    endIndex: C.getValue(endIndexAddress, 'i32') >>> 0,
  };
}

/**
 * @internal
 *
 * Marshals an {@link Edit} to `address` (the transfer buffer by default).
 *
 * Layout: start point, old end point, new end point, followed by the start,
 * old end and new end byte indices.
 */
export function marshalEdit(edit: Edit, address = TRANSFER_BUFFER) {
  const pointsBase = address;
  const indicesBase = address + 3 * SIZE_OF_POINT;

  marshalPoint(pointsBase, edit.startPosition);
  marshalPoint(pointsBase + SIZE_OF_POINT, edit.oldEndPosition);
  marshalPoint(pointsBase + 2 * SIZE_OF_POINT, edit.newEndPosition);

  C.setValue(indicesBase, edit.startIndex, 'i32');
  C.setValue(indicesBase + SIZE_OF_INT, edit.oldEndIndex, 'i32');
  C.setValue(indicesBase + 2 * SIZE_OF_INT, edit.newEndIndex, 'i32');
}

/**
 * @internal
 *
 * Unmarshals a {@link LanguageMetadata} from `address`.
 *
 * Three consecutive 32-bit words are read into `major_version`,
 * `minor_version` and `field_count`, in that order.
 */
export function unmarshalLanguageMetadata(address: number): LanguageMetadata {
  const wordAt = (slot: number): number => C.getValue(address + slot * SIZE_OF_INT, 'i32');

  return {
    major_version: wordAt(0),
    minor_version: wordAt(1),
    field_count: wordAt(2),
  } as LanguageMetadata;
}



================================================
FILE: lib/binding_web/src/node.ts
================================================
import { INTERNAL, Internal, assertInternal, Point, Edit, SIZE_OF_INT, SIZE_OF_NODE, SIZE_OF_POINT, ZERO_POINT, isPoint, C } from './constants';
import { getText, Tree } from './tree';
import { TreeCursor } from './tree_cursor';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { Language } from './language';
import { marshalNode, marshalPoint, unmarshalNode, unmarshalPoint } from './marshal';
import { TRANSFER_BUFFER } from './parser';

/**
 * Unmarshals `count` consecutive nodes stored in WASM memory at `address`.
 *
 * The buffer is only read; releasing it remains the caller's job.
 */
function unmarshalNodeArray(tree: Tree, address: number, count: number): Node[] {
  const nodes = new Array<Node>(count);
  for (let i = 0; i < count; i++) {
    nodes[i] = unmarshalNode(tree, address)!;
    address += SIZE_OF_NODE;
  }
  return nodes;
}

/** A single node within a syntax {@link Tree}. */
export class Node {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal Cache filled on first access of {@link Node#children}. */
  private _children?: Node[];

  /** @internal Cache filled on first access of {@link Node#namedChildren}. */
  private _namedChildren?: Node[];

  /**
   * @internal
   *
   * Builds a node from its unmarshalled parts. `other` is stored as the
   * internal `[0]` handle and is written back verbatim when the node is
   * marshalled again.
   */
  constructor(
    internal: Internal,
    {
      id,
      tree,
      startIndex,
      startPosition,
      other,
    }: {
      id: number;
      tree: Tree;
      startIndex: number;
      startPosition: Point;
      other: number;
    }
  ) {
    assertInternal(internal);
    this[0] = other;
    this.id = id;
    this.tree = tree;
    this.startIndex = startIndex;
    this.startPosition = startPosition;
  }

  /**
   * The numeric id for this node that is unique.
   *
   * Within a given syntax tree, no two nodes have the same id. However:
   *
   * * If a new tree is created based on an older tree, and a node from the old tree is reused in
   *   the process, then that node will have the same id in both trees.
   *
   * * A node not marked as having changes does not guarantee it was reused.
   *
   * * If a node is marked as having changed in the old tree, it will not be reused.
   */
  id: number;

  /** The byte index where this node starts. */
  startIndex: number;

  /** The position where this node starts. */
  startPosition: Point;

  /** The tree that this node belongs to. */
  tree: Tree;

  /** Get this node's type as a numerical id. */
  get typeId(): number {
    marshalNode(this);
    return C._ts_node_symbol_wasm(this.tree[0]);
  }

  /**
   * Get the node's type as a numerical id as it appears in the grammar,
   * ignoring aliases.
   */
  get grammarId(): number {
    marshalNode(this);
    return C._ts_node_grammar_symbol_wasm(this.tree[0]);
  }

  /** Get this node's type as a string (`'ERROR'` if the id has no name). */
  get type(): string {
    return this.tree.language.types[this.typeId] || 'ERROR';
  }

  /**
   * Get this node's symbol name as it appears in the grammar, ignoring
   * aliases, as a string (`'ERROR'` if the id has no name).
   */
  get grammarType(): string {
    return this.tree.language.types[this.grammarId] || 'ERROR';
  }

  /**
   * Check if this node is *named*.
   *
   * Named nodes correspond to named rules in the grammar, whereas
   * *anonymous* nodes correspond to string literals in the grammar.
   */
  get isNamed(): boolean {
    marshalNode(this);
    return C._ts_node_is_named_wasm(this.tree[0]) === 1;
  }

  /**
   * Check if this node is *extra*.
   *
   * Extra nodes represent things like comments, which are not required
   * by the grammar, but can appear anywhere.
   */
  get isExtra(): boolean {
    marshalNode(this);
    return C._ts_node_is_extra_wasm(this.tree[0]) === 1;
  }

  /**
   * Check if this node represents a syntax error.
   *
   * Syntax errors represent parts of the code that could not be incorporated
   * into a valid syntax tree.
   */
  get isError(): boolean {
    marshalNode(this);
    return C._ts_node_is_error_wasm(this.tree[0]) === 1;
  }

  /**
   * Check if this node is *missing*.
   *
   * Missing nodes are inserted by the parser in order to recover from
   * certain kinds of syntax errors.
   */
  get isMissing(): boolean {
    marshalNode(this);
    return C._ts_node_is_missing_wasm(this.tree[0]) === 1;
  }

  /** Check if this node has been edited. */
  get hasChanges(): boolean {
    marshalNode(this);
    return C._ts_node_has_changes_wasm(this.tree[0]) === 1;
  }

  /**
   * Check if this node represents a syntax error or contains any syntax
   * errors anywhere within it.
   */
  get hasError(): boolean {
    marshalNode(this);
    return C._ts_node_has_error_wasm(this.tree[0]) === 1;
  }

  /** Get the byte index where this node ends. */
  get endIndex(): number {
    marshalNode(this);
    return C._ts_node_end_index_wasm(this.tree[0]);
  }

  /** Get the position where this node ends. */
  get endPosition(): Point {
    marshalNode(this);
    C._ts_node_end_point_wasm(this.tree[0]);
    return unmarshalPoint(TRANSFER_BUFFER);
  }

  /** Get the string content of this node. */
  get text(): string {
    return getText(this.tree, this.startIndex, this.endIndex, this.startPosition);
  }

  /** Get this node's parse state. */
  get parseState(): number {
    marshalNode(this);
    return C._ts_node_parse_state_wasm(this.tree[0]);
  }

  /** Get the parse state after this node. */
  get nextParseState(): number {
    marshalNode(this);
    return C._ts_node_next_parse_state_wasm(this.tree[0]);
  }

  /** Check if this node is equal to another node (same tree and same id). */
  equals(other: Node): boolean {
    return this.tree === other.tree && this.id === other.id;
  }

  /**
   * Get the node's child at the given index, where zero represents the first child.
   *
   * This method is fairly fast, but its cost is technically log(n), so if
   * you might be iterating over a long list of children, you should use
   * {@link Node#children} instead.
   */
  child(index: number): Node | null {
    marshalNode(this);
    C._ts_node_child_wasm(this.tree[0], index);
    return unmarshalNode(this.tree);
  }

  /**
   * Get this node's *named* child at the given index.
   *
   * See also {@link Node#isNamed}.
   * This method is fairly fast, but its cost is technically log(n), so if
   * you might be iterating over a long list of children, you should use
   * {@link Node#namedChildren} instead.
   */
  namedChild(index: number): Node | null {
    marshalNode(this);
    C._ts_node_named_child_wasm(this.tree[0], index);
    return unmarshalNode(this.tree);
  }

  /**
   * Get this node's child with the given numerical field id.
   *
   * See also {@link Node#childForFieldName}. You can
   * convert a field name to an id using {@link Language#fieldIdForName}.
   */
  childForFieldId(fieldId: number): Node | null {
    marshalNode(this);
    C._ts_node_child_by_field_id_wasm(this.tree[0], fieldId);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the first child with the given field name.
   *
   * Returns `null` when the language has no field with that name.
   * If multiple children may have the same field name, access them using
   * {@link Node#childrenForFieldName}.
   */
  childForFieldName(fieldName: string): Node | null {
    const fieldId = this.tree.language.fields.indexOf(fieldName);
    if (fieldId !== -1) return this.childForFieldId(fieldId);
    return null;
  }

  /** Get the field name of this node's child at the given index. */
  fieldNameForChild(index: number): string | null {
    marshalNode(this);
    const address = C._ts_node_field_name_for_child_wasm(this.tree[0], index);
    if (!address) return null;
    return C.AsciiToString(address);
  }

  /** Get the field name of this node's named child at the given index. */
  fieldNameForNamedChild(index: number): string | null {
    marshalNode(this);
    const address = C._ts_node_field_name_for_named_child_wasm(this.tree[0], index);
    if (!address) return null;
    return C.AsciiToString(address);
  }

  /**
   * Get an array of this node's children with a given field name.
   *
   * Returns an empty array when the name is not found in the language's
   * field table (or resolves to id `0`).
   *
   * See also {@link Node#children}.
   */
  childrenForFieldName(fieldName: string): Node[] {
    const fieldId = this.tree.language.fields.indexOf(fieldName);
    if (fieldId !== -1 && fieldId !== 0) return this.childrenForFieldId(fieldId);
    return [];
  }

  /**
   * Get an array of this node's children with a given field id.
   *
   * See also {@link Node#childrenForFieldName}.
   */
  childrenForFieldId(fieldId: number): Node[] {
    marshalNode(this);
    C._ts_node_children_by_field_id_wasm(this.tree[0], fieldId);
    // The call leaves a node count and a buffer address in the transfer buffer.
    const count = C.getValue(TRANSFER_BUFFER, 'i32');
    const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    const result = unmarshalNodeArray(this.tree, buffer, count);
    if (count > 0) C._free(buffer);
    return result;
  }

  /** Get the node's first child that contains or starts after the given byte offset. */
  firstChildForIndex(index: number): Node | null {
    marshalNode(this);
    // The byte offset is passed in the slot right after the marshalled node.
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    C.setValue(address, index, 'i32');
    C._ts_node_first_child_for_byte_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /** Get the node's first named child that contains or starts after the given byte offset. */
  firstNamedChildForIndex(index: number): Node | null {
    marshalNode(this);
    // The byte offset is passed in the slot right after the marshalled node.
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    C.setValue(address, index, 'i32');
    C._ts_node_first_named_child_for_byte_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /** Get this node's number of children. */
  get childCount(): number {
    marshalNode(this);
    return C._ts_node_child_count_wasm(this.tree[0]);
  }

  /**
   * Get this node's number of *named* children.
   *
   * See also {@link Node#isNamed}.
   */
  get namedChildCount(): number {
    marshalNode(this);
    return C._ts_node_named_child_count_wasm(this.tree[0]);
  }

  /** Get this node's first child. */
  get firstChild(): Node | null {
    return this.child(0);
  }

  /**
   * Get this node's first named child.
   *
   * See also {@link Node#isNamed}.
   */
  get firstNamedChild(): Node | null {
    return this.namedChild(0);
  }

  /** Get this node's last child. */
  get lastChild(): Node | null {
    return this.child(this.childCount - 1);
  }

  /**
   * Get this node's last named child.
   *
   * See also {@link Node#isNamed}.
   */
  get lastNamedChild(): Node | null {
    return this.namedChild(this.namedChildCount - 1);
  }

  /**
   * Iterate over this node's children.
   *
   * The array is computed on first access and cached on this node.
   *
   * If you're walking the tree recursively, you may want to use the
   * {@link TreeCursor} APIs directly instead.
   */
  get children(): Node[] {
    if (!this._children) {
      marshalNode(this);
      C._ts_node_children_wasm(this.tree[0]);
      const count = C.getValue(TRANSFER_BUFFER, 'i32');
      const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
      this._children = unmarshalNodeArray(this.tree, buffer, count);
      if (count > 0) C._free(buffer);
    }
    return this._children;
  }

  /**
   * Iterate over this node's named children.
   *
   * The array is computed on first access and cached on this node.
   *
   * See also {@link Node#children}.
   */
  get namedChildren(): Node[] {
    if (!this._namedChildren) {
      marshalNode(this);
      C._ts_node_named_children_wasm(this.tree[0]);
      const count = C.getValue(TRANSFER_BUFFER, 'i32');
      const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
      this._namedChildren = unmarshalNodeArray(this.tree, buffer, count);
      if (count > 0) C._free(buffer);
    }
    return this._namedChildren;
  }

  /**
   * Get the descendants of this node that are the given type, or in the given types array.
   *
   * The types array should contain node type strings, which can be retrieved from {@link Language#types}.
   *
   * Additionally, a `startPosition` and `endPosition` can be passed in to restrict the search to a
   * range of positions (`{row, column}` points, not byte offsets).
   */
  descendantsOfType(
    types: string | string[],
    startPosition: Point = ZERO_POINT,
    endPosition: Point = ZERO_POINT
  ): Node[] {
    if (!Array.isArray(types)) types = [types];

    // Convert the type strings to numeric type symbols
    const symbols: number[] = [];
    const typesBySymbol = this.tree.language.types;
    for (const nodeType of types) {
      if (nodeType === 'ERROR') {
        symbols.push(65535); // Internally, ts_builtin_sym_error is -1, which is UINT16_MAX
      }
    }
    for (let i = 0, n = typesBySymbol.length; i < n; i++) {
      if (types.includes(typesBySymbol[i])) {
        symbols.push(i);
      }
    }

    // Copy the array of symbols to the WASM heap
    const symbolsAddress = C._malloc(SIZE_OF_INT * symbols.length);
    for (let i = 0, n = symbols.length; i < n; i++) {
      C.setValue(symbolsAddress + i * SIZE_OF_INT, symbols[i], 'i32');
    }

    // Call the C API to compute the descendants
    marshalNode(this);
    C._ts_node_descendants_of_type_wasm(
      this.tree[0],
      symbolsAddress,
      symbols.length,
      startPosition.row,
      startPosition.column,
      endPosition.row,
      endPosition.column
    );

    // Instantiate the nodes based on the data returned
    const descendantCount = C.getValue(TRANSFER_BUFFER, 'i32');
    const descendantAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    const result = unmarshalNodeArray(this.tree, descendantAddress, descendantCount);

    // Free the intermediate buffers
    C._free(descendantAddress);
    C._free(symbolsAddress);
    return result;
  }

  /** Get this node's next sibling. */
  get nextSibling(): Node | null {
    marshalNode(this);
    C._ts_node_next_sibling_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /** Get this node's previous sibling. */
  get previousSibling(): Node | null {
    marshalNode(this);
    C._ts_node_prev_sibling_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get this node's next *named* sibling.
   *
   * See also {@link Node#isNamed}.
   */
  get nextNamedSibling(): Node | null {
    marshalNode(this);
    C._ts_node_next_named_sibling_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get this node's previous *named* sibling.
   *
   * See also {@link Node#isNamed}.
   */
  get previousNamedSibling(): Node | null {
    marshalNode(this);
    C._ts_node_prev_named_sibling_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /** Get the node's number of descendants, including one for the node itself. */
  get descendantCount(): number {
    marshalNode(this);
    return C._ts_node_descendant_count_wasm(this.tree[0]);
  }

  /**
   * Get this node's immediate parent.
   * Prefer {@link Node#childWithDescendant} for iterating over this node's ancestors.
   */
  get parent(): Node | null {
    marshalNode(this);
    C._ts_node_parent_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the node that contains `descendant`.
   *
   * Note that this can return `descendant` itself.
   */
  childWithDescendant(descendant: Node): Node | null {
    marshalNode(this);
    // The descendant goes into the second node slot of the transfer buffer.
    marshalNode(descendant, 1);
    C._ts_node_child_with_descendant_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the smallest node within this node that spans the given byte range.
   *
   * @throws {Error} If `start` or `end` is not a number.
   */
  descendantForIndex(start: number, end: number = start): Node | null {
    if (typeof start !== 'number' || typeof end !== 'number') {
      throw new Error('Arguments must be numbers');
    }

    marshalNode(this);
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    C.setValue(address, start, 'i32');
    C.setValue(address + SIZE_OF_INT, end, 'i32');
    C._ts_node_descendant_for_index_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the smallest named node within this node that spans the given byte range.
   *
   * @throws {Error} If `start` or `end` is not a number.
   */
  namedDescendantForIndex(start: number, end: number = start): Node | null {
    if (typeof start !== 'number' || typeof end !== 'number') {
      throw new Error('Arguments must be numbers');
    }

    marshalNode(this);
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    C.setValue(address, start, 'i32');
    C.setValue(address + SIZE_OF_INT, end, 'i32');
    C._ts_node_named_descendant_for_index_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the smallest node within this node that spans the given point range.
   *
   * @throws {Error} If `start` or `end` is not a `{row, column}` object.
   */
  descendantForPosition(start: Point, end: Point = start): Node | null {
    if (!isPoint(start) || !isPoint(end)) {
      throw new Error('Arguments must be {row, column} objects');
    }

    marshalNode(this);
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    marshalPoint(address, start);
    marshalPoint(address + SIZE_OF_POINT, end);
    C._ts_node_descendant_for_position_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Get the smallest named node within this node that spans the given point range.
   *
   * @throws {Error} If `start` or `end` is not a `{row, column}` object.
   */
  namedDescendantForPosition(start: Point, end: Point = start): Node | null {
    if (!isPoint(start) || !isPoint(end)) {
      throw new Error('Arguments must be {row, column} objects');
    }

    marshalNode(this);
    const address = TRANSFER_BUFFER + SIZE_OF_NODE;
    marshalPoint(address, start);
    marshalPoint(address + SIZE_OF_POINT, end);
    C._ts_node_named_descendant_for_position_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Create a new {@link TreeCursor} starting from this node.
   *
   * Note that the given node is considered the root of the cursor,
   * and the cursor cannot walk outside this node.
   */
  walk(): TreeCursor {
    marshalNode(this);
    C._ts_tree_cursor_new_wasm(this.tree[0]);
    return new TreeCursor(INTERNAL, this.tree);
  }

  /**
   * Edit this node to keep it in-sync with source code that has been edited.
   *
   * This function is only rarely needed. When you edit a syntax tree with
   * the {@link Tree#edit} method, all of the nodes that you retrieve from
   * the tree afterward will already reflect the edit. You only need to
   * use {@link Node#edit} when you have a specific {@link Node} instance that
   * you want to keep and continue to use after an edit.
   *
   * Only `startIndex` and `startPosition` are adjusted; `startPosition` is
   * updated in place.
   */
  edit(edit: Edit): void {
    if (this.startIndex >= edit.oldEndIndex) {
      // The node starts at or after the replaced text: keep its distance from
      // the end of the edit, but measure it from the *new* end.
      this.startIndex = edit.newEndIndex + (this.startIndex - edit.oldEndIndex);

      const {row, column} = this.startPosition;
      const oldEnd = edit.oldEndPosition;
      const newEnd = edit.newEndPosition;
      if (row > oldEnd.row) {
        // Starts on a later line than the old end: only the row shifts, the
        // column is unaffected by the edit.
        this.startPosition.row = newEnd.row + (row - oldEnd.row);
        this.startPosition.column = column;
      } else {
        // Starts on the line where the edit ended: the node now sits on the
        // new end's line, at the same column distance past the end.
        this.startPosition.row = newEnd.row;
        this.startPosition.column = newEnd.column + Math.max(0, column - oldEnd.column);
      }
    } else if (this.startIndex > edit.startIndex) {
      // The node started inside the replaced text: clamp it to the new end.
      this.startIndex = edit.newEndIndex;
      this.startPosition.row = edit.newEndPosition.row;
      this.startPosition.column = edit.newEndPosition.column;
    }
  }

  /** Get the S-expression representation of this node. */
  toString(): string {
    marshalNode(this);
    const address = C._ts_node_to_string_wasm(this.tree[0]);
    const result = C.AsciiToString(address);
    // The string was copied into JS above, so the native buffer can be released.
    C._free(address);
    return result;
  }
}



================================================
FILE: lib/binding_web/src/parser.ts
================================================
import { C, INTERNAL, LogCallback, ParseCallback, Range, SIZE_OF_INT, SIZE_OF_RANGE, setModule } from './constants';
import { Language } from './language';
import { marshalRange, unmarshalRange } from './marshal';
import { checkModule, initializeBinding } from './bindings';
import { Tree } from './tree';

/**
 * Options for parsing
 *
 * The `includedRanges` property is an array of {@link Range} objects that
 * represent the ranges of text that the parser should include when parsing.
 *
 * The `progressCallback` property is a function that is called periodically
 * during parsing to check whether parsing should be cancelled.
 *
 * See {@link Parser#parse} for more information.
 */
export interface ParseOptions {
  /**
   * An array of {@link Range} objects that
   * represent the ranges of text that the parser should include when parsing.
   *
   * By default, the parser will always include entire documents. This
   * option allows you to parse only a *portion* of a document but
   * still return a syntax tree whose ranges match up with the document
   * as a whole. You can also pass multiple disjoint ranges.
   * If `includedRanges` is empty, then the entire document will be parsed.
   * Otherwise, the given ranges must be ordered from earliest to latest
   * in the document, and they must not overlap. That is, the following
   * must hold for all `i` < `length - 1`:
   * ```text
   *     includedRanges[i].endIndex <= includedRanges[i + 1].startIndex
   * ```
   */
  includedRanges?: Range[];

  /**
   * A function that is called periodically during parsing to check
   * whether parsing should be cancelled. If the progress callback returns
   * `true`, then parsing will be cancelled. You can also use this to instrument
   * parsing and check where the parser is at in the document. The progress callback
   * takes a single argument, which is a {@link ParseState} representing the current
   * state of the parser.
   *
   * The return type admits `void` so that callbacks which only observe
   * progress and return nothing remain valid.
   */
  progressCallback?: (state: ParseState) => boolean | void;
}

/**
 * A stateful object that is passed into the progress callback {@link ParseOptions#progressCallback}
 * to provide the current state of the parser.
 *
 * Callbacks can use it to report how far parsing has got and to decide
 * whether to cancel.
 */
export interface ParseState {
  /** The byte offset in the document that the parser is at. */
  currentOffset: number;

  /** Indicates whether the parser has encountered an error during parsing. */
  hasError: boolean;
}

/**
 * @internal
 *
 * Global variable for transferring data across the FFI boundary
 *
 * Holds the address returned by `C._ts_init()`. It is assigned by
 * {@link Parser.init} and has no value before that call.
 */
export let TRANSFER_BUFFER: number;

/**
 * The latest ABI version that is supported by the current version of the
 * library.
 *
 * When Languages are generated by the Tree-sitter CLI, they are
 * assigned an ABI version number that corresponds to the current CLI version.
 * The Tree-sitter library is generally backwards-compatible with languages
 * generated using older CLI versions, but is not forwards-compatible.
 *
 * Assigned by {@link Parser.init}; it has no value before that call.
 */
export let LANGUAGE_VERSION: number;

/**
 * The earliest ABI version that is supported by the current version of the
 * library.
 *
 * Assigned by {@link Parser.init}; it has no value before that call.
 */
export let MIN_COMPATIBLE_VERSION: number;

/**
 * A stateful object that is used to produce a {@link Tree} based on some
 * source code.
 */
export class Parser {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal */
  private [1] = 0; // Internal handle for WASM

  /** @internal */
  private logCallback: LogCallback | null = null;

  /** The parser's current language. */
  language: Language | null = null;

  /**
   * This must always be called before creating a Parser.
   *
   * You can optionally pass in options to configure the WASM module, the most common
   * one being `locateFile` to help the module find the `.wasm` file.
   *
   * Besides initializing the module, this populates {@link TRANSFER_BUFFER},
   * {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION}.
   */
  static async init(moduleOptions?: Partial<EmscriptenModule>) {
    setModule(await initializeBinding(moduleOptions));
    TRANSFER_BUFFER = C._ts_init();
    LANGUAGE_VERSION = C.getValue(TRANSFER_BUFFER, 'i32');
    MIN_COMPATIBLE_VERSION = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
  }

  /**
   * Create a new parser.
   *
   * @throws {Error} If {@link Parser.init} has not been called yet.
   */
  constructor() {
    this.initialize();
  }

  /** @internal */
  initialize() {
    if (!checkModule()) {
      throw new Error("cannot construct a Parser before calling `init()`");
    }
    // The native constructor reports its two handles through the transfer buffer.
    C._ts_parser_new_wasm();
    this[0] = C.getValue(TRANSFER_BUFFER, 'i32');
    this[1] = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
  }

  /** Delete the parser, freeing its resources. */
  delete() {
    C._ts_parser_delete(this[0]);
    C._free(this[1]);
    this[0] = 0;
    this[1] = 0;
  }

  /**
   * Set the language that the parser should use for parsing.
   *
   * If the language was not successfully assigned, an error will be thrown.
   * This happens if the language was generated with an incompatible
   * version of the Tree-sitter CLI. Check the language's version using
   * {@link Language#version} and compare it to this library's
   * {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION} constants.
   *
   * Passing `null` (or any falsy value) clears the parser's language.
   *
   * @throws {Error} If the argument is neither falsy nor a {@link Language},
   *   or if the language's ABI version is outside the supported range.
   */
  setLanguage(language: Language | null): this {
    let address: number;
    if (!language) {
      address = 0;
      this.language = null;
    } else if (language.constructor === Language) {
      address = language[0];
      const version = C._ts_language_version(address);
      if (version < MIN_COMPATIBLE_VERSION || LANGUAGE_VERSION < version) {
        throw new Error(
          `Incompatible language version ${version}. ` +
          `Compatibility range ${MIN_COMPATIBLE_VERSION} through ${LANGUAGE_VERSION}.`
        );
      }
      this.language = language;
    } else {
      throw new Error('Argument must be a Language');
    }

    C._ts_parser_set_language(this[0], address);
    return this;
  }

  /**
   * Parse a slice of UTF8 text.
   *
   * The parse, log and progress callbacks are installed on the shared module
   * object for the duration of the call and are always removed again before
   * this method returns or throws.
   *
   * @param {string | ParseCallback} callback - The UTF8-encoded text to parse or a callback function.
   *
   * @param {Tree | null} [oldTree] - A previous syntax tree parsed from the same document. If the text of the
   *   document has changed since `oldTree` was created, then you must edit `oldTree` to match
   *   the new text using {@link Tree#edit}.
   *
   * @param {ParseOptions} [options] - Options for parsing the text.
   *  This can be used to set the included ranges, or a progress callback.
   *
   * @returns {Tree | null} A {@link Tree} if parsing succeeded, or `null` if:
   *  - The parser has not yet had a language assigned with {@link Parser#setLanguage}.
   *  - The progress callback returned true.
   *
   * @throws {Error} If `callback` is neither a string nor a function.
   */
  parse(
    callback: string | ParseCallback,
    oldTree?: Tree | null,
    options?: ParseOptions,
  ): Tree | null {
    if (typeof callback === 'string') {
      C.currentParseCallback = (index: number) => callback.slice(index);
    } else if (typeof callback === 'function') {
      C.currentParseCallback = callback;
    } else {
      throw new Error('Argument must be a string or a function');
    }

    // Everything below runs with module-level callbacks installed; the
    // `finally` block guarantees they are removed even when the native call
    // or one of the user callbacks throws.
    try {
      if (options?.progressCallback) {
        C.currentProgressCallback = options.progressCallback;
      } else {
        C.currentProgressCallback = null;
      }

      if (this.logCallback) {
        C.currentLogCallback = this.logCallback;
        C._ts_parser_enable_logger_wasm(this[0], 1);
      } else {
        C.currentLogCallback = null;
        C._ts_parser_enable_logger_wasm(this[0], 0);
      }

      let rangeCount = 0;
      let rangeAddress = 0;
      if (options?.includedRanges) {
        rangeCount = options.includedRanges.length;
        rangeAddress = C._calloc(rangeCount, SIZE_OF_RANGE);
        try {
          let address = rangeAddress;
          for (let i = 0; i < rangeCount; i++) {
            marshalRange(address, options.includedRanges[i]);
            address += SIZE_OF_RANGE;
          }
        } catch (error) {
          // The buffer has not been passed to the native parser yet, so it
          // must be released here.
          C._free(rangeAddress);
          throw error;
        }
      }

      // NOTE(review): the range buffer is not freed on this side after the
      // call; presumably the native function takes ownership — confirm.
      const treeAddress = C._ts_parser_parse_wasm(
        this[0],
        this[1],
        oldTree ? oldTree[0] : 0,
        rangeAddress,
        rangeCount
      );

      if (!treeAddress) {
        return null;
      }

      if (!this.language) {
        throw new Error('Parser must have a language to parse');
      }

      // The Tree keeps the parse callback, so it is read before `finally`
      // clears it.
      return new Tree(INTERNAL, treeAddress, this.language, C.currentParseCallback);
    } finally {
      C.currentParseCallback = null;
      C.currentLogCallback = null;
      C.currentProgressCallback = null;
    }
  }

  /**
   * Instruct the parser to start the next parse from the beginning.
   *
   * If the parser previously failed because of a timeout, cancellation,
   * or callback, then by default, it will resume where it left off on the
   * next call to {@link Parser#parse} or other parsing functions.
   * If you don't want to resume, and instead intend to use this parser to
   * parse some other document, you must call `reset` first.
   */
  reset(): void {
    C._ts_parser_reset(this[0]);
  }

  /** Get the ranges of text that the parser will include when parsing. */
  getIncludedRanges(): Range[] {
    // The native call reports the count and the buffer address through the
    // transfer buffer.
    C._ts_parser_included_ranges_wasm(this[0]);
    const count = C.getValue(TRANSFER_BUFFER, 'i32');
    const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    const result = new Array<Range>(count);

    if (count > 0) {
      let address = buffer;
      for (let i = 0; i < count; i++) {
        result[i] = unmarshalRange(address);
        address += SIZE_OF_RANGE;
      }
      C._free(buffer);
    }

    return result;
  }

  /**
   * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
   *
   * Get the duration in microseconds that parsing is allowed to take.
   *
   * This is set via {@link Parser#setTimeoutMicros}.
   */
  getTimeoutMicros(): number {
    return C._ts_parser_timeout_micros(this[0]);
  }

  /**
   * @deprecated since version 0.25.0, prefer passing a progress callback to {@link Parser#parse}
   *
   * Set the maximum duration in microseconds that parsing should be allowed
   * to take before halting.
   *
   * If parsing takes longer than this, it will halt early, returning `null`.
   * See {@link Parser#parse} for more information.
   */
  setTimeoutMicros(timeout: number): void {
    C._ts_parser_set_timeout_micros(this[0], 0, timeout);
  }

  /**
   * Set the logging callback that a parser should use during parsing.
   *
   * Any falsy value removes the current logger.
   *
   * @throws {Error} If `callback` is truthy but not a function.
   */
  setLogger(callback: LogCallback | boolean | null): this {
    if (!callback) {
      this.logCallback = null;
    } else if (typeof callback !== 'function') {
      throw new Error('Logger callback must be a function');
    } else {
      this.logCallback = callback;
    }
    return this;
  }

  /** Get the parser's current logger. */
  getLogger(): LogCallback | null {
    return this.logCallback;
  }
}



================================================
FILE: lib/binding_web/src/query.ts
================================================
import { Point, ZERO_POINT, SIZE_OF_INT, C } from './constants';
import { Node } from './node';
import { marshalNode, unmarshalCaptures } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Language } from './language';

/** Raw step type that `parsePattern` treats as a reference to a capture name. */
const PREDICATE_STEP_TYPE_CAPTURE = 1;

/** Raw step type that `parsePattern` treats as a literal string. */
const PREDICATE_STEP_TYPE_STRING = 2;

/**
 * Matches runs of word characters and hyphens. Used to pick the offending
 * word out of the query source when query creation fails.
 */
const QUERY_WORD_REGEX = /[\w-]+/g;

/**
 * Options for query execution.
 */
export interface QueryOptions {
  /** The start position of the range to query. */
  startPosition?: Point;

  /** The end position of the range to query. */
  endPosition?: Point;

  /** The start index of the range to query. */
  startIndex?: number;

  /** The end index of the range to query. */
  endIndex?: number;

  /**
   * The maximum number of in-progress matches for this query.
   * The limit must be > 0 and <= 65536.
   */
  matchLimit?: number;

  /**
   * The maximum start depth for a query cursor.
   *
   * This prevents cursors from exploring children nodes at a certain depth.
   * Note that if a pattern includes many children, then they will still be
   * checked.
   *
   * A max start depth of zero is a special case: it helps to destructure a
   * subtree by staying on a node and using captures for the parts of
   * interest. Note that a max start depth of zero only limits the search
   * depth for a pattern's root node; other nodes that are part of the
   * pattern may be searched at whatever depth the pattern structure
   * defines.
   *
   * Leave unset to remove the maximum start depth.
   */
  maxStartDepth?: number;

  /**
   * The maximum duration in microseconds that query execution should be allowed to
   * take before halting.
   *
   * If query execution takes longer than this, it will halt early, returning an empty array.
   */
  timeoutMicros?: number;

  /**
   * A function that will be called periodically during the execution of the query to check
   * if query execution should be cancelled. You can also use this to instrument query execution
   * and check where the query is at in the document. The progress callback takes a single argument,
   * which is a {@link QueryState} representing the current state of the query.
   */
  progressCallback?: (state: QueryState) => void;
}

/**
 * The object passed to the progress callback ({@link QueryOptions#progressCallback})
 * to describe the current state of the query.
 */
export interface QueryState {
  /** The byte offset in the document that query execution has reached. */
  currentOffset: number;
}

/**
 * A record of key-value pairs associated with a particular pattern in a {@link Query}.
 *
 * A value is `null` when the `set!`, `is?` or `is-not?` predicate named a key
 * without giving it a value.
 */
export type QueryProperties = Record<string, string | null>;

/**
 * A predicate that contains an operator and list of operands.
 */
export interface QueryPredicate {
  /** The operator of the predicate, like `match?`, `eq?`, `set!`, etc. */
  operator: string;

  /**
   * The operands of the predicate, which are either captures or strings.
   * The operator itself is not included in this list.
   */
  operands: PredicateStep[];
}

/**
 * A particular {@link Node} that has been captured with a particular name within a
 * {@link Query}.
 */
export interface QueryCapture {
  /** The index of the pattern that matched. */
  patternIndex: number;

  /** The name of the capture. */
  name: string;

  /** The captured node. */
  node: Node;

  /** The properties for predicates declared with the operator `set!`. */
  setProperties?: QueryProperties;

  /** The properties for predicates declared with the operator `is?`. */
  assertedProperties?: QueryProperties;

  /** The properties for predicates declared with the operator `is-not?`. */
  refutedProperties?: QueryProperties;
}

/** A match of a {@link Query} to a particular set of {@link Node}s. */
export interface QueryMatch {
  /**
   * @deprecated since version 0.25.0, use `patternIndex` instead.
   *
   * Matches produced by `Query#matches` carry the same value here as in
   * `patternIndex`.
   */
  pattern: number;

  /** The index of the pattern that matched. */
  patternIndex: number;

  /** The captures associated with the match. */
  captures: QueryCapture[];

  /**
   * The properties for predicates declared with the operator `set!`.
   * Left `undefined` when the matched pattern declares none.
   */
  setProperties?: QueryProperties;

  /**
   * The properties for predicates declared with the operator `is?`.
   * Left `undefined` when the matched pattern declares none.
   */
  assertedProperties?: QueryProperties;

  /**
   * The properties for predicates declared with the operator `is-not?`.
   * Left `undefined` when the matched pattern declares none.
   */
  refutedProperties?: QueryProperties;
}

/**
 * The quantifiers a capture can have within a pattern.
 *
 * The numeric values are the ones returned by
 * `C._ts_query_capture_quantifier_for_id`, whose result is cast to
 * {@link CaptureQuantifier} when a {@link Query} is constructed.
 */
export const CaptureQuantifier = {
  Zero: 0,
  ZeroOrOne: 1,
  ZeroOrMore: 2,
  One: 3,
  OneOrMore: 4
} as const;

/** A quantifier for captures: the union of the values of the `CaptureQuantifier` object. */
export type CaptureQuantifier = typeof CaptureQuantifier[keyof typeof CaptureQuantifier];

/**
 * Predicates are represented as a single array of steps. There are two
 * kinds of steps, distinguished by the value of the `type` field:
 *
 * - `'capture'` ({@link CapturePredicateStep}) - Steps with this type
 *    represent names of captures.
 *
 * - `'string'` ({@link StringPredicateStep}) - Steps with this type
 *    represent literal strings.
 */
export type PredicateStep = CapturePredicateStep | StringPredicateStep;

/**
 * A step in a predicate that refers to a capture.
 *
 * The `name` field is the name of the capture.
 */
export interface CapturePredicateStep { type: 'capture', name: string }

/**
 * A step in a predicate that refers to a string.
 *
 * The `value` field is the string value.
 */
export interface StringPredicateStep { type: 'string', value: string }

/** Type guard: true when the step refers to a capture (its `type` is `'capture'`). */
function isCaptureStep(step: PredicateStep): step is CapturePredicateStep {
  return step.type === 'capture';
}

/** Type guard: true when the step is a literal string (its `type` is `'string'`). */
function isStringStep(step: PredicateStep): step is StringPredicateStep {
  return step.type === 'string';
}

/**
 * @internal
 *
 * A function that checks if a given set of captures matches a particular
 * condition. This is used in the built-in `eq?`, `match?`, and `any-of?`
 * predicates.
 *
 * `Query#matches` keeps a match only when every text predicate registered for
 * its pattern returns `true`.
 */
export type TextPredicate = (captures: QueryCapture[]) => boolean;

/**
 * Error codes returned from tree-sitter query parsing.
 *
 * When query creation fails, the `Query` constructor reads one of these
 * values from the transfer buffer and throws the matching {@link QueryError}.
 */
export const QueryErrorKind = {
  Syntax: 1,
  NodeName: 2,
  FieldName: 3,
  CaptureName: 4,
  PatternStructure: 5,
} as const;

/** The kind of error that occurred while parsing a query string. */
export type QueryErrorKind = typeof QueryErrorKind[keyof typeof QueryErrorKind];

/**
 * Information about a {@link QueryError}, keyed by error kind.
 *
 * - `word`: the first run of word characters or hyphens found at the error
 *   position in the query source (empty if there is none).
 * - `suffix`: the error offset followed by a quoted excerpt of the query
 *   source starting at that offset.
 */
export interface QueryErrorInfo {
  [QueryErrorKind.NodeName]: { word: string };
  [QueryErrorKind.FieldName]: { word: string };
  [QueryErrorKind.CaptureName]: { word: string };
  [QueryErrorKind.PatternStructure]: { suffix: string };
  [QueryErrorKind.Syntax]: { suffix: string };
}

/**
 * Error thrown when parsing a tree-sitter query fails.
 *
 * Carries the error kind, the kind-specific details, and the index and length
 * of the offending span in the query source.
 */
export class QueryError extends Error {
  constructor(
    public kind: QueryErrorKind,
    public info: QueryErrorInfo[typeof kind],
    public index: number,
    public length: number
  ) {
    super(QueryError.formatMessage(kind, info));
    this.name = 'QueryError';
  }

  /** Builds the human-readable message for the given error kind and details. */
  private static formatMessage(kind: QueryErrorKind, info: QueryErrorInfo[QueryErrorKind]): string {
    switch (kind) {
      // Kinds whose details are an offset-plus-excerpt `suffix`.
      case QueryErrorKind.Syntax: {
        const details = info as QueryErrorInfo[typeof QueryErrorKind.Syntax];
        return `Bad syntax at offset ${details.suffix}`;
      }
      case QueryErrorKind.PatternStructure: {
        const details = info as QueryErrorInfo[typeof QueryErrorKind.PatternStructure];
        return `Bad pattern structure at offset ${details.suffix}`;
      }

      // Kinds whose details are the offending `word`.
      case QueryErrorKind.NodeName: {
        const details = info as QueryErrorInfo[typeof QueryErrorKind.NodeName];
        return `Bad node name '${details.word}'`;
      }
      case QueryErrorKind.FieldName: {
        const details = info as QueryErrorInfo[typeof QueryErrorKind.FieldName];
        return `Bad field name '${details.word}'`;
      }
      case QueryErrorKind.CaptureName: {
        const details = info as QueryErrorInfo[typeof QueryErrorKind.CaptureName];
        return `Bad capture name @${details.word}`;
      }
    }
  }
}

/**
 * Parses the `eq?`, `not-eq?`, `any-eq?` and `any-not-eq?` predicates in a
 * query, and appends the resulting text predicate to `textPredicates[index]`.
 *
 * The first argument must be a capture; the second is either another capture
 * (compare the text of the two captures' nodes) or a string (compare node text
 * against the literal). Operators prefixed with `any-` need only one node to
 * satisfy the comparison; the others need every node to.
 */
function parseAnyPredicate(
  steps: PredicateStep[],
  index: number,
  operator: string,
  textPredicates: TextPredicate[][],
) {
  if (steps.length !== 3) {
    throw new Error(
      `Wrong number of arguments to \`#${operator}\` predicate. Expected 2, got ${steps.length - 1}`
    );
  }

  const [, subject, operand] = steps;
  if (!isCaptureStep(subject)) {
    throw new Error(
      `First argument of \`#${operator}\` predicate must be a capture. Got "${subject.value}"`
    );
  }

  const wantEqual = operator === 'eq?' || operator === 'any-eq?';
  const requireAll = !operator.startsWith('any-');

  // Collects, in capture order, the nodes captured under the given name.
  const nodesNamed = (captures: QueryCapture[], name: string): Node[] =>
    captures.filter((capture) => capture.name === name).map((capture) => capture.node);

  if (isCaptureStep(operand)) {
    // Capture-vs-capture comparison.
    const leftName = subject.name;
    const rightName = operand.name;
    textPredicates[index].push((captures) => {
      const leftNodes = nodesNamed(captures, leftName);
      const rightNodes = nodesNamed(captures, rightName);
      const hasPartner = (left: Node) =>
        rightNodes.some((right) => (left.text === right.text) === wantEqual);
      return requireAll ? leftNodes.every(hasPartner) : leftNodes.some(hasPartner);
    });
    return;
  }

  // Capture-vs-literal comparison.
  const captureName = subject.name;
  const expected = operand.value;
  const passes = (node: Node) => (node.text === expected) === wantEqual;
  textPredicates[index].push((captures) => {
    const nodes = nodesNamed(captures, captureName);
    return requireAll ? nodes.every(passes) : nodes.some(passes);
  });
}

/**
 * Parses the `match?`, `not-match?`, `any-match?` and `any-not-match?`
 * predicates in a query, and appends the resulting text predicate to
 * `textPredicates[index]`.
 *
 * The first argument must be a capture and the second a string, which is
 * compiled to a regular expression. When the capture has no nodes, the
 * predicate passes only for the negated operators.
 */
function parseMatchPredicate(
  steps: PredicateStep[],
  index: number,
  operator: string,
  textPredicates: TextPredicate[][],
) {
  if (steps.length !== 3) {
    throw new Error(
      `Wrong number of arguments to \`#${operator}\` predicate. Expected 2, got ${steps.length - 1}.`,
    );
  }

  const [, target, pattern] = steps;

  if (target.type !== 'capture') {
    throw new Error(
      `First argument of \`#${operator}\` predicate must be a capture. Got "${target.value}".`,
    );
  }

  if (pattern.type !== 'string') {
    throw new Error(
      `Second argument of \`#${operator}\` predicate must be a string. Got @${pattern.name}.`,
    );
  }

  const expectMatch = operator === 'match?' || operator === 'any-match?';
  const requireAll = !operator.startsWith('any-');
  const captureName = target.name;
  const regex = new RegExp(pattern.value);
  const accepts = (text: string) => regex.test(text) === expectMatch;

  textPredicates[index].push((captures) => {
    const texts = captures
      .filter((capture) => capture.name === captureName)
      .map((capture) => capture.node.text);
    if (texts.length === 0) return !expectMatch;
    return requireAll
      ? texts.every((text) => accepts(text))
      : texts.some((text) => accepts(text));
  });
}

/**
 * Parses the `any-of?` and `not-any-of?` predicates in a query, and appends
 * the resulting text predicate to `textPredicates[index]`.
 *
 * The first argument must be a capture; every remaining argument must be a
 * string. When the capture has no nodes, the predicate passes only for
 * `not-any-of?`.
 */
function parseAnyOfPredicate(
  steps: PredicateStep[],
  index: number,
  operator: string,
  textPredicates: TextPredicate[][],
) {
  const argumentCount = steps.length - 1;
  if (argumentCount < 1) {
    throw new Error(
      `Wrong number of arguments to \`#${operator}\` predicate. Expected at least 1. Got ${argumentCount}.`,
    );
  }

  const [, target, ...candidates] = steps;
  if (target.type !== 'capture') {
    throw new Error(
      `First argument of \`#${operator}\` predicate must be a capture. Got "${target.value}".`,
    );
  }

  const expectMember = operator === 'any-of?';
  const captureName = target.name;

  if (!candidates.every(isStringStep)) {
    throw new Error(
      `Arguments to \`#${operator}\` predicate must be strings.".`,
    );
  }
  const allowed = candidates.map((candidate) => candidate.value);

  textPredicates[index].push((captures) => {
    const texts = captures
      .filter((capture) => capture.name === captureName)
      .map((capture) => capture.node.text);
    if (texts.length === 0) return !expectMember;
    const allAllowed = texts.every((text) => allowed.includes(text));
    return allAllowed === expectMember;
  });
}

/**
 * Parses the `is?` and `is-not?` predicates in a query. `is?` records its
 * key/value in `assertedProperties[index]`; any other operator routed here
 * records it in `refutedProperties[index]`.
 *
 * Takes one or two string arguments: a key and an optional value. A missing
 * value is stored as `null`.
 */
function parseIsPredicate(
  steps: PredicateStep[],
  index: number,
  operator: string,
  assertedProperties: QueryProperties[],
  refutedProperties: QueryProperties[],
) {
  const argumentCount = steps.length - 1;
  if (argumentCount < 1 || argumentCount > 2) {
    throw new Error(
      `Wrong number of arguments to \`#${operator}\` predicate. Expected 1 or 2. Got ${argumentCount}.`,
    );
  }

  if (!steps.every(isStringStep)) {
    throw new Error(
      `Arguments to \`#${operator}\` predicate must be strings.".`,
    );
  }

  const [, key, value] = steps;
  const target = operator === 'is?' ? assertedProperties : refutedProperties;

  // Create the pattern's property bag on first use.
  let bag = target[index];
  if (!bag) {
    bag = {};
    target[index] = bag;
  }
  bag[key.value] = value?.value ?? null;
}

/**
 * Parses the `set!` directive in a query, and records its key/value in
 * `setProperties[index]`.
 *
 * Takes one or two string arguments: a key and an optional value. A missing
 * value is stored as `null`.
 */
function parseSetDirective(
  steps: PredicateStep[],
  index: number,
  setProperties: QueryProperties[],
) {
  const argumentCount = steps.length - 1;
  if (argumentCount < 1 || argumentCount > 2) {
    throw new Error(`Wrong number of arguments to \`#set!\` predicate. Expected 1 or 2. Got ${argumentCount}.`);
  }
  if (!steps.every(isStringStep)) {
    throw new Error(`Arguments to \`#set!\` predicate must be strings.".`);
  }

  const [, key, value] = steps;

  // Create the pattern's property bag on first use.
  let bag = setProperties[index];
  if (!bag) {
    bag = {};
    setProperties[index] = bag;
  }
  bag[key.value] = value?.value ?? null;
}

/**
 * Consumes one raw predicate step for the pattern at `index`.
 *
 * Capture and string steps are accumulated in `steps`. Any other step type
 * ends the current predicate: the accumulated steps are dispatched on their
 * operator (the first step, which must be a string) to the matching parser,
 * or stored in `predicates[index]` when the operator is not a built-in one,
 * and `steps` is then emptied for the next predicate.
 */
function parsePattern(
  index: number,
  stepType: number,
  stepValueId: number,
  captureNames: string[],
  stringValues: string[],
  steps: PredicateStep[],
  textPredicates: TextPredicate[][],
  predicates: QueryPredicate[][],
  setProperties: QueryProperties[],
  assertedProperties: QueryProperties[],
  refutedProperties: QueryProperties[],
) {
  if (stepType === PREDICATE_STEP_TYPE_CAPTURE) {
    steps.push({ type: 'capture', name: captureNames[stepValueId] });
    return;
  }

  if (stepType === PREDICATE_STEP_TYPE_STRING) {
    steps.push({ type: 'string', value: stringValues[stepValueId] });
    return;
  }

  // Terminator step with nothing accumulated: nothing to do.
  if (steps.length === 0) {
    return;
  }

  const [head] = steps;
  if (head.type !== 'string') {
    throw new Error('Predicates must begin with a literal value');
  }

  const operator = head.value;
  if (['eq?', 'not-eq?', 'any-eq?', 'any-not-eq?'].includes(operator)) {
    parseAnyPredicate(steps, index, operator, textPredicates);
  } else if (['match?', 'not-match?', 'any-match?', 'any-not-match?'].includes(operator)) {
    parseMatchPredicate(steps, index, operator, textPredicates);
  } else if (operator === 'any-of?' || operator === 'not-any-of?') {
    parseAnyOfPredicate(steps, index, operator, textPredicates);
  } else if (operator === 'is?' || operator === 'is-not?') {
    parseIsPredicate(steps, index, operator, assertedProperties, refutedProperties);
  } else if (operator === 'set!') {
    parseSetDirective(steps, index, setProperties);
  } else {
    // Not a built-in operator: expose it to the caller unchanged.
    predicates[index].push({ operator, operands: steps.slice(1) });
  }

  // Start collecting the next predicate.
  steps.length = 0;
}

export class Query {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal */
  private exceededMatchLimit: boolean;

  /** @internal */
  private textPredicates: TextPredicate[][];

  /** The names of the captures used in the query. */
  readonly captureNames: string[];

  /** The quantifiers of the captures used in the query. */
  readonly captureQuantifiers: CaptureQuantifier[][];

  /**
   * The other user-defined predicates associated with the given index.
   *
   * This includes predicates with operators other than:
   * - `match?`
   * - `eq?` and `not-eq?`
   * - `any-of?` and `not-any-of?`
   * - `is?` and `is-not?`
   * - `set!`
   */
  readonly predicates: QueryPredicate[][];

  /** The properties for predicates with the operator `set!`. */
  readonly setProperties: QueryProperties[];

  /** The properties for predicates with the operator `is?`. */
  readonly assertedProperties: QueryProperties[];

  /** The properties for predicates with the operator `is-not?`. */
  readonly refutedProperties: QueryProperties[];

  /** The maximum number of in-progress matches for this cursor. */
  matchLimit?: number;

  /**
   * Create a new query from a string containing one or more S-expression
   * patterns.
   *
   * The query is associated with a particular language, and can only be run
   * on syntax nodes parsed with that language. References to Queries can be
   * shared between multiple threads.
   *
   * @param {Language} language - The language the query is written for.
   *
   * @param {string} source - The query source.
   *
   * @throws {QueryError} If the query source cannot be compiled.
   * @throws {Error} If a built-in predicate is used with invalid arguments.
   *   The native query is released before the error propagates.
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/queries}
   */
  constructor(language: Language, source: string) {
    const sourceLength = C.lengthBytesUTF8(source);
    const sourceAddress = C._malloc(sourceLength + 1);
    C.stringToUTF8(source, sourceAddress, sourceLength + 1);
    const address = C._ts_query_new(
      language[0],
      sourceAddress,
      sourceLength,
      TRANSFER_BUFFER,
      TRANSFER_BUFFER + SIZE_OF_INT
    );

    if (!address) {
      const errorId = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32') as QueryErrorKind;
      const errorByte = C.getValue(TRANSFER_BUFFER, 'i32');
      // Convert the byte offset into an index into the JS string.
      const errorIndex = C.UTF8ToString(sourceAddress, errorByte).length;
      const suffix = source.slice(errorIndex, errorIndex + 100).split('\n')[0];
      const word = suffix.match(QUERY_WORD_REGEX)?.[0] ?? '';
      C._free(sourceAddress);

      switch (errorId) {
        case QueryErrorKind.Syntax:
          throw new QueryError(QueryErrorKind.Syntax, { suffix: `${errorIndex}: '${suffix}'...` }, errorIndex, 0);
        case QueryErrorKind.NodeName:
          throw new QueryError(errorId, { word }, errorIndex, word.length);
        case QueryErrorKind.FieldName:
          throw new QueryError(errorId, { word }, errorIndex, word.length);
        case QueryErrorKind.CaptureName:
          throw new QueryError(errorId, { word }, errorIndex, word.length);
        case QueryErrorKind.PatternStructure:
          throw new QueryError(errorId, { suffix: `${errorIndex}: '${suffix}'...` }, errorIndex, 0);
      }
    }

    const stringCount = C._ts_query_string_count(address);
    const captureCount = C._ts_query_capture_count(address);
    const patternCount = C._ts_query_pattern_count(address);
    const captureNames = new Array<string>(captureCount);
    const captureQuantifiers = new Array<CaptureQuantifier[]>(patternCount);
    const stringValues = new Array<string>(stringCount);

    const setProperties = new Array<QueryProperties>(patternCount);
    const assertedProperties = new Array<QueryProperties>(patternCount);
    const refutedProperties = new Array<QueryProperties>(patternCount);
    const predicates = new Array<QueryPredicate[]>(patternCount);
    const textPredicates = new Array<TextPredicate[]>(patternCount);

    try {
      // Fill in the capture names
      for (let i = 0; i < captureCount; i++) {
        const nameAddress = C._ts_query_capture_name_for_id(
          address,
          i,
          TRANSFER_BUFFER
        );
        const nameLength = C.getValue(TRANSFER_BUFFER, 'i32');
        captureNames[i] = C.UTF8ToString(nameAddress, nameLength);
      }

      // Fill in the capture quantifiers
      for (let i = 0; i < patternCount; i++) {
        const captureQuantifiersArray = new Array<CaptureQuantifier>(captureCount);
        for (let j = 0; j < captureCount; j++) {
          const quantifier = C._ts_query_capture_quantifier_for_id(address, i, j);
          captureQuantifiersArray[j] = quantifier as CaptureQuantifier;
        }
        captureQuantifiers[i] = captureQuantifiersArray;
      }

      // Fill in the string values
      for (let i = 0; i < stringCount; i++) {
        const valueAddress = C._ts_query_string_value_for_id(
          address,
          i,
          TRANSFER_BUFFER
        );
        const valueLength = C.getValue(TRANSFER_BUFFER, 'i32');
        stringValues[i] = C.UTF8ToString(valueAddress, valueLength);
      }

      // Parse the predicates, and add the appropriate predicates or properties
      for (let i = 0; i < patternCount; i++) {
        const predicatesAddress = C._ts_query_predicates_for_pattern(address, i, TRANSFER_BUFFER);
        const stepCount = C.getValue(TRANSFER_BUFFER, 'i32');

        predicates[i] = [];
        textPredicates[i] = [];

        const steps = new Array<PredicateStep>();

        // Each step is a pair of i32 values: the step type, then the value id.
        let stepAddress = predicatesAddress;
        for (let j = 0; j < stepCount; j++) {
          const stepType = C.getValue(stepAddress, 'i32');
          stepAddress += SIZE_OF_INT;

          const stepValueId = C.getValue(stepAddress, 'i32');
          stepAddress += SIZE_OF_INT;

          parsePattern(
            i,
            stepType,
            stepValueId,
            captureNames,
            stringValues,
            steps,
            textPredicates,
            predicates,
            setProperties,
            assertedProperties,
            refutedProperties,
          );
        }

        Object.freeze(textPredicates[i]);
        Object.freeze(predicates[i]);
        Object.freeze(setProperties[i]);
        Object.freeze(assertedProperties[i]);
        Object.freeze(refutedProperties[i]);
      }
    } catch (error) {
      // Predicate validation (or regex compilation) failed: no Query object
      // will exist to own the native query, so release it here.
      C._ts_query_delete(address);
      throw error;
    } finally {
      C._free(sourceAddress);
    }

    this[0] = address;
    this.captureNames = captureNames;
    this.captureQuantifiers = captureQuantifiers;
    this.textPredicates = textPredicates;
    this.predicates = predicates;
    this.setProperties = setProperties;
    this.assertedProperties = assertedProperties;
    this.refutedProperties = refutedProperties;
    this.exceededMatchLimit = false;
  }

  /**
   * Delete the query, freeing its resources.
   *
   * The internal handle is reset to `0` afterwards.
   */
  delete(): void {
    C._ts_query_delete(this[0]);
    this[0] = 0;
  }

  /**
   * Iterate over all of the matches in the order that they were found.
   *
   * Each match contains the index of the pattern that matched, and a list of
   * captures. Because multiple patterns can match the same set of nodes,
   * one match may contain captures that appear *before* some of the
   * captures from a previous match.
   *
   * Matches whose pattern has text predicates (`eq?`, `match?`, `any-of?`, ...)
   * are only returned when every such predicate passes.
   *
   * @param {Node} node - The node to execute the query on.
   *
   * @param {QueryOptions} options - Options for query execution.
   *
   * @returns {QueryMatch[]} The matches that passed the text predicates.
   *
   * @throws {Error} If `matchLimit` is not a number, or if the start of the
   *   requested range lies after its end.
   */
  matches(
    node: Node,
    options: QueryOptions = {}
  ): QueryMatch[] {
    const startPosition = options.startPosition ?? ZERO_POINT;
    const endPosition = options.endPosition ?? ZERO_POINT;
    const startIndex = options.startIndex ?? 0;
    const endIndex = options.endIndex ?? 0;
    const matchLimit = options.matchLimit ?? 0xFFFFFFFF;
    const maxStartDepth = options.maxStartDepth ?? 0xFFFFFFFF;
    const timeoutMicros = options.timeoutMicros ?? 0;
    const progressCallback = options.progressCallback;

    if (typeof matchLimit !== 'number') {
      throw new Error('Arguments must be numbers');
    }
    this.matchLimit = matchLimit;

    if (endIndex !== 0 && startIndex > endIndex) {
      throw new Error('`startIndex` cannot be greater than `endIndex`');
    }

    if (endPosition !== ZERO_POINT && (
      startPosition.row > endPosition.row ||
      (startPosition.row === endPosition.row && startPosition.column > endPosition.column)
    )) {
      throw new Error('`startPosition` cannot be greater than `endPosition`');
    }

    if (progressCallback) {
      C.currentQueryProgressCallback = progressCallback;
    }

    // Address of the native result buffer; stays 0 until the native call has
    // reported it, so the cleanup below is safe on every path.
    let startAddress = 0;
    try {
      marshalNode(node);

      C._ts_query_matches_wasm(
        this[0],
        node.tree[0],
        startPosition.row,
        startPosition.column,
        endPosition.row,
        endPosition.column,
        startIndex,
        endIndex,
        matchLimit,
        maxStartDepth,
        timeoutMicros,
      );

      const rawCount = C.getValue(TRANSFER_BUFFER, 'i32');
      startAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
      const didExceedMatchLimit = C.getValue(TRANSFER_BUFFER + 2 * SIZE_OF_INT, 'i32');
      const result = new Array<QueryMatch>(rawCount);
      this.exceededMatchLimit = Boolean(didExceedMatchLimit);

      let filteredCount = 0;
      let address = startAddress;
      for (let i = 0; i < rawCount; i++) {
        const patternIndex = C.getValue(address, 'i32');
        address += SIZE_OF_INT;
        const captureCount = C.getValue(address, 'i32');
        address += SIZE_OF_INT;

        const captures = new Array<QueryCapture>(captureCount);
        address = unmarshalCaptures(this, node.tree, address, patternIndex, captures);

        // Keep the match only if all of the pattern's text predicates pass.
        if (this.textPredicates[patternIndex].every((p) => p(captures))) {
          result[filteredCount] = { pattern: patternIndex, patternIndex, captures };
          const setProperties = this.setProperties[patternIndex];
          result[filteredCount].setProperties = setProperties;
          const assertedProperties = this.assertedProperties[patternIndex];
          result[filteredCount].assertedProperties = assertedProperties;
          const refutedProperties = this.refutedProperties[patternIndex];
          result[filteredCount].refutedProperties = refutedProperties;
          filteredCount++;
        }
      }
      // Drop the unused tail left by filtered-out matches.
      result.length = filteredCount;

      return result;
    } finally {
      // Always release the result buffer and uninstall the progress callback,
      // so a failure here cannot leak memory or leave a stale callback active
      // for a later query.
      C._free(startAddress);
      C.currentQueryProgressCallback = null;
    }
  }

  /**
   * Iterate over all of the individual captures in the order that they
   * appear.
   *
   * This is useful if you don't care about which pattern matched, and just
   * want a single, ordered sequence of captures.
   *
   * @param {Node} node - The node to execute the query on.
   *
   * @param {QueryOptions} options - Options for query execution.
   */
  captures(
    node: Node,
    options: QueryOptions = {}
  ): QueryCapture[] {
    // Unset options fall back to sentinels. A zero `endIndex` and the shared
    // `ZERO_POINT` end position are treated below as "not set", so the range
    // validation is skipped for them.
    const startPosition = options.startPosition ?? ZERO_POINT;
    const endPosition = options.endPosition ?? ZERO_POINT;
    const startIndex = options.startIndex ?? 0;
    const endIndex = options.endIndex ?? 0;
    const matchLimit = options.matchLimit ?? 0xFFFFFFFF;
    const maxStartDepth = options.maxStartDepth ?? 0xFFFFFFFF;
    const timeoutMicros = options.timeoutMicros ?? 0;
    const progressCallback = options.progressCallback;

    if (typeof matchLimit !== 'number') {
      throw new Error('Arguments must be numbers');
    }
    this.matchLimit = matchLimit;

    if (endIndex !== 0 && startIndex > endIndex) {
      throw new Error('`startIndex` cannot be greater than `endIndex`');
    }

    if (endPosition !== ZERO_POINT && (
      startPosition.row > endPosition.row ||
      (startPosition.row === endPosition.row && startPosition.column > endPosition.column)
    )) {
      throw new Error('`startPosition` cannot be greater than `endPosition`');
    }

    if (progressCallback) {
      C.currentQueryProgressCallback = progressCallback;
    }

    // The outer `finally` guarantees the global progress callback is cleared
    // even when marshalling, the WASM call, or a text predicate throws.
    try {
      marshalNode(node);

      C._ts_query_captures_wasm(
        this[0],
        node.tree[0],
        startPosition.row,
        startPosition.column,
        endPosition.row,
        endPosition.column,
        startIndex,
        endIndex,
        matchLimit,
        maxStartDepth,
        timeoutMicros,
      );

      // The call reports its results through the transfer buffer:
      // [entry count, address of the capture data, exceeded-limit flag].
      const count = C.getValue(TRANSFER_BUFFER, 'i32');
      const startAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
      const didExceedMatchLimit = C.getValue(TRANSFER_BUFFER + 2 * SIZE_OF_INT, 'i32');
      this.exceededMatchLimit = Boolean(didExceedMatchLimit);

      // The inner `finally` releases the capture data on every exit path.
      try {
        const result: QueryCapture[] = [];

        // Scratch array reused for each entry; it is resized to the entry's
        // capture count before being refilled.
        const captures: QueryCapture[] = [];
        let address = startAddress;
        for (let i = 0; i < count; i++) {
          const patternIndex = C.getValue(address, 'i32');
          address += SIZE_OF_INT;
          const captureCount = C.getValue(address, 'i32');
          address += SIZE_OF_INT;
          const captureIndex = C.getValue(address, 'i32');
          address += SIZE_OF_INT;

          captures.length = captureCount;
          address = unmarshalCaptures(this, node.tree, address, patternIndex, captures);

          // Only the capture at `captureIndex` is reported, and only when all
          // of the pattern's text predicates accept the full capture set.
          if (this.textPredicates[patternIndex].every(p => p(captures))) {
            const capture = captures[captureIndex];
            capture.setProperties = this.setProperties[patternIndex];
            capture.assertedProperties = this.assertedProperties[patternIndex];
            capture.refutedProperties = this.refutedProperties[patternIndex];
            result.push(capture);
          }
        }

        return result;
      } finally {
        C._free(startAddress);
      }
    } finally {
      C.currentQueryProgressCallback = null;
    }
  }

  /** Get the predicates for a given pattern. */
  predicatesForPattern(patternIndex: number): QueryPredicate[] {
    // Predicates are kept in a per-pattern table indexed by pattern position.
    const { predicates } = this;
    return predicates[patternIndex];
  }

  /**
   * Disable a certain capture within a query.
   *
   * This prevents the capture from being returned in matches, and also
   * avoids any resource usage associated with recording the capture.
   */
  disableCapture(captureName: string): void {
    // Copy the name into WASM memory as UTF-8. The buffer is one byte larger
    // than the encoded name, while the native call receives the bare length.
    const nameByteLength = C.lengthBytesUTF8(captureName);
    const bufferSize = nameByteLength + 1;
    const nameAddress = C._malloc(bufferSize);
    C.stringToUTF8(captureName, nameAddress, bufferSize);

    C._ts_query_disable_capture(this[0], nameAddress, nameByteLength);

    // The native side has finished with the name, so release the copy.
    C._free(nameAddress);
  }

  /**
   * Disable a certain pattern within a query.
   *
   * This prevents the pattern from matching, and also avoids any resource
   * usage associated with the pattern. This throws an error if the pattern
   * index is out of bounds.
   */
  disablePattern(patternIndex: number): void {
    // Reject indices on either side of the valid range. A negative index is
    // just as out of bounds as one past the last pattern.
    if (patternIndex < 0 || patternIndex >= this.predicates.length) {
      throw new Error(
        `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}`
      );
    }
    C._ts_query_disable_pattern(this[0], patternIndex);
  }

  /**
   * Check if, on its last execution, this cursor exceeded its maximum number
   * of in-progress matches.
   */
  didExceedMatchLimit(): boolean {
    // The flag is refreshed by every `matches` / `captures` call.
    const { exceededMatchLimit } = this;
    return exceededMatchLimit;
  }

  /** Get the byte offset where the given pattern starts in the query's source. */
  startIndexForPattern(patternIndex: number): number {
    // Validate both bounds before handing the index to the native lookup;
    // a negative value must never reach it.
    if (patternIndex < 0 || patternIndex >= this.predicates.length) {
      throw new Error(
        `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}`
      );
    }
    return C._ts_query_start_byte_for_pattern(this[0], patternIndex);
  }

  /** Get the byte offset where the given pattern ends in the query's source. */
  endIndexForPattern(patternIndex: number): number {
    // Validate both bounds before handing the index to the native lookup;
    // a negative value must never reach it.
    if (patternIndex < 0 || patternIndex >= this.predicates.length) {
      throw new Error(
        `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}`
      );
    }
    return C._ts_query_end_byte_for_pattern(this[0], patternIndex);
  }

  /** Get the number of patterns in the query. */
  patternCount(): number {
    // Ask the native query object directly rather than a JS-side table.
    const count = C._ts_query_pattern_count(this[0]);
    return count;
  }

  /** Get the index for a given capture name. */
  captureIndexForName(captureName: string): number {
    // `indexOf` yields -1 when the name is not one of this query's captures.
    const { captureNames } = this;
    return captureNames.indexOf(captureName);
  }

  /** Check if a given pattern within a query has a single root node. */
  isPatternRooted(patternIndex: number): boolean {
    // The native call answers with an integer flag; 1 means "rooted".
    const flag = C._ts_query_is_pattern_rooted(this[0], patternIndex);
    return flag === 1;
  }

  /** Check if a given pattern within a query is non-local, i.e. has multiple root nodes. */
  isPatternNonLocal(patternIndex: number): boolean {
    // The native call answers with an integer flag; 1 means "non-local".
    const flag = C._ts_query_is_pattern_non_local(this[0], patternIndex);
    return flag === 1;
  }

  /**
   * Check if a given step in a query is 'definite'.
   *
   * A query step is 'definite' if its parent pattern will be guaranteed to
   * match successfully once it reaches the step.
   */
  isPatternGuaranteedAtStep(byteIndex: number): boolean {
    // The native call answers with an integer flag; 1 means "guaranteed".
    const flag = C._ts_query_is_pattern_guaranteed_at_step(this[0], byteIndex);
    return flag === 1;
  }
}



================================================
FILE: lib/binding_web/src/tree.ts
================================================
import { INTERNAL, Internal, assertInternal, ParseCallback, Point, Range, Edit, SIZE_OF_NODE, SIZE_OF_INT, SIZE_OF_RANGE, C } from './constants';
import { Language } from './language';
import { Node } from './node';
import { TreeCursor } from './tree_cursor';
import { marshalEdit, marshalPoint, unmarshalNode, unmarshalRange } from './marshal';
import { TRANSFER_BUFFER } from './parser';

/** @internal */
export function getText(tree: Tree, startIndex: number, endIndex: number, startPosition: Point): string {
  // Number of characters the caller asked for.
  const wantedLength = endIndex - startIndex;

  // The first chunk decides whether there is any text at all.
  const firstChunk = tree.textCallback(startIndex, startPosition);
  if (!firstChunk) {
    return firstChunk ?? '';
  }

  // Keep requesting chunks until the range is covered or the callback
  // stops producing text.
  let text = firstChunk;
  let offset = startIndex + firstChunk.length;
  while (offset < endIndex) {
    const chunk = tree.textCallback(offset, startPosition);
    if (!chunk) {
      break;
    }
    offset += chunk.length;
    text += chunk;
  }

  // The last chunk may overshoot the requested range; trim the surplus.
  return offset > endIndex ? text.slice(0, wantedLength) : text;
}

/** A tree that represents the syntactic structure of a source code file. */
export class Tree {
  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal */
  textCallback: ParseCallback;

  /** The language that was used to parse the syntax tree. */
  language: Language;

  /** @internal */
  constructor(internal: Internal, address: number, language: Language, textCallback: ParseCallback) {
    assertInternal(internal);
    this[0] = address;
    this.language = language;
    this.textCallback = textCallback;
  }

  /** Create a shallow copy of the syntax tree. This is very fast. */
  copy(): Tree {
    // The copy shares this tree's language and text callback.
    return new Tree(INTERNAL, C._ts_tree_copy(this[0]), this.language, this.textCallback);
  }

  /** Delete the syntax tree, freeing its resources. */
  delete(): void {
    C._ts_tree_delete(this[0]);
    // Clear the handle so the freed address is no longer referenced.
    this[0] = 0;
  }

  /** Get the root node of the syntax tree. */
  get rootNode(): Node {
    // The WASM call leaves the node in the transfer buffer for unmarshalling.
    C._ts_tree_root_node_wasm(this[0]);
    return unmarshalNode(this)!;
  }

  /**
   * Get the root node of the syntax tree, but with its position shifted
   * forward by the given offset.
   */
  rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): Node {
    // The offset arguments are written just past the node slot of the
    // transfer buffer: first the byte offset, then the point.
    const argsAddress = TRANSFER_BUFFER + SIZE_OF_NODE;
    C.setValue(argsAddress, offsetBytes, 'i32');
    marshalPoint(argsAddress + SIZE_OF_INT, offsetExtent);
    C._ts_tree_root_node_with_offset_wasm(this[0]);
    return unmarshalNode(this)!;
  }

  /**
   * Edit the syntax tree to keep it in sync with source code that has been
   * edited.
   *
   * You must describe the edit both in terms of byte offsets and in terms of
   * row/column coordinates.
   */
  edit(edit: Edit): void {
    marshalEdit(edit);
    C._ts_tree_edit_wasm(this[0]);
  }

  /** Create a new {@link TreeCursor} starting from the root of the tree. */
  walk(): TreeCursor {
    const root = this.rootNode;
    return root.walk();
  }

  /**
   * Compare this old edited syntax tree to a new syntax tree representing
   * the same document, returning a sequence of ranges whose syntactic
   * structure has changed.
   *
   * For this to work correctly, this syntax tree must have been edited such
   * that its ranges match up to the new tree. Generally, you'll want to
   * call this method right after calling one of the `Parser.parse`
   * functions. Call it on the old tree that was passed to parse, and
   * pass the new tree that was returned from `parse`.
   */
  getChangedRanges(other: Tree): Range[] {
    if (!(other instanceof Tree)) {
      throw new TypeError('Argument must be a Tree');
    }

    C._ts_tree_get_changed_ranges_wasm(this[0], other[0]);

    // Results come back through the transfer buffer: [count, data address].
    const rangeCount = C.getValue(TRANSFER_BUFFER, 'i32');
    const rangesAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    const ranges = new Array<Range>(rangeCount);
    if (rangeCount <= 0) {
      return ranges;
    }

    for (let i = 0; i < rangeCount; i++) {
      ranges[i] = unmarshalRange(rangesAddress + i * SIZE_OF_RANGE);
    }
    C._free(rangesAddress);
    return ranges;
  }

  /** Get the included ranges that were used to parse the syntax tree. */
  getIncludedRanges(): Range[] {
    C._ts_tree_included_ranges_wasm(this[0]);

    // Results come back through the transfer buffer: [count, data address].
    const rangeCount = C.getValue(TRANSFER_BUFFER, 'i32');
    const rangesAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
    const ranges = new Array<Range>(rangeCount);
    if (rangeCount <= 0) {
      return ranges;
    }

    for (let i = 0; i < rangeCount; i++) {
      ranges[i] = unmarshalRange(rangesAddress + i * SIZE_OF_RANGE);
    }
    C._free(rangesAddress);
    return ranges;
  }
}



================================================
FILE: lib/binding_web/src/tree_cursor.ts
================================================
import { INTERNAL, Internal, assertInternal, Point, SIZE_OF_NODE, SIZE_OF_CURSOR, C } from './constants';
import { marshalNode, marshalPoint, marshalTreeCursor, unmarshalNode, unmarshalPoint, unmarshalTreeCursor } from './marshal';
import { Node } from './node';
import { TRANSFER_BUFFER } from './parser';
import { getText, Tree } from './tree';

/** A stateful object for walking a syntax {@link Tree} efficiently. */
export class TreeCursor {
  // The four numeric slots hold the cursor state that is copied to and from
  // the WASM transfer buffer by `marshalTreeCursor` / `unmarshalTreeCursor`.

  /** @internal */
  private [0] = 0; // Internal handle for WASM

  /** @internal */
  private [1] = 0; // Internal handle for WASM

  /** @internal */
  private [2] = 0; // Internal handle for WASM

  /** @internal */
  private [3] = 0; // Internal handle for WASM

  /** @internal */
  private tree: Tree;

  /** @internal */
  constructor(internal: Internal, tree: Tree) {
    assertInternal(internal);
    this.tree = tree;
    // Adopt whatever cursor state currently sits in the transfer buffer.
    unmarshalTreeCursor(this);
  }

  /** Creates a deep copy of the tree cursor. This allocates new memory. */
  copy(): TreeCursor {
    const copy = new TreeCursor(INTERNAL, this.tree);
    // Write this cursor's state into the transfer buffer first, so the copy
    // is made from this cursor rather than from whatever was left there.
    marshalTreeCursor(this);
    C._ts_tree_cursor_copy_wasm(this.tree[0]);
    unmarshalTreeCursor(copy);
    return copy;
  }

  /** Delete the tree cursor, freeing its resources. */
  delete(): void {
    marshalTreeCursor(this);
    C._ts_tree_cursor_delete_wasm(this.tree[0]);
    // Clear every slot so no stale state survives the delete.
    this[0] = this[1] = this[2] = this[3] = 0;
  }

  /** Get the tree cursor's current {@link Node}. */
  get currentNode(): Node {
    marshalTreeCursor(this);
    C._ts_tree_cursor_current_node_wasm(this.tree[0]);
    return unmarshalNode(this.tree)!;
  }

  /**
   * Get the numerical field id of this tree cursor's current node.
   *
   * See also {@link TreeCursor#currentFieldName}.
   */
  get currentFieldId(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_field_id_wasm(this.tree[0]);
  }

  /** Get the field name of this tree cursor's current node. */
  get currentFieldName(): string | null {
    return this.tree.language.fields[this.currentFieldId];
  }

  /**
   * Get the depth of the cursor's current node relative to the original
   * node that the cursor was constructed with.
   */
  get currentDepth(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_depth_wasm(this.tree[0]);
  }

  /**
   * Get the index of the cursor's current node out of all of the
   * descendants of the original node that the cursor was constructed with.
   */
  get currentDescendantIndex(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_descendant_index_wasm(this.tree[0]);
  }

  /** Get the type of the cursor's current node. */
  get nodeType(): string {
    // Type ids without an entry in the language's type table report as 'ERROR'.
    return this.tree.language.types[this.nodeTypeId] || 'ERROR';
  }

  /** Get the type id of the cursor's current node. */
  get nodeTypeId(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_node_type_id_wasm(this.tree[0]);
  }

  /** Get the state id of the cursor's current node. */
  get nodeStateId(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_node_state_id_wasm(this.tree[0]);
  }

  /** Get the id of the cursor's current node. */
  get nodeId(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_node_id_wasm(this.tree[0]);
  }

  /**
   * Check if the cursor's current node is *named*.
   *
   * Named nodes correspond to named rules in the grammar, whereas
   * *anonymous* nodes correspond to string literals in the grammar.
   */
  get nodeIsNamed(): boolean {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_node_is_named_wasm(this.tree[0]) === 1;
  }

  /**
   * Check if the cursor's current node is *missing*.
   *
   * Missing nodes are inserted by the parser in order to recover from
   * certain kinds of syntax errors.
   */
  get nodeIsMissing(): boolean {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_current_node_is_missing_wasm(this.tree[0]) === 1;
  }

  /** Get the string content of the cursor's current node. */
  get nodeText(): string {
    marshalTreeCursor(this);
    const startIndex = C._ts_tree_cursor_start_index_wasm(this.tree[0]);
    const endIndex = C._ts_tree_cursor_end_index_wasm(this.tree[0]);
    // The start position is returned through the transfer buffer.
    C._ts_tree_cursor_start_position_wasm(this.tree[0]);
    const startPosition = unmarshalPoint(TRANSFER_BUFFER);
    return getText(this.tree, startIndex, endIndex, startPosition);
  }

  /** Get the start position of the cursor's current node. */
  get startPosition(): Point {
    marshalTreeCursor(this);
    C._ts_tree_cursor_start_position_wasm(this.tree[0]);
    return unmarshalPoint(TRANSFER_BUFFER);
  }

  /** Get the end position of the cursor's current node. */
  get endPosition(): Point {
    marshalTreeCursor(this);
    C._ts_tree_cursor_end_position_wasm(this.tree[0]);
    return unmarshalPoint(TRANSFER_BUFFER);
  }

  /** Get the start index of the cursor's current node. */
  get startIndex(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_start_index_wasm(this.tree[0]);
  }

  /** Get the end index of the cursor's current node. */
  get endIndex(): number {
    marshalTreeCursor(this);
    return C._ts_tree_cursor_end_index_wasm(this.tree[0]);
  }

  /**
   * Move this cursor to the first child of its current node.
   *
   * This returns `true` if the cursor successfully moved, and returns
   * `false` if there were no children.
   */
  gotoFirstChild(): boolean {
    marshalTreeCursor(this);
    const result = C._ts_tree_cursor_goto_first_child_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move this cursor to the last child of its current node.
   *
   * This returns `true` if the cursor successfully moved, and returns
   * `false` if there were no children.
   *
   * Note that this function may be slower than
   * {@link TreeCursor#gotoFirstChild} because it needs to
   * iterate through all the children to compute the child's position.
   */
  gotoLastChild(): boolean {
    marshalTreeCursor(this);
    const result = C._ts_tree_cursor_goto_last_child_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move this cursor to the parent of its current node.
   *
   * This returns `true` if the cursor successfully moved, and returns
   * `false` if there was no parent node (the cursor was already on the
   * root node).
   *
   * Note that the node the cursor was constructed with is considered the root
   * of the cursor, and the cursor cannot walk outside this node.
   */
  gotoParent(): boolean {
    marshalTreeCursor(this);
    const result = C._ts_tree_cursor_goto_parent_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move this cursor to the next sibling of its current node.
   *
   * This returns `true` if the cursor successfully moved, and returns
   * `false` if there was no next sibling node.
   *
   * Note that the node the cursor was constructed with is considered the root
   * of the cursor, and the cursor cannot walk outside this node.
   */
  gotoNextSibling(): boolean {
    marshalTreeCursor(this);
    const result = C._ts_tree_cursor_goto_next_sibling_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move this cursor to the previous sibling of its current node.
   *
   * This returns `true` if the cursor successfully moved, and returns
   * `false` if there was no previous sibling node.
   *
   * Note that this function may be slower than
   * {@link TreeCursor#gotoNextSibling} due to how node
   * positions are stored. In the worst case, this will need to iterate
   * through all the children up to the previous sibling node to recalculate
   * its position. Also note that the node the cursor was constructed with is
   * considered the root of the cursor, and the cursor cannot walk outside this node.
   */
  gotoPreviousSibling(): boolean {
    marshalTreeCursor(this);
    const result = C._ts_tree_cursor_goto_previous_sibling_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move the cursor to the node that is the nth descendant of
   * the original node that the cursor was constructed with, where
   * zero represents the original node itself.
   */
  gotoDescendant(goalDescendantIndex: number): void {
    marshalTreeCursor(this);
    C._ts_tree_cursor_goto_descendant_wasm(this.tree[0], goalDescendantIndex);
    unmarshalTreeCursor(this);
  }

  /**
   * Move this cursor to the first child of its current node that contains or
   * starts after the given byte offset.
   *
   * This returns `true` if the cursor successfully moved to a child node, and returns
   * `false` if no such child was found.
   */
  gotoFirstChildForIndex(goalIndex: number): boolean {
    marshalTreeCursor(this);
    // The goal index is passed in the transfer buffer, right after the cursor.
    C.setValue(TRANSFER_BUFFER + SIZE_OF_CURSOR, goalIndex, 'i32');
    const result = C._ts_tree_cursor_goto_first_child_for_index_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Move this cursor to the first child of its current node that contains or
   * starts after the given position.
   *
   * This returns `true` if the cursor successfully moved to a child node, and returns
   * `false` if no such child was found.
   */
  gotoFirstChildForPosition(goalPosition: Point): boolean {
    marshalTreeCursor(this);
    // The goal position is passed in the transfer buffer, right after the cursor.
    marshalPoint(TRANSFER_BUFFER + SIZE_OF_CURSOR, goalPosition);
    const result = C._ts_tree_cursor_goto_first_child_for_position_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
    return result === 1;
  }

  /**
   * Re-initialize this tree cursor to start at the given node.
   */
  reset(node: Node): void {
    // Buffer layout for this call: the node first, then the cursor.
    marshalNode(node);
    marshalTreeCursor(this, TRANSFER_BUFFER + SIZE_OF_NODE);
    C._ts_tree_cursor_reset_wasm(this.tree[0]);
    unmarshalTreeCursor(this);
  }

  /**
   * Re-initialize a tree cursor to the same position as another cursor.
   *
   * Unlike {@link TreeCursor#reset}, this will not lose parent
   * information and allows reusing already created cursors.
   */
  resetTo(cursor: TreeCursor): void {
    // Buffer layout for this call: this cursor first, then the source cursor.
    marshalTreeCursor(this, TRANSFER_BUFFER);
    marshalTreeCursor(cursor, TRANSFER_BUFFER + SIZE_OF_CURSOR);
    C._ts_tree_cursor_reset_to_wasm(this.tree[0], cursor.tree[0]);
    unmarshalTreeCursor(this);
  }
}



================================================
FILE: lib/binding_web/test/helper.ts
================================================
import { Parser, Language } from '../src';
import path from 'path';

// https://github.com/tree-sitter/tree-sitter/blob/master/xtask/src/fetch.rs#L15
export type LanguageName = 'bash' | 'c' | 'cpp' | 'embedded-template' | 'go' | 'html' | 'java' | 'javascript' | 'jsdoc' | 'json' | 'php' | 'python' | 'ruby' | 'rust' | 'typescript' | 'tsx';

/** Build the path of the compiled `.wasm` grammar for the given language. */
function languageURL(name: LanguageName): string {
  // Grammars are resolved relative to the current working directory.
  const wasmFile = `tree-sitter-${name}.wasm`;
  return path.join(process.cwd(), '../../target/release', wasmFile);
}

// Initialise the parser runtime once, then load every grammar the test
// suites share. Grammars are loaded one after another, in declaration order.
export default Parser.init().then(async () => {
  const load = (name: LanguageName) => Language.load(languageURL(name));

  const c = await load('c');
  const embeddedTemplate = await load('embedded-template');
  const html = await load('html');
  const javascript = await load('javascript');
  const json = await load('json');
  const python = await load('python');
  const rust = await load('rust');

  return {
    languageURL,
    C: c,
    EmbeddedTemplate: embeddedTemplate,
    HTML: html,
    JavaScript: javascript,
    JSON: json,
    Python: python,
    Rust: rust,
  };
});



================================================
FILE: lib/binding_web/test/language.test.ts
================================================
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import helper from './helper';
import type { LookaheadIterator, Language } from '../src';
import { Parser } from '../src';

let JavaScript: Language;
let Rust: Language;

// Exercises the metadata lookups exposed by `Language`: name/ABI version,
// field and node-type id conversions, and supertype/subtype tables.
describe('Language', () => {
  beforeAll(async () => {
    ({ JavaScript, Rust } = await helper);
  });

  describe('.name, .version', () => {
    it('returns the name and version of the language', () => {
      expect(JavaScript.name).toBe('javascript');
      expect(JavaScript.abiVersion).toBe(15);
    });
  });

  describe('.fieldIdForName, .fieldNameForId', () => {
    it('converts between the string and integer representations of fields', () => {
      // Round-trip each field name through its numeric id.
      const nameId = JavaScript.fieldIdForName('name');
      const bodyId = JavaScript.fieldIdForName('body');

      expect(nameId).toBeLessThan(JavaScript.fieldCount);
      expect(bodyId).toBeLessThan(JavaScript.fieldCount);
      expect(JavaScript.fieldNameForId(nameId!)).toBe('name');
      expect(JavaScript.fieldNameForId(bodyId!)).toBe('body');
    });

    it('handles invalid inputs', () => {
      expect(JavaScript.fieldIdForName('namezzz')).toBeNull();
      expect(JavaScript.fieldNameForId(-3)).toBeNull();
      expect(JavaScript.fieldNameForId(10000)).toBeNull();
    });
  });

  describe('.idForNodeType, .nodeTypeForId, .nodeTypeIsNamed', () => {
    it('converts between the string and integer representations of a node type', () => {
      // One named node type and one anonymous token.
      const exportStatementId = JavaScript.idForNodeType('export_statement', true)!;
      const starId = JavaScript.idForNodeType('*', false)!;

      expect(exportStatementId).toBeLessThan(JavaScript.nodeTypeCount);
      expect(starId).toBeLessThan(JavaScript.nodeTypeCount);
      expect(JavaScript.nodeTypeIsNamed(exportStatementId)).toBe(true);
      expect(JavaScript.nodeTypeForId(exportStatementId)).toBe('export_statement');
      expect(JavaScript.nodeTypeIsNamed(starId)).toBe(false);
      expect(JavaScript.nodeTypeForId(starId)).toBe('*');
    });

    it('handles invalid inputs', () => {
      expect(JavaScript.nodeTypeForId(-3)).toBeNull();
      expect(JavaScript.nodeTypeForId(10000)).toBeNull();
      expect(JavaScript.idForNodeType('export_statement', false)).toBeNull();
    });
  });

  describe('Supertypes', () => {
    it('gets the supertypes and subtypes of a parser', () => {
      const supertypeIds = Rust.supertypes;
      const supertypeNames = supertypeIds.map((id) => Rust.nodeTypeForId(id));
      expect(supertypeNames).toEqual([
        '_expression',
        '_literal',
        '_literal_pattern',
        '_pattern',
        '_type'
      ]);

      // Expected (de-duplicated, sorted) subtype names for the supertypes
      // that this test verifies; other supertypes are not checked.
      const expectedSubtypes = new Map<string, string[]>([
        ['_literal', [
          'boolean_literal',
          'char_literal',
          'float_literal',
          'integer_literal',
          'raw_string_literal',
          'string_literal',
        ]],
        ['_pattern', [
          '_',
          '_literal_pattern',
          'captured_pattern',
          'const_block',
          'generic_pattern',
          'identifier',
          'macro_invocation',
          'mut_pattern',
          'or_pattern',
          'range_pattern',
          'ref_pattern',
          'reference_pattern',
          'remaining_field_pattern',
          'scoped_identifier',
          'slice_pattern',
          'struct_pattern',
          'tuple_pattern',
          'tuple_struct_pattern',
        ]],
        ['_type', [
          'abstract_type',
          'array_type',
          'bounded_type',
          'dynamic_type',
          'function_type',
          'generic_type',
          'macro_invocation',
          'metavariable',
          'never_type',
          'pointer_type',
          'primitive_type',
          'reference_type',
          'removed_trait_bound',
          'scoped_type_identifier',
          'tuple_type',
          'type_identifier',
          'unit_type',
        ]],
      ]);

      for (const supertypeId of supertypeIds) {
        const supertypeName = Rust.nodeTypeForId(supertypeId);
        // Subtypes are resolved for every supertype, checked or not.
        const rawNames = Rust.subtypes(supertypeId).map((id) => Rust.nodeTypeForId(id));
        const subtypeNames = [...new Set(rawNames)].sort(); // Remove duplicates & sort

        const expected = typeof supertypeName === 'string'
          ? expectedSubtypes.get(supertypeName)
          : undefined;
        if (expected !== undefined) {
          expect(subtypeNames).toEqual(expected);
        }
      }
    });
  });
});

// Verifies that a lookahead iterator yields the symbols valid in a given
// parse state, and that it can be reset back to that state.
describe('Lookahead iterator', () => {
  let lookahead: LookaheadIterator;
  let state: number;

  beforeAll(async () => {
    ({ JavaScript } = await helper);
    const parser = new Parser();
    parser.setLanguage(JavaScript);
    const tree = parser.parse('function fn() {}')!;
    parser.delete();
    const cursor = tree.walk();
    try {
      // Descend two levels and record the parse state that follows that node.
      expect(cursor.gotoFirstChild()).toBe(true);
      expect(cursor.gotoFirstChild()).toBe(true);
      state = cursor.currentNode.nextParseState;
    } finally {
      // Only the numeric state is needed from here on, so release the
      // cursor and the tree instead of leaking them.
      cursor.delete();
      tree.delete();
    }
    lookahead = JavaScript.lookaheadIterator(state)!;
    // `toBeTruthy` rejects both `null` and `undefined`; `toBeDefined` would
    // let a `null` result through.
    expect(lookahead).toBeTruthy();
  });

  afterAll(() => { lookahead.delete() });

  const expected = ['(', 'identifier', '*', 'formal_parameters', 'html_comment', 'comment'];

  it('should iterate over valid symbols in the state', () => {
    const symbols = Array.from(lookahead);
    expect(symbols).toEqual(expect.arrayContaining(expected));
    expect(symbols).toHaveLength(expected.length);
  });

  it('should reset to the initial state', () => {
    expect(lookahead.resetState(state)).toBe(true);
    const symbols = Array.from(lookahead);
    expect(symbols).toEqual(expect.arrayContaining(expected));
    expect(symbols).toHaveLength(expected.length);
  });

  it('should reset', () => {
    expect(lookahead.reset(JavaScript, state)).toBe(true);
    const symbols = Array.from(lookahead);
    expect(symbols).toEqual(expect.arrayContaining(expected));
    expect(symbols).toHaveLength(expected.length);
  });
});



================================================
FILE: lib/binding_web/test/node.test.ts
================================================
import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
import type { Language, Tree, Node } from '../src';
import { Parser } from '../src';
import helper from './helper';

let C: Language;
let JavaScript: Language;
let JSON: Language;
let EmbeddedTemplate: Language;
let Python: Language;

const JSON_EXAMPLE = `
[
  123,
  false,
  {
    "x": null
  }
]
`;

/**
 * Collects every node of `tree` in pre-order (a node before its children,
 * siblings from first to last) by walking the tree with a single cursor.
 *
 * @param tree - The tree to traverse.
 * @returns All nodes reachable from the cursor's starting node, in visit order.
 */
function getAllNodes(tree: Tree): Node[] {
  const result: Node[] = [];
  // True once everything below the cursor's current node has been visited,
  // meaning the next move must be sideways or upwards.
  let visitedChildren = false;
  const cursor = tree.walk();

  try {
    while (true) {
      if (!visitedChildren) {
        result.push(cursor.currentNode);
        if (!cursor.gotoFirstChild()) {
          visitedChildren = true;
        }
      } else if (cursor.gotoNextSibling()) {
        visitedChildren = false;
      } else if (!cursor.gotoParent()) {
        // Back at the starting node with nothing left to visit.
        break;
      }
    }
  } finally {
    // Release the cursor so repeated calls do not leak it.
    cursor.delete();
  }
  return result;
}

describe('Node', () => {
  let parser: Parser;
  let tree: Tree | null;

  // Load the grammars used by this suite once.
  beforeAll(async () => {
    ({ C, EmbeddedTemplate, JavaScript, JSON, Python } = await helper);
  });

  // Every test starts with no tree and a fresh parser set to JavaScript.
  beforeEach(() => {
    tree = null;
    parser = new Parser();
    parser.setLanguage(JavaScript);
  });

  afterEach(() => {
    parser.delete();
    // `tree` is still null when a test fails before assigning it; guard the
    // call so the cleanup does not throw and mask the real failure.
    tree?.delete();
  });

  describe('.children', () => {
    it('returns an array of child nodes', () => {
      tree = parser.parse('x10 + 1000')!;

      // The root has exactly one child.
      expect(tree.rootNode.children).toHaveLength(1);

      // Its grandchild is the expression, whose children include the anonymous `+`.
      const expression = tree.rootNode.firstChild!.firstChild!;
      const childTypes = expression.children.map(({ type }) => type);
      expect(childTypes).toEqual(['identifier', '+', 'number']);
    });
  });

  describe('.namedChildren', () => {
    it('returns an array of named child nodes', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;

      expect(tree.rootNode.namedChildren).toHaveLength(1);

      // The anonymous `+` token is not part of the named children.
      const namedTypes = expression.namedChildren.map(({ type }) => type);
      expect(namedTypes).toEqual(['identifier', 'number']);
    });
  });

  describe('.childrenForFieldName', () => {
    it('returns an array of child nodes for the given field name', () => {
      parser.setLanguage(Python);
      // Every branch body is indented consistently so that the final `elif`
      // has a real body instead of relying on the parser's error recovery.
      const source = `
        if one:
            a()
        elif two:
            b()
        elif three:
            c()
        elif four:
            d()`;

      tree = parser.parse(source)!;
      const node = tree.rootNode.firstChild!;
      expect(node.type).toBe('if_statement');

      // Each `alternative` child is checked by the source text of its condition.
      const alternatives = node.childrenForFieldName('alternative');
      const alternativeTexts = alternatives.map(n => {
        const condition = n.childForFieldName('condition')!;
        return source.slice(condition.startIndex, condition.endIndex);
      });
      expect(alternativeTexts).toEqual(['two', 'three', 'four']);
    });
  });

  describe('.startIndex and .endIndex', () => {
    it('returns the character index where the node starts/ends in the text', () => {
      tree = parser.parse('a👍👎1 / b👎c👎')!;
      const quotient = tree.rootNode.firstChild!.firstChild!;
      const operands = quotient.children;

      // Each emoji counts as two index units, so `a👍👎1` ends at index 6.
      expect(quotient.startIndex).toBe(0);
      expect(quotient.endIndex).toBe(15);

      const starts = operands.map(({ startIndex }) => startIndex);
      expect(starts).toEqual([0, 7, 9]);

      const ends = operands.map(({ endIndex }) => endIndex);
      expect(ends).toEqual([6, 8, 15]);
    });
  });

  describe('.startPosition and .endPosition', () => {
    it('returns the row and column where the node starts/ends in the text', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;
      expect(expression.type).toBe('binary_expression');

      // The whole expression spans the single input line.
      expect(expression.startPosition).toEqual({ row: 0, column: 0 });
      expect(expression.endPosition).toEqual({ row: 0, column: 10 });

      // Children in order: `x10`, `+`, `1000`.
      const starts = expression.children.map(({ startPosition }) => startPosition);
      expect(starts).toEqual([
        { row: 0, column: 0 },
        { row: 0, column: 4 },
        { row: 0, column: 6 },
      ]);

      const ends = expression.children.map(({ endPosition }) => endPosition);
      expect(ends).toEqual([
        { row: 0, column: 3 },
        { row: 0, column: 5 },
        { row: 0, column: 10 },
      ]);
    });

    it('handles characters that occupy two UTF16 code units', () => {
      tree = parser.parse('a👍👎1 /\n b👎c👎')!;
      const expression = tree.rootNode.firstChild!.firstChild!;

      // One [start, end] pair per child; the right operand sits on the second row.
      const spans = expression.children.map(({ startPosition, endPosition }) => [startPosition, endPosition]);
      expect(spans).toEqual([
        [{ row: 0, column: 0 }, { row: 0, column: 6 }],
        [{ row: 0, column: 7 }, { row: 0, column: 8 }],
        [{ row: 1, column: 1 }, { row: 1, column: 7 }]
      ]);
    });
  });

  describe('.parent', () => {
    it('returns the node\'s parent', () => {
      tree = parser.parse('x10 + 1000')!;
      const root = tree.rootNode;
      const outer = root.firstChild!;
      const inner = outer.firstChild!;

      // A child is a different node from its parent...
      expect(outer.id).not.toBe(inner.id);
      // ...and `.parent` leads back up one level at a time.
      expect(outer.id).toBe(inner.parent!.id);
      expect(tree.rootNode.id).toBe(outer.parent!.id);
    });
  });

  describe('.child(), .firstChild, .lastChild', () => {
    it('returns null when the node has no children', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;
      // The left operand has no children of its own.
      const leaf = expression.firstChild!;

      expect(leaf.firstChild).toBeNull();
      expect(leaf.lastChild).toBeNull();
      expect(leaf.firstNamedChild).toBeNull();
      expect(leaf.lastNamedChild).toBeNull();
      expect(leaf.child(1)).toBeNull();
    });
  });

  describe('.childForFieldName()', () => {
    it('returns node for the given field name', () => {
      tree = parser.parse('class A { b() {} }')!;

      const declaration = tree.rootNode.firstChild!;
      expect(declaration.type).toBe('class_declaration');

      // `name` field: the class identifier.
      const nameNode = declaration.childForFieldName('name')!;
      expect(nameNode.type).toBe('identifier');
      expect(nameNode.text).toBe('A');

      // `body` field: everything between and including the braces.
      const body = declaration.childForFieldName('body')!;
      expect(body.type).toBe('class_body');
      expect(body.text).toBe('{ b() {} }');

      // The first named child of the body is the method.
      const method = body.firstNamedChild!;
      expect(method.type).toBe('method_definition');
      expect(method.text).toBe('b() {}');
    });
  });

  describe('.childWithDescendant()', () => {
    it('correctly retrieves immediate children', () => {
      const sourceCode = 'let x = 1; console.log(x);';
      tree = parser.parse(sourceCode)!;
      const root = tree.rootNode;
      const grandchild = root.children[0].children[0];

      // First hop: the child of the root that contains the grandchild.
      const firstHop = root.childWithDescendant(grandchild);
      expect(firstHop!.startIndex).toBe(0);

      // Second hop: the grandchild itself.
      const secondHop = firstHop!.childWithDescendant(grandchild);
      expect(secondHop).toEqual(grandchild);

      // Asking the grandchild for a child containing itself yields nothing.
      const thirdHop = secondHop!.childWithDescendant(grandchild);
      expect(thirdHop).toBeNull();
    });
  });

  describe('.nextSibling and .previousSibling', () => {
    it('returns the node\'s next and previous sibling', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;
      const [first, second, third] = expression.children;

      // Walking forwards.
      expect(second.id).toBe(first.nextSibling!.id);
      expect(third.id).toBe(second.nextSibling!.id);
      // Walking backwards.
      expect(first.id).toBe(second.previousSibling!.id);
      expect(second.id).toBe(third.previousSibling!.id);
    });
  });

  describe('.nextNamedSibling and .previousNamedSibling', () => {
    it('returns the node\'s next and previous named sibling', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;
      const [leftOperand, rightOperand] = expression.namedChildren;

      // The two named children are each other's named siblings.
      expect(rightOperand.id).toBe(leftOperand.nextNamedSibling!.id);
      expect(leftOperand.id).toBe(rightOperand.previousNamedSibling!.id);
    });
  });

  describe('.descendantForIndex(min, max)', () => {
    it('returns the smallest node that spans the given range', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;

      const insideIdentifier = expression.descendantForIndex(1, 2)!;
      expect(insideIdentifier.type).toBe('identifier');
      const atOperator = expression.descendantForIndex(4, 4)!;
      expect(atOperator.type).toBe('+');

      // Non-numeric arguments are rejected.
      const callWithObject = () => {
        // @ts-expect-error Testing invalid arguments
        expression.descendantForIndex(1, {});
      };
      expect(callWithObject).toThrow('Arguments must be numbers');

      const callWithUndefined = () => {
        // @ts-expect-error Testing invalid arguments
        expression.descendantForIndex(undefined);
      };
      expect(callWithUndefined).toThrow('Arguments must be numbers');
    });
  });

  describe('.namedDescendantForIndex', () => {
    it('returns the smallest named node that spans the given range', () => {
      tree = parser.parse('x10 + 1000')!;
      const sumNode = tree.rootNode.firstChild!;

      // Exercise the *named* lookup; the previous version called
      // `descendantForIndex` and therefore never tested this method.
      expect(sumNode.namedDescendantForIndex(1, 2)!.type).toBe('identifier');
      // Index 4 is the anonymous `+` token, so the smallest named node
      // covering it is the enclosing binary expression.
      expect(sumNode.namedDescendantForIndex(4, 4)!.type).toBe('binary_expression');
    });
  });

  describe('.descendantForPosition', () => {
    it('returns the smallest node that spans the given range', () => {
      tree = parser.parse('x10 + 1000')!;
      const statement = tree.rootNode.firstChild!;

      // Two-argument form: explicit start and end positions.
      const insideIdentifier = statement.descendantForPosition({ row: 0, column: 1 }, { row: 0, column: 2 })!;
      expect(insideIdentifier.type).toBe('identifier');
      // One-argument form: a single position.
      const atOperator = statement.descendantForPosition({ row: 0, column: 4 })!;
      expect(atOperator.type).toBe('+');

      // Arguments that are not {row, column} objects are rejected.
      const callWithNumber = () => {
        // @ts-expect-error Testing invalid arguments
        statement.descendantForPosition(1, {});
      };
      expect(callWithNumber).toThrow('Arguments must be {row, column} objects');

      const callWithUndefined = () => {
        // @ts-expect-error Testing invalid arguments
        statement.descendantForPosition(undefined);
      };
      expect(callWithUndefined).toThrow('Arguments must be {row, column} objects');
    });
  });

  describe('.namedDescendantForPosition(min, max)', () => {
    it('returns the smallest named node that spans the given range', () => {
      tree = parser.parse('x10 + 1000')!;
      const statement = tree.rootNode.firstChild!;

      const insideIdentifier = statement.namedDescendantForPosition({ row: 0, column: 1 }, { row: 0, column: 2 })!;
      expect(insideIdentifier.type).toBe('identifier');

      // Column 4 holds the anonymous `+`, so the named result is the enclosing expression.
      const atOperator = statement.namedDescendantForPosition({ row: 0, column: 4 })!;
      expect(atOperator.type).toBe('binary_expression');
    });
  });

  describe('.hasError', () => {
    it('returns true if the node contains an error', () => {
      tree = parser.parse('1 + 2 * * 3')!;
      const root = tree.rootNode;
      const expectedSexp =
        '(program (expression_statement (binary_expression left: (number) right: (binary_expression left: (number) (ERROR) right: (number)))))';
      expect(root.toString()).toBe(expectedSexp);

      // Only the outer expression and its right-hand operand contain the ERROR node.
      const outerExpression = root.firstChild!.firstChild!;
      expect(outerExpression.hasError).toBe(true);

      const [left, operator, right] = outerExpression.children;
      expect(left.hasError).toBe(false);
      expect(operator.hasError).toBe(false);
      expect(right.hasError).toBe(true);
    });
  });

  describe('.isError', () => {
    it('returns true if the node is an error', () => {
      tree = parser.parse('2 * * 3')!;
      const root = tree.rootNode;
      const expectedSexp =
        '(program (expression_statement (binary_expression left: (number) (ERROR) right: (number))))';
      expect(root.toString()).toBe(expectedSexp);

      const product = root.firstChild!.firstChild!;
      expect(product.hasError).toBe(true);

      // Of the four children, only the third is the ERROR node itself.
      const errorFlags = [false, false, true, false];
      errorFlags.forEach((flag, index) => {
        expect(product.children[index].isError).toBe(flag);
      });
    });
  });

  describe('.isMissing', () => {
    it('returns true if the node was inserted via error recovery', () => {
      tree = parser.parse('(2 ||)')!;
      const root = tree.rootNode;
      const expectedSexp =
        '(program (expression_statement (parenthesized_expression (binary_expression left: (number) right: (MISSING identifier)))))';
      expect(root.toString()).toBe(expectedSexp);

      const expression = root.firstChild!.firstChild!.firstNamedChild!;
      expect(expression.type).toBe('binary_expression');
      expect(expression.hasError).toBe(true);

      // Only the right-hand operand is flagged as missing.
      const [left, operator, right] = expression.children;
      expect(left.isMissing).toBe(false);
      expect(operator.isMissing).toBe(false);
      expect(right.isMissing).toBe(true);
    });
  });

  describe('.isExtra', () => {
    it('returns true if the node is an extra node like comments', () => {
      tree = parser.parse('foo(/* hi */);')!;
      const program = tree.rootNode;
      // Index 7 falls inside the `/* hi */` comment.
      const comment = program.descendantForIndex(7, 7)!;

      expect(program.type).toBe('program');
      expect(comment.type).toBe('comment');

      expect(program.isExtra).toBe(false);
      expect(comment.isExtra).toBe(true);
    });
  });

  describe('.text', () => {
    const text = 'α0 / b👎c👎';

    // The same source is fed to the parser both as a plain string and through
    // a callback that returns it in chunks of four.
    const inputs = {
      '.parse(String)': text,
      '.parse(Function)': (offset: number) => text.slice(offset, offset + 4),
    };

    for (const [method, input] of Object.entries(inputs)) {
      it(`returns the text of a node generated by ${method}`, () => {
        const [numeratorSrc, denominatorSrc] = text.split(/\s*\/\s+/);
        tree = parser.parse(input)!;
        const quotient = tree.rootNode.firstChild!.firstChild!;
        const [numerator, slash, denominator] = quotient.children;

        expect(tree.rootNode.text).toBe(text);
        expect(denominator.text).toBe(denominatorSrc);
        expect(quotient.text).toBe(text);
        expect(numerator.text).toBe(numeratorSrc);
        expect(slash.text).toBe('/');
      });
    }
  });

  describe('.descendantCount', () => {
    it('returns the number of descendants', () => {
      parser.setLanguage(JSON);
      tree = parser.parse(JSON_EXAMPLE)!;
      const root = tree.rootNode;
      const allNodes = getAllNodes(tree);

      expect(root.descendantCount).toBe(allNodes.length);

      const cursor = tree.walk();
      // Jumping to descendant `index` must land on the node with the same
      // position in the pre-order listing.
      const expectCursorAt = (index: number) => {
        cursor.gotoDescendant(index);
        expect(cursor.currentNode.id).toBe(allNodes[index].id);
      };

      // Forwards through every descendant...
      for (let index = 0; index < allNodes.length; index += 1) {
        expectCursorAt(index);
      }

      // ...and then backwards.
      for (let index = allNodes.length - 1; index >= 0; index -= 1) {
        expectCursorAt(index);
      }
    });

    it('tests a single node tree', () => {
      parser.setLanguage(EmbeddedTemplate);
      tree = parser.parse('hello')!;

      const nodes = getAllNodes(tree);
      expect(nodes).toHaveLength(2);
      expect(tree.rootNode.descendantCount).toBe(2);

      const cursor = tree.walk();

      // Descendant 0 sits at depth 0 and descendant 1 at depth 1.
      [0, 1].forEach((index) => {
        cursor.gotoDescendant(index);
        expect(cursor.currentDepth).toBe(index);
        expect(cursor.currentNode.id).toBe(nodes[index].id);
      });
    });
  });

  describe('.rootNodeWithOffset', () => {
    it('returns the root node of the tree, offset by the given byte offset', () => {
      tree = parser.parse('  if (a) b')!;
      const shiftedRoot = tree.rootNodeWithOffset(6, { row: 2, column: 2 });

      // The root itself is shifted by the requested offset.
      expect(shiftedRoot.startIndex).toBe(8);
      expect(shiftedRoot.endIndex).toBe(16);
      expect(shiftedRoot.startPosition).toEqual({ row: 2, column: 4 });
      expect(shiftedRoot.endPosition).toEqual({ row: 2, column: 12 });

      // Nodes reached through the node API carry the same offset.
      const viaChildApi = shiftedRoot.firstChild!.child(2)!;
      expect(viaChildApi.type).toBe('expression_statement');
      expect(viaChildApi.startIndex).toBe(15);
      expect(viaChildApi.endIndex).toBe(16);
      expect(viaChildApi.startPosition).toEqual({ row: 2, column: 11 });
      expect(viaChildApi.endPosition).toEqual({ row: 2, column: 12 });

      // So do nodes reached through a cursor created from the shifted root.
      const cursor = shiftedRoot.walk();
      cursor.gotoFirstChild();
      cursor.gotoFirstChild();
      cursor.gotoNextSibling();
      const viaCursor = cursor.currentNode;
      expect(viaCursor.type).toBe('parenthesized_expression');
      expect(viaCursor.startIndex).toBe(11);
      expect(viaCursor.endIndex).toBe(14);
      expect(viaCursor.startPosition).toEqual({ row: 2, column: 7 });
      expect(viaCursor.endPosition).toEqual({ row: 2, column: 10 });
    });
  });

  describe('.parseState, .nextParseState', () => {
    const text = '10 / 5';

    it('returns node parse state ids', () => {
      tree = parser.parse(text)!;
      const quotient = tree.rootNode.firstChild!.firstChild!;

      expect(tree.rootNode.parseState).toBe(0);
      // Concrete state ids change whenever the grammar changes, so only
      // check that each of the three children reports a positive one.
      const [numerator, slash, denominator] = quotient.children;
      for (const child of [numerator, slash, denominator]) {
        expect(child.parseState).toBeGreaterThan(0);
      }
    });

    it('returns next parse state equal to the language', () => {
      tree = parser.parse(text)!;
      const quotient = tree.rootNode.firstChild!.firstChild!;

      // The node's own answer must match what the language computes.
      for (const child of quotient.children) {
        const fromLanguage = JavaScript.nextState(child.parseState, child.grammarId);
        expect(child.nextParseState).toBe(fromLanguage);
      }
    });
  });

  describe('.descendantsOfType("ERROR")', () => {
    it('finds all of the descendants of an ERROR node', () => {
      // The continuation lines keep their exact leading whitespace because
      // they are part of the parsed source.
      const source = `if ({a: 'b'} {c: 'd'}) {
          // ^ ERROR
          x = function(a) { b; } function(c) { d; }
        }`;
      tree = parser.parse(source)!;

      const root = tree.rootNode;
      const errorStarts = root.descendantsOfType('ERROR').map(({ startIndex }) => startIndex);
      expect(errorStarts).toEqual([4]);
    });
  });

  describe('.descendantsOfType', () => {
    it('finds all descendants of a given type in the given range', () => {
      tree = parser.parse('a + 1 * b * 2 + c + 3')!;
      const outerSum = tree.rootNode.firstChild!.firstChild!;

      // Restrict the search to columns 2..15 of the single input line.
      const rangeStart = { row: 0, column: 2 };
      const rangeEnd = { row: 0, column: 15 };
      const numbers = outerSum.descendantsOfType('number', rangeStart, rangeEnd);

      const starts = numbers.map(({ startIndex }) => startIndex);
      expect(starts).toEqual([4, 12]);
      const ends = numbers.map(({ endPosition }) => endPosition);
      expect(ends).toEqual([{ row: 0, column: 5 }, { row: 0, column: 13 }]);
    });
  });



  describe('.firstChildForIndex(index)', () => {
    it('returns the first child that contains or starts after the given index', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;

      // [index, expected type of the child found for that index]
      const cases: [number, string][] = [
        [0, 'identifier'],
        [1, 'identifier'],
        [3, '+'],
        [5, 'number'],
      ];
      for (const [index, expectedType] of cases) {
        expect(expression.firstChildForIndex(index)!.type).toBe(expectedType);
      }
    });
  });

  describe('.firstNamedChildForIndex(index)', () => {
    it('returns the first child that contains or starts after the given index', () => {
      tree = parser.parse('x10 + 1000')!;
      const expression = tree.rootNode.firstChild!.firstChild!;

      // [index, expected type]; from index 3 the anonymous `+` is skipped.
      const cases: [number, string][] = [
        [0, 'identifier'],
        [1, 'identifier'],
        [3, 'number'],
      ];
      for (const [index, expectedType] of cases) {
        expect(expression.firstNamedChildForIndex(index)!.type).toBe(expectedType);
      }
    });
  });

  describe('.equals(other)', () => {
    it('returns true if the nodes are the same', () => {
      tree = parser.parse('1 + 2')!;

      const expression = tree.rootNode.firstChild!.firstChild!;
      // Two separate lookups of the same child.
      const firstLookup = expression.firstChild!;
      const secondLookup = expression.firstChild!;
      expect(firstLookup.equals(secondLookup)).toBe(true);
    });

    it('returns false if the nodes are not the same', () => {
      tree = parser.parse('1 + 2')!;

      const expression = tree.rootNode.firstChild!.firstChild!;
      // A node compared with its next sibling.
      const leftOperand = expression.firstChild!;
      const sibling = leftOperand.nextSibling!;
      expect(leftOperand.equals(sibling)).toBe(false);
    });
  });

  describe('.fieldNameForChild(index)', () => {
    it('returns the field of a child or null', () => {
      parser.setLanguage(C);
      tree = parser.parse('int w = x + /* y is special! */ y;')!;

      const declaration = tree.rootNode.firstChild;
      const initDeclarator = declaration!.childForFieldName('declarator')!;
      const binaryExpression = initDeclarator.childForFieldName('value')!;

      // -------------------
      // left: (identifier)  0
      // operator: "+"       1 <--- (not a named child)
      // (comment)           2 <--- (is an extra)
      // right: (identifier) 3
      // -------------------

      // [child index, expected field name]
      const cases: [number, string | null][] = [
        [0, 'left'],
        [1, 'operator'],
        // The comment should not have a field name, as it's just an extra
        [2, null],
        [3, 'right'],
        // Negative test - Not a valid child index
        [4, null],
      ];
      for (const [index, fieldName] of cases) {
        expect(binaryExpression.fieldNameForChild(index)).toBe(fieldName);
      }
    });
  });

  describe('.fieldNameForNamedChild(index)', () => {
    it('returns the field of a named child or null', () => {
      parser.setLanguage(C);
      tree = parser.parse('int w = x + /* y is special! */ y;')!;

      const declaration = tree.rootNode.firstNamedChild;
      const initDeclarator = declaration!.childForFieldName('declarator')!;
      const binaryExpression = initDeclarator.childForFieldName('value')!;

      // -------------------
      // left: (identifier)  0
      // operator: "+"       _ <--- (not a named child)
      // (comment)           1 <--- (is an extra)
      // right: (identifier) 2
      // -------------------

      // [named child index, expected field name]
      const cases: [number, string | null][] = [
        [0, 'left'],
        // The comment should not have a field name, as it's just an extra
        [1, null],
        // The operator is not a named child, so the named child at index 2 is the right child
        [2, 'right'],
        // Negative test - Not a valid child index
        [3, null],
      ];
      for (const [index, fieldName] of cases) {
        expect(binaryExpression.fieldNameForNamedChild(index)).toBe(fieldName);
      }
    });
  });
});



================================================
FILE: lib/binding_web/test/parser.test.ts
================================================
import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
import helper, { type LanguageName } from './helper';
import type { ParseState, Tree } from '../src';
import { Parser, Language } from '../src';

// Grammars and the language-URL helper used by the tests in this file. They
// are assigned from `helper` in the suite's `beforeAll`. Note that `JSON`
// shadows the global `JSON` object inside this module.
let JavaScript: Language;
let HTML: Language;
let JSON: Language;
// Maps a language name to the URL that is passed to `Language.load`.
let languageURL: (name: LanguageName) => string;

describe('Parser', () => {
  let parser: Parser;

  // Resolve the shared helper once and keep the pieces this suite needs.
  beforeAll(async () => {
    const loaded = await helper;
    ({ JavaScript, HTML, JSON, languageURL } = loaded);
  });

  // Each test gets its own parser with no language set...
  beforeEach(() => {
    parser = new Parser();
  });

  // ...and releases it afterwards.
  afterEach(() => {
    parser.delete();
  });

  describe('.setLanguage', () => {
    it('allows setting the language to null', () => {
      // A fresh parser has no language.
      expect(parser.language).toBeNull();

      parser.setLanguage(JavaScript);
      expect(parser.language).toBe(JavaScript);

      // Passing null clears it again.
      parser.setLanguage(null);
      expect(parser.language).toBeNull();
    });

    it('throws an exception when the given object is not a tree-sitter language', () => {
      const setPlainObject = () => {
        // @ts-expect-error Testing invalid arguments
        parser.setLanguage({});
      };
      expect(setPlainObject).toThrow(/Argument must be a Language/);

      const setNumber = () => {
        // @ts-expect-error Testing invalid arguments
        parser.setLanguage(1);
      };
      expect(setNumber).toThrow(/Argument must be a Language/);
    });
  });

  describe('.setLogger', () => {
    beforeEach(() => {
      parser.setLanguage(JavaScript);
    });

    it('calls the given callback for each parse event', () => {
      const messages: string[] = [];
      parser.setLogger((message) => messages.push(message));
      parser.parse('a + b + c')!;

      // A sample of the messages that must have been logged.
      const someExpectedMessages = [
        'skip character:\' \'',
        'consume character:\'b\'',
        'reduce sym:program, child_count:1',
        'accept'
      ];
      expect(messages).toEqual(expect.arrayContaining(someExpectedMessages));
    });

    it('allows the callback to be retrieved later', () => {
      const noopLogger = () => { return; };

      parser.setLogger(noopLogger);
      expect(parser.getLogger()).toBe(noopLogger);

      // Clearing the logger makes the getter return null.
      parser.setLogger(false);
      expect(parser.getLogger()).toBeNull();
    });

    it('disables debugging when given a falsy value', () => {
      const messages: string[] = [];
      parser.setLogger((message) => messages.push(message));
      parser.setLogger(false);

      parser.parse('a + b * c')!;
      expect(messages).toHaveLength(0);
    });

    it('throws an error when given a truthy value that isn\'t a function', () => {
      const setStringLogger = () => {
        // @ts-expect-error Testing invalid arguments
        parser.setLogger('5');
      };
      expect(setStringLogger).toThrow('Logger callback must be a function');
    });

    it('rethrows errors thrown by the logging callback', () => {
      const error = new Error('The error message');
      const throwingLogger = () => {
        throw error;
      };
      parser.setLogger(throwingLogger);

      expect(() => parser.parse('ok;')).toThrow('The error message');
    });
  });

  describe('one included range', () => {
    it('parses the text within a range', () => {
      const sourceCode = '<span>hi</span><script>console.log(\'sup\');</script>';

      // Step 1: parse as HTML to locate the script contents.
      parser.setLanguage(HTML);
      const htmlTree = parser.parse(sourceCode)!;
      const scriptContent = htmlTree.rootNode.child(1)!.child(1)!;
      expect(scriptContent.type).toBe('raw_text');

      // Step 2: switch to JavaScript; before a range is supplied the parser
      // reports a single catch-all range.
      parser.setLanguage(JavaScript);
      const catchAllRange = {
        startIndex: 0,
        endIndex: 2147483647,
        startPosition: { row: 0, column: 0 },
        endPosition: { row: 4294967295, column: 2147483647 }
      };
      expect(parser.getIncludedRanges()).toEqual([catchAllRange]);

      // Step 3: parse only the script contents.
      const { startIndex, endIndex, startPosition, endPosition } = scriptContent;
      const ranges = [{ startIndex, endIndex, startPosition, endPosition }];
      const jsTree = parser.parse(sourceCode, null, { includedRanges: ranges })!;
      expect(parser.getIncludedRanges()).toEqual(ranges);

      const expectedSexp = [
        '(program (expression_statement (call_expression ',
        'function: (member_expression object: (identifier) property: (property_identifier)) ',
        'arguments: (arguments (string (string_fragment))))))',
      ].join('');
      expect(jsTree.rootNode.toString()).toBe(expectedSexp);

      // The resulting tree starts where the script text starts in the full source.
      const scriptColumn = sourceCode.indexOf('console');
      expect(jsTree.rootNode.startPosition).toEqual({ row: 0, column: scriptColumn });
    });
  });

  describe('multiple included ranges', () => {
    it('parses the text within multiple ranges', () => {
      const sourceCode = 'html `<div>Hello, ${name.toUpperCase()}, it\'s <b>${now()}</b>.</div>`';

      // Step 1: parse as JavaScript to find the template string and its parts.
      parser.setLanguage(JavaScript);
      const jsTree = parser.parse(sourceCode)!;
      const templateFrom = sourceCode.indexOf('`<');
      const templateTo = sourceCode.indexOf('>`');
      const templateString = jsTree.rootNode.descendantForIndex(templateFrom, templateTo)!;
      expect(templateString.type).toBe('template_string');

      const openQuote = templateString.child(0)!;
      const firstInterpolation = templateString.child(2)!;
      const secondInterpolation = templateString.child(4)!;
      const closeQuote = templateString.child(6)!;

      // Builds the range lying between the end of one node and the start of the next.
      const gapBetween = (before: typeof openQuote, after: typeof openQuote) => ({
        startIndex: before.endIndex,
        startPosition: before.endPosition,
        endIndex: after.startIndex,
        endPosition: after.startPosition,
      });

      // Step 2: parse only the text between quotes and interpolations as HTML.
      parser.setLanguage(HTML);
      const htmlRanges = [
        gapBetween(openQuote, firstInterpolation),
        gapBetween(firstInterpolation, secondInterpolation),
        gapBetween(secondInterpolation, closeQuote),
      ];
      const htmlTree = parser.parse(sourceCode, null, { includedRanges: htmlRanges })!;

      const expectedSexp = [
        '(document (element',
        ' (start_tag (tag_name))',
        ' (text)',
        ' (element (start_tag (tag_name)) (end_tag (tag_name)))',
        ' (text)',
        ' (end_tag (tag_name))))',
      ].join('');
      expect(htmlTree.rootNode.toString()).toBe(expectedSexp);
      expect(htmlTree.getIncludedRanges()).toEqual(htmlRanges);

      // Step 3: node offsets in the HTML tree refer to the full source string.
      const divElement = htmlTree.rootNode.child(0)!;
      const helloText = divElement.child(1)!;
      const boldElement = divElement.child(2)!;
      const boldStartTag = boldElement.child(0)!;
      const boldEndTag = boldElement.child(1)!;

      expect(helloText.type).toBe('text');
      expect(helloText.startIndex).toBe(sourceCode.indexOf('Hello'));
      expect(helloText.endIndex).toBe(sourceCode.indexOf(' <b>'));

      expect(boldStartTag.type).toBe('start_tag');
      expect(boldStartTag.startIndex).toBe(sourceCode.indexOf('<b>'));
      expect(boldStartTag.endIndex).toBe(sourceCode.indexOf('${now()}'));

      expect(boldEndTag.type).toBe('end_tag');
      expect(boldEndTag.startIndex).toBe(sourceCode.indexOf('</b>'));
      expect(boldEndTag.endIndex).toBe(sourceCode.indexOf('.</div>'));
    });
  });

  describe('an included range containing mismatched positions', () => {
    it('parses the text within the range', () => {
      const sourceCode = '<div>test</div>{_ignore_this_part_}';
      parser.setLanguage(HTML);

      // The range stops right before the trailing `{...}` part. Its positions
      // (row 10, column 12) deliberately do not correspond to its indices.
      const endIndex = sourceCode.indexOf('{_ignore_this_part_');
      const startColumn = 12;
      const rangeToParse = {
        startIndex: 0,
        startPosition: { row: 10, column: startColumn },
        endIndex,
        endPosition: { row: 10, column: startColumn + endIndex },
      };

      const htmlTree = parser.parse(sourceCode, null, { includedRanges: [rangeToParse] })!;

      // The tree reports the range exactly as it was given...
      const [firstRange] = htmlTree.getIncludedRanges();
      expect(firstRange).toEqual(rangeToParse);

      // ...and only the `<div>` element was parsed.
      const expectedSexp = '(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))';
      expect(htmlTree.rootNode.toString()).toBe(expectedSexp);
    });
  });

  describe('.parse', () => {
    let tree: Tree | null;

    beforeEach(() => {
      tree = null;
      parser.setLanguage(JavaScript);
    });

    afterEach(() => {
      if (tree) tree.delete();
    });

    it('reads from the given input', () => {
      const parts = ['first', '_', 'second', '_', 'third'];
      tree = parser.parse(() => parts.shift())!;
      expect(tree.rootNode.toString()).toBe('(program (expression_statement (identifier)))');
    });

    it('stops reading when the input callback returns something that\'s not a string', () => {
      const parts = ['abc', 'def', 'ghi', {}, {}, {}, 'second-word', ' '];
      tree = parser.parse(() => parts.shift() as string)!;
      expect(tree.rootNode.toString()).toBe('(program (expression_statement (identifier)))');
      expect(tree.rootNode.endIndex).toBe(9);
      expect(parts).toHaveLength(2);
    });

    it('throws an exception when the given input is not a function', () => {
      // @ts-expect-error Testing invalid arguments
      expect(() => parser.parse(null)).toThrow('Argument must be a string or a function');
      // @ts-expect-error Testing invalid arguments
      expect(() => parser.parse(5)).toThrow('Argument must be a string or a function');
      // @ts-expect-error Testing invalid arguments
      expect(() => parser.parse({})).toThrow('Argument must be a string or a function');
    });

    it('handles long input strings', { timeout: 10000 }, () => {
      const repeatCount = 10000;
      const inputString = `[${Array(repeatCount).fill('0').join(',')}]`;

      tree = parser.parse(inputString)!;
      expect(tree.rootNode.type).toBe('program');
      expect(tree.rootNode.firstChild!.firstChild!.namedChildCount).toBe(repeatCount);
    });

    it('can use the bash parser', { timeout: 5000 }, async () => {
      parser.setLanguage(await Language.load(languageURL('bash')));
      tree = parser.parse('FOO=bar echo <<EOF 2> err.txt > hello.txt \nhello${FOO}\nEOF')!;
      expect(tree.rootNode.toString()).toBe(
        '(program ' +
        '(redirected_statement ' +
        'body: (command ' +
        '(variable_assignment name: (variable_name) value: (word)) ' +
        'name: (command_name (word))) ' +
        'redirect: (heredoc_redirect (heredoc_start) ' +
        'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' +
        'redirect: (file_redirect destination: (word)) ' +
        '(heredoc_body ' +
        '(expansion (variable_name)) (heredoc_content)) (heredoc_end))))'
      );
    });

    // Loads the C++ grammar and checks the S-expression for a raw string literal
    // declaration.
    it('can use the c++ parser', { timeout: 5000 }, async () => {
      const cpp = await Language.load(languageURL('cpp'));
      parser.setLanguage(cpp);

      const source = 'const char *s = R"EOF(HELLO WORLD)EOF";';
      const expected = [
        '(translation_unit (declaration ',
        '(type_qualifier) ',
        'type: (primitive_type) ',
        'declarator: (init_declarator ',
        'declarator: (pointer_declarator declarator: (identifier)) ',
        'value: (raw_string_literal delimiter: (raw_string_delimiter) (raw_string_content) (raw_string_delimiter)))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Loads the HTML grammar and checks the S-expression for three nested
    // elements, the innermost using a custom tag name.
    it('can use the HTML parser', { timeout: 5000 }, async () => {
      const html = await Language.load(languageURL('html'));
      parser.setLanguage(html);

      const source = '<div><span><custom></custom></span></div>';
      const expected = [
        '(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) ',
        '(element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))) (end_tag (tag_name))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Loads the Python grammar and checks the S-expression for a class holding
    // one method whose body is a single call.
    it('can use the python parser', { timeout: 5000 }, async () => {
      const python = await Language.load(languageURL('python'));
      parser.setLanguage(python);

      const source = 'class A:\n  def b():\n    c()';
      const expected = [
        '(module (class_definition ',
        'name: (identifier) ',
        'body: (block ',
        '(function_definition ',
        'name: (identifier) ',
        'parameters: (parameters) ',
        'body: (block (expression_statement (call ',
        'function: (identifier) ',
        'arguments: (argument_list))))))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Loads the Rust grammar and checks the S-expression for a constant that is
    // initialised with a raw string literal.
    it('can use the rust parser', { timeout: 5000 }, async () => {
      const rust = await Language.load(languageURL('rust'));
      parser.setLanguage(rust);

      const source = 'const x: &\'static str = r###"hello"###;';
      const expected = [
        '(source_file (const_item ',
        'name: (identifier) ',
        'type: (reference_type (lifetime (identifier)) type: (primitive_type)) ',
        'value: (raw_string_literal (string_content))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Loads the TypeScript grammar. The expected tree shows `b()\n[c]` parsed as
    // one subscript expression rather than two statements.
    it('can use the typescript parser', { timeout: 5000 }, async () => {
      const typescript = await Language.load(languageURL('typescript'));
      parser.setLanguage(typescript);

      const source = 'a()\nb()\n[c]';
      const expected = [
        '(program ',
        '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ',
        '(expression_statement (subscript_expression ',
        'object: (call_expression ',
        'function: (identifier) ',
        'arguments: (arguments)) ',
        'index: (identifier))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Loads the TSX grammar and expects the same tree shape as the TypeScript
    // grammar test produces for this input.
    it('can use the tsx parser', { timeout: 5000 }, async () => {
      const tsx = await Language.load(languageURL('tsx'));
      parser.setLanguage(tsx);

      const source = 'a()\nb()\n[c]';
      const expected = [
        '(program ',
        '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ',
        '(expression_statement (subscript_expression ',
        'object: (call_expression ',
        'function: (identifier) ',
        'arguments: (arguments)) ',
        'index: (identifier))))',
      ].join('');

      tree = parser.parse(source)!;
      expect(tree.rootNode.toString()).toBe(expected);
    });

    // Only the code between the `<% %>` delimiters is handed to the parser, so
    // the resulting tree must contain just the two embedded statements.
    it('parses only the text within the `includedRanges` if they are specified', () => {
      const sourceCode = '<% foo() %> <% bar %>';

      // Builds a single-line range of `length` characters starting at `startIndex`.
      // The source is one row, so the column equals the index.
      const rangeAt = (startIndex: number, length: number) => ({
        startIndex,
        endIndex: startIndex + length,
        startPosition: { row: 0, column: startIndex },
        endPosition: { row: 0, column: startIndex + length },
      });

      // Assign to the suite-level `tree` (as the sibling tests do) rather than
      // declaring a shadowing local, so this tree goes through the same shared
      // variable as the others. NOTE(review): presumably the suite teardown deletes
      // that variable; the hook is outside this view, confirm.
      tree = parser.parse(sourceCode, null, {
        includedRanges: [
          rangeAt(sourceCode.indexOf('foo'), 'foo()'.length),
          rangeAt(sourceCode.indexOf('bar'), 'bar'.length),
        ],
      })!;

      expect(tree.rootNode.toString()).toBe(
        '(program ' +
        '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ' +
        '(expression_statement (identifier)))'
      );
    });

    // Feeds the parser an input that never ends and relies on the progress
    // callback to stop it; a stopped parse is expected to yield `null`.
    it('parses with a timeout', { timeout: 5000 }, () => {
      parser.setLanguage(JSON);

      const begunAt = performance.now();
      let lastOffset = 0;
      // Reported offsets must never move backwards. Returning `true` once more
      // than 1ms has elapsed requests cancellation.
      const progressCallback = (state: ParseState) => {
        expect(state.currentOffset).toBeGreaterThanOrEqual(lastOffset);
        lastOffset = state.currentOffset;
        return performance.now() - begunAt > 1;
      };

      // '[' first, then ',0' for every later request.
      const endlessInput = (offset: number) => (offset === 0 ? '[' : ',0');

      const result = parser.parse(endlessInput, null, { progressCallback });
      expect(result).toBeNull();
    });

    // Cancels parsing from the progress callback as soon as the parse state
    // reports an error, then checks how far the parser had read by then.
    it('times out when an error is detected', { timeout: 5000 }, () => {
      parser.setLanguage(JSON);

      const erroneousCode = '!,';
      let lastSeenOffset = 0;
      const progressCallback = (state: ParseState) => {
        lastSeenOffset = state.currentOffset;
        return state.hasError;
      };

      // '[' at index 0, valid '0,' chunks for indices 1..999, and the invalid
      // chunk for everything else.
      const input = (index: number) => {
        if (index === 0) return '[';
        return index >= 1 && index < 1000 ? '0,' : erroneousCode;
      };

      const result = parser.parse(input, null, { progressCallback });

      // The callback also runs at the end of a parse, but the point here is that
      // parsing stops as soon as the error is seen: the last reported offset is
      // 1000 plus the length of the erroneous chunk (1002). In this WASM test
      // that figure is doubled because JavaScript strings are UTF-16 encoded.
      expect(lastSeenOffset).toBe((1000 + erroneousCode.length) * 2);
      expect(result).toBeNull();
    });
  });
});



================================================
FILE: lib/binding_web/test/query.test.ts
================================================
import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
import type { Language, Tree, QueryMatch, QueryCapture } from '../src';
import { Parser, Query } from '../src';
import helper from './helper';

let JavaScript: Language;

describe('Query', () => {
  let parser: Parser;
  let tree: Tree | null;
  let query: Query | null;

  // Resolve the shared JavaScript language once for the whole suite.
  beforeAll(async () => {
    ({ JavaScript } = await helper);
  });

  // Every test gets a fresh parser configured for JavaScript.
  beforeEach(() => {
    parser = new Parser();
    parser.setLanguage(JavaScript);
  });

  // Release whatever the test stored in the shared variables, then clear them
  // so a later test that never reassigns `tree` or `query` does not delete the
  // same stale handle a second time.
  afterEach(() => {
    parser.delete();
    tree?.delete();
    query?.delete();
    tree = null;
    query = null;
  });

  describe('construction', () => {
    // Each entry pairs a malformed query source with the error message that
    // constructing a Query from it must raise.
    it('throws an error on invalid patterns', () => {
      const cases: [string, string][] = [
        ['(function_declaration wat)', 'Bad syntax at offset 22: \'wat)\'...'],
        ['(non_existent)', 'Bad node name \'non_existent\''],
        ['(a)', 'Bad node name \'a\''],
        ['(function_declaration non_existent:(identifier))', 'Bad field name \'non_existent\''],
        [
          '(function_declaration name:(statement_block))',
          'Bad pattern structure at offset 22: \'name:(statement_block))\'',
        ],
      ];

      for (const [source, message] of cases) {
        expect(() => new Query(JavaScript, source)).toThrow(message);
      }
    });

    // Predicates that name an unknown capture, or pass `#eq?` the wrong number
    // of arguments, must be rejected when the query is constructed.
    it('throws an error on invalid predicates', () => {
      const cases: [string, string][] = [
        ['((identifier) @abc (#eq? @ab hi))', 'Bad capture name @ab'],
        [
          '((identifier) @abc (#eq?))',
          'Wrong number of arguments to `#eq?` predicate. Expected 2, got 0',
        ],
        [
          '((identifier) @a (#eq? @a @a @a))',
          'Wrong number of arguments to `#eq?` predicate. Expected 2, got 3',
        ],
      ];

      for (const [source, message] of cases) {
        expect(() => new Query(JavaScript, source)).toThrow(message);
      }
    });
  });

  describe('.matches', () => {
    // Two patterns, one for function definitions and one for calls; matches are
    // expected in the order listed below.
    it('returns all of the matches for the given query', { timeout: 10000 }, () => {
      const source = 'function one() { two(); function three() {} }';
      const queryText = `
        (function_declaration name: (identifier) @fn-def)
        (call_expression function: (identifier) @fn-ref)
      `;
      // Expected shape of a match that carries exactly one capture.
      const singleCapture = (patternIndex: number, name: string, text: string) => ({
        patternIndex,
        captures: [{ patternIndex, name, text }],
      });

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatMatches(query.matches(tree.rootNode));
      expect(found).toEqual([
        singleCapture(0, 'fn-def', 'one'),
        singleCapture(1, 'fn-ref', 'two'),
        singleCapture(0, 'fn-def', 'three'),
      ]);
    });

    // Restricts matching to the span from row 1, column 1 to row 3, column 1 of
    // a four-row array literal; only `d`, `e`, `f` and `g` are expected.
    it('can search in specified ranges', () => {
      const searchWindow = {
        startPosition: { row: 1, column: 1 },
        endPosition: { row: 3, column: 1 },
      };

      tree = parser.parse('[a, b,\nc, d,\ne, f,\ng, h]')!;
      query = new Query(JavaScript, '(identifier) @element');

      const found = formatMatches(query.matches(tree.rootNode, searchWindow));
      const expected = ['d', 'e', 'f', 'g'].map((text) => ({
        patternIndex: 0,
        captures: [{ patternIndex: 0, name: 'element', text }],
      }));
      expect(found).toEqual(expected);
    });

    // `#match?` filters calls by function name; only `g`-prefixed calls that
    // take an array literal argument should remain.
    it('handles predicates that compare the text of capture to literal strings', () => {
      const source = `
        giraffe(1, 2, []);
        helment([false]);
        goat(false);
        gross(3, []);
        hiccup([]);
        gaff(5);
      `;
      // Find all calls to functions beginning with 'g', where one argument
      // is an array literal.
      const queryText = `
        (call_expression
          function: (identifier) @name
          arguments: (arguments (array))
          (#match? @name "^g"))
      `;

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatMatches(query.matches(tree.rootNode));
      const expected = ['giraffe', 'gross'].map((text) => ({
        patternIndex: 0,
        captures: [{ patternIndex: 0, name: 'name', text }],
      }));
      expect(found).toEqual(expected);
    });

    // The source has several identifiers (`a`, `window`, `b`), but only `window`
    // satisfies the `#match?` predicate and should be reported.
    it('handles multiple matches where the first one is filtered', () => {
      const source = `
        const a = window.b;
      `;
      const queryText = `
        ((identifier) @variable.builtin
          (#match? @variable.builtin "^(arguments|module|console|window|document)$")
          (#is-not? local))
      `;

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatMatches(query.matches(tree.rootNode));
      const onlyWindow = {
        patternIndex: 0,
        captures: [{ patternIndex: 0, name: 'variable.builtin', text: 'window' }],
      };
      expect(found).toEqual([onlyWindow]);
    });
  });

  describe('.captures', () => {
    // Captures produced by four different patterns must come back interleaved in
    // the exact sequence listed below.
    it('returns all of the captures for the given query, in order', () => {
      const source = `
        a({
          bc: function de() {
            const fg = function hi() {}
          },
          jk: function lm() {
            const no = function pq() {}
          },
        });
      `;
      const queryText = `
        (pair
          key: _ @method.def
          (function_expression
            name: (identifier) @method.alias))

        (variable_declarator
          name: _ @function.def
          value: (function_expression
            name: (identifier) @function.alias))

        ":" @delimiter
        "=" @operator
      `;
      // [patternIndex, capture name, captured text]
      const rows: [number, string, string][] = [
        [0, 'method.def', 'bc'],
        [2, 'delimiter', ':'],
        [0, 'method.alias', 'de'],
        [1, 'function.def', 'fg'],
        [3, 'operator', '='],
        [1, 'function.alias', 'hi'],
        [0, 'method.def', 'jk'],
        [2, 'delimiter', ':'],
        [0, 'method.alias', 'lm'],
        [1, 'function.def', 'no'],
        [3, 'operator', '='],
        [1, 'function.alias', 'pq'],
      ];

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatCaptures(query.captures(tree.rootNode));
      expect(found).toEqual(rows.map(([patternIndex, name, text]) => ({ patternIndex, name, text })));
    });

    // Exercises `#not-match?`, `#eq?` and `#match?` against literal strings; one
    // identifier may be captured by several patterns.
    it('handles conditions that compare the text of capture to literal strings', () => {
      const source = `
        lambda
        panda
        load
        toad
        const ab = require('./ab');
        new Cd(EF);
      `;
      const queryText = `
        ((identifier) @variable
         (#not-match? @variable "^(lambda|load)$"))

        ((identifier) @function.builtin
         (#eq? @function.builtin "require"))

        ((identifier) @constructor
         (#match? @constructor "^[A-Z]"))

        ((identifier) @constant
         (#match? @constant "^[A-Z]{2,}$"))
      `;
      // [patternIndex, capture name, captured text]
      const rows: [number, string, string][] = [
        [0, 'variable', 'panda'],
        [0, 'variable', 'toad'],
        [0, 'variable', 'ab'],
        [0, 'variable', 'require'],
        [1, 'function.builtin', 'require'],
        [0, 'variable', 'Cd'],
        [2, 'constructor', 'Cd'],
        [0, 'variable', 'EF'],
        [2, 'constructor', 'EF'],
        [3, 'constant', 'EF'],
      ];

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatCaptures(query.captures(tree.rootNode));
      expect(found).toEqual(rows.map(([patternIndex, name, text]) => ({ patternIndex, name, text })));
    });

    // `#eq?` with two captures: only the assignment whose left side equals the
    // left operand of its right-hand expression (`ghi = ghi + 1`) should match.
    it('handles conditions that compare the text of captures to each other', () => {
      const source = `
        ab = abc + 1;
        def = de + 1;
        ghi = ghi + 1;
      `;
      const queryText = `
        (
          (assignment_expression
            left: (identifier) @id1
            right: (binary_expression
              left: (identifier) @id2))
          (#eq? @id1 @id2)
        )
      `;

      tree = parser.parse(source)!;
      query = new Query(JavaScript, queryText);

      const found = formatCaptures(query.captures(tree.rootNode));
      const expected = ['id1', 'id2'].map((name) => ({ patternIndex: 0, name, text: 'ghi' }));
      expect(found).toEqual(expected);
    });

    // `#set!`, `#is?` and `#is-not?` must surface on the captures as
    // `setProperties`, `assertedProperties` and `refutedProperties`.
    it('handles patterns with properties', () => {
      const queryText = `
        ((call_expression (identifier) @func)
         (#set! foo)
         (#set! bar baz))

        ((property_identifier) @prop
         (#is? foo)
         (#is-not? bar baz))
      `;
      const expectedFunc = {
        patternIndex: 0,
        name: 'func',
        text: 'a',
        setProperties: { foo: null, bar: 'baz' },
      };
      const expectedProp = {
        patternIndex: 1,
        name: 'prop',
        text: 'c',
        assertedProperties: { foo: null },
        refutedProperties: { bar: 'baz' },
      };

      tree = parser.parse('a(b.c);')!;
      query = new Query(JavaScript, queryText);

      const found = formatCaptures(query.captures(tree.rootNode));
      expect(found).toEqual([expectedFunc, expectedProp]);
      expect(query.didExceedMatchLimit()).toBe(false);
    });

    // Fifty identical array elements make the (@pre, @post) pairing explode; with
    // `matchLimit: 32` the query must report that the limit was exceeded.
    it('detects queries with too many permutations to track', () => {
      const source = `
        [
          hello, hello, hello, hello, hello, hello, hello, hello, hello, hello,
          hello, hello, hello, hello, hello, hello, hello, hello, hello, hello,
          hello, hello, hello, hello, hello, hello, hello, hello, hello, hello,
          hello, hello, hello, hello, hello, hello, hello, hello, hello, hello,
          hello, hello, hello, hello, hello, hello, hello, hello, hello, hello,
        ];
      `;
      const options = { matchLimit: 32 };

      tree = parser.parse(source)!;
      query = new Query(JavaScript, '(array (identifier) @pre (identifier) @post)');

      // Only the limit flag matters here; the captures themselves are discarded.
      query.captures(tree.rootNode, options);
      expect(query.didExceedMatchLimit()).toBe(true);
    });

    // Checks the capture counts the plain and `any-` predicate variants produce
    // for a quantified `(comment)+` capture over three comments, of which only
    // the first is `/// foo`.
    it('handles quantified captures properly', () => {
      tree = parser.parse(`
        /// foo
        /// bar
        /// baz
      `)!;

      // Builds a query from `queryText`, asserts how many captures it yields and
      // releases it straight away. Without this, each call would overwrite the
      // shared `query` and leave the previous instance undeleted.
      const expectCount = (tree: Tree, queryText: string, expectedCount: number) => {
        query = new Query(JavaScript, queryText);
        const captures = query.captures(tree.rootNode);
        expect(captures).toHaveLength(expectedCount);
        // Reached only when the assertion passed; after a failure `query` stays
        // set so that `afterEach` still deletes it.
        query.delete();
        query = null;
      };

      // [query source, expected number of captures]
      const cases: [string, number][] = [
        [`((comment)+ @foo (#any-eq? @foo "/// foo"))`, 3],
        [`((comment)+ @foo (#eq? @foo "/// foo"))`, 0],
        [`((comment)+ @foo (#any-not-eq? @foo "/// foo"))`, 3],
        [`((comment)+ @foo (#not-eq? @foo "/// foo"))`, 0],
        [`((comment)+ @foo (#match? @foo "^/// foo"))`, 0],
        [`((comment)+ @foo (#any-match? @foo "^/// foo"))`, 3],
        [`((comment)+ @foo (#not-match? @foo "^/// foo"))`, 0],
        [`((comment)+ @foo (#not-match? @foo "fsdfsdafdfs"))`, 3],
        [`((comment)+ @foo (#any-not-match? @foo "^///"))`, 0],
        [`((comment)+ @foo (#any-not-match? @foo "^/// foo"))`, 3],
      ];

      for (const [queryText, expectedCount] of cases) {
        expectCount(tree, queryText, expectedCount);
      }
    });
  });

  describe('.predicatesForPattern(index)', () => {
    // The expected list for pattern 0 has no entry for `#match?`, so only the
    // two custom predicates are reported for it.
    it('returns all of the predicates as objects', () => {
      // Builders for the two operand shapes used in the expectations.
      const capture = (name: string) => ({ type: 'capture', name });
      const literal = (value: string) => ({ type: 'string', value });

      query = new Query(JavaScript, `
        (
          (binary_expression
            left: (identifier) @a
            right: (identifier) @b)
          (#something? @a @b)
          (#match? @a "c")
          (#something-else? @a "A" @b "B")
        )

        ((identifier) @c
         (#hello! @c))

        "if" @d
      `);

      const firstPattern = query.predicatesForPattern(0);
      expect(firstPattern).toStrictEqual([
        { operator: 'something?', operands: [capture('a'), capture('b')] },
        {
          operator: 'something-else?',
          operands: [capture('a'), literal('A'), capture('b'), literal('B')],
        },
      ]);

      const secondPattern = query.predicatesForPattern(1);
      expect(secondPattern).toStrictEqual([
        { operator: 'hello!', operands: [capture('c')] },
      ]);

      // The bare `"if" @d` pattern carries no predicates at all.
      expect(query.predicatesForPattern(2)).toEqual([]);
    });
  });

  describe('.disableCapture', () => {
    // Disabling one of several captures on the same node must drop only that
    // capture from subsequent matches.
    it('disables a capture', () => {
      query = new Query(JavaScript, `
        (function_declaration
          (identifier) @name1 @name2 @name3
          (statement_block) @body1 @body2)
      `);

      const source = 'function foo() { return 1; }';
      // Store the tree in the suite-level `tree` so that `afterEach` deletes it;
      // a shadowing local declaration would never be released.
      tree = parser.parse(source)!;

      // Expected capture entry; every capture in this test belongs to pattern 0.
      const captured = (name: string, text: string) => ({ patternIndex: 0, name, text });

      let matches = query.matches(tree.rootNode);
      expect(formatMatches(matches)).toEqual([
        {
          patternIndex: 0,
          captures: [
            captured('name1', 'foo'),
            captured('name2', 'foo'),
            captured('name3', 'foo'),
            captured('body1', '{ return 1; }'),
            captured('body2', '{ return 1; }'),
          ],
        },
      ]);

      // disabling captures still works when there are multiple captures on a
      // single node.
      query.disableCapture('name2');
      matches = query.matches(tree.rootNode);
      expect(formatMatches(matches)).toEqual([
        {
          patternIndex: 0,
          captures: [
            captured('name1', 'foo'),
            captured('name3', 'foo'),
            captured('body1', '{ return 1; }'),
            captured('body2', '{ return 1; }'),
          ],
        },
      ]);
    });
  });

  describe('Set a timeout', () => {
    // With `timeoutMicros: 1000` the query is expected to stop before it has
    // produced all 1000 matches; with `timeoutMicros: 0` it must return them all.
    it('returns less than the expected matches', { timeout: 10000 }, () => {
      const repetitions = 1000;
      const source = 'function foo() while (true) { } }\n'.repeat(repetitions);

      tree = parser.parse(source)!;
      query = new Query(JavaScript, '(function_declaration name: (identifier) @function)');

      const limited = query.matches(tree.rootNode, { timeoutMicros: 1000 });
      expect(limited.length).toBeLessThan(repetitions);

      const unlimited = query.matches(tree.rootNode, { timeoutMicros: 0 });
      expect(unlimited).toHaveLength(repetitions);
    });
  });

  describe('Start and end indices for patterns', () => {
    // Three groups of patterns are concatenated into one query source; the
    // reported offsets of the first pattern of each group are checked against
    // the lengths of the preceding text.
    it('Returns the start and end indices for a pattern', () => {
      const patterns1 = `
"+" @operator
"-" @operator
"*" @operator
"=" @operator
"=>" @operator
      `.trim();

      const patterns2 = `
(identifier) @a
(string) @b
`.trim();

      const patterns3 = `
((identifier) @b (#match? @b i))
(function_declaration name: (identifier) @c)
(method_definition name: (property_identifier) @d)
`.trim();

      const source = patterns1 + patterns2 + patterns3;

      // Use the suite-level `query` so that `afterEach` deletes it; a shadowing
      // local declaration would never be released.
      query = new Query(JavaScript, source);

      // Pattern 0 is the first pattern of group 1.
      expect(query.startIndexForPattern(0)).toBe(0);
      expect(query.endIndexForPattern(0)).toBe('"+" @operator\n'.length);

      // Pattern 5 is the first pattern of group 2 (group 1 holds five patterns).
      expect(query.startIndexForPattern(5)).toBe(patterns1.length);
      expect(query.endIndexForPattern(5)).toBe(
        patterns1.length + '(identifier) @a\n'.length
      );

      // Pattern 7 is the first pattern of group 3 (group 2 holds two patterns).
      expect(query.startIndexForPattern(7)).toBe(patterns1.length + patterns2.length);
      expect(query.endIndexForPattern(7)).toBe(
        patterns1.length +
        patterns2.length +
        '((identifier) @b (#match? @b i))\n'.length
      );
    });
  });

  describe('Disable pattern', () => {
    // After the two name patterns are disabled, only the body patterns may
    // produce matches.
    it('Disables patterns in the query', () => {
      // Use the suite-level `query` so that `afterEach` deletes it; a shadowing
      // local declaration would never be released.
      query = new Query(JavaScript, `
        (function_declaration name: (identifier) @name)
        (function_declaration body: (statement_block) @body)
        (class_declaration name: (identifier) @name)
        (class_declaration body: (class_body) @body)
      `);

      // disable the patterns that match names
      query.disablePattern(0);
      query.disablePattern(2);

      const source = 'class A { constructor() {} } function b() { return 1; }';
      tree = parser.parse(source)!;
      const matches = query.matches(tree.rootNode);
      expect(formatMatches(matches)).toEqual([
        {
          patternIndex: 3,
          captures: [{ patternIndex: 3, name: 'body', text: '{ constructor() {} }' }],
        },
        { patternIndex: 1, captures: [{ patternIndex: 1, name: 'body', text: '{ return 1; }' }] },
      ]);
    });
  });

  describe('Executes with a timeout', { timeout: 10000 }, () => {
    // A progress callback that returns `true` after more than 1ms is expected to
    // cut matching short; without a callback all 1000 matches must come back.
    it('Returns less than the expected matches', () => {
      const repetitions = 1000;
      tree = parser.parse('function foo() while (true) { } }\n'.repeat(repetitions))!;
      query = new Query(JavaScript, '(function_declaration) @function');

      const begunAt = performance.now();
      const progressCallback = () => performance.now() - begunAt > 1;

      const interrupted = query.matches(tree.rootNode, { progressCallback });
      expect(interrupted.length).toBeLessThan(repetitions);

      const complete = query.matches(tree.rootNode);
      expect(complete).toHaveLength(repetitions);
    });
  });
});

// Helper functions
/**
 * Reduces query matches to plain, comparable objects: the pattern index plus
 * the captures as formatted by `formatCaptures`.
 */
function formatMatches(matches: QueryMatch[]): Omit<QueryMatch, 'pattern'>[] {
  const formatted: Omit<QueryMatch, 'pattern'>[] = [];
  for (const match of matches) {
    formatted.push({
      patternIndex: match.patternIndex,
      captures: formatCaptures(match.captures),
    });
  }
  return formatted;
}

/**
 * Reduces query captures to plain, comparable objects: each capture's `node`
 * is replaced by that node's `text`, and every other property is kept.
 *
 * The input captures are left untouched; the node is omitted from a copy
 * instead of being deleted from the caller's object.
 */
function formatCaptures(captures: QueryCapture[]): (QueryCapture & { text: string })[] {
  return captures.map((capture) => {
    const { node, ...rest } = capture;
    // We're not interested in the node object for these tests, so the result
    // deliberately lacks `node`; the assertion keeps the declared return type.
    return { ...rest, text: node.text } as QueryCapture & { text: string };
  });
}



================================================
FILE: lib/binding_web/test/tree.test.ts
================================================
import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
import type { Point, Language, Tree, Edit, TreeCursor } from '../src';
import { Parser } from '../src';
import helper from './helper';

let JavaScript: Language;

/**
 * Expected state of a `TreeCursor` at one step of a walk; the tests pass
 * objects of this shape as the second argument to `assertCursorState`.
 */
interface CursorState {
  nodeType: string;
  nodeIsNamed: boolean;
  startPosition: Point;
  endPosition: Point;
  startIndex: number;
  endIndex: number;
}

describe('Tree', () => {
  let parser: Parser;
  let tree: Tree;

  // Resolve the shared JavaScript language once for the whole suite.
  beforeAll(async () => {
    const languages = await helper;
    JavaScript = languages.JavaScript;
  });

  // Every test gets a fresh parser configured for JavaScript.
  beforeEach(() => {
    const fresh = new Parser();
    fresh.setLanguage(JavaScript);
    parser = fresh;
  });

  // Release the parser and the tree the test assigned.
  afterEach(() => {
    parser.delete();
    tree.delete();
  });

  describe('.edit', () => {
    let input: string;
    let edit: Edit;

    // Inserting ' * ' into the first identifier must shift the recorded node
    // positions at once, and a re-parse must then yield the nested expression.
    it('updates the positions of nodes', () => {
      // [startIndex, endIndex] of the first and last child of the top-level
      // expression in the current tree.
      const outerOperandSpans = () => {
        const expression = tree.rootNode.firstChild!.firstChild!;
        return [expression.firstChild!, expression.lastChild!].map(
          (operand) => [operand.startIndex, operand.endIndex]
        );
      };

      input = 'abc + cde';
      tree = parser.parse(input)!;
      expect(tree.rootNode.toString()).toBe(
        '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))'
      );
      expect(outerOperandSpans()).toEqual([[0, 3], [6, 9]]);

      [input, edit] = spliceInput(input, input.indexOf('bc'), 0, ' * ');
      expect(input).toBe('a * bc + cde');
      tree.edit(edit);

      // Before any re-parse, both operands already reflect the three inserted
      // characters.
      expect(outerOperandSpans()).toEqual([[0, 6], [9, 12]]);

      tree = parser.parse(input, tree)!;
      expect(tree.rootNode.toString()).toBe(
        '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))'
      );
    });

    // Same edit flow as the ASCII case, but with Greek letters and an emoji in
    // the text, to check that offsets still line up with the JavaScript string.
    it('handles non-ascii characters', () => {
      const flatSum =
        '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))';
      const nestedSum =
        '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))';

      input = 'αβδ + cde';
      tree = parser.parse(input)!;
      expect(tree.rootNode.toString()).toBe(flatSum);

      // Read the right-hand operand once before the edit, then again after it.
      let rightOperand = tree.rootNode.firstChild!.firstChild!.lastChild;

      const insertAt = input.indexOf('δ');
      [input, edit] = spliceInput(input, insertAt, 0, '👍 * ');
      expect(input).toBe('αβ👍 * δ + cde');
      tree.edit(edit);

      rightOperand = tree.rootNode.firstChild!.firstChild!.lastChild;
      expect(rightOperand!.startIndex).toBe(input.indexOf('cde'));

      tree = parser.parse(input, tree)!;
      expect(tree.rootNode.toString()).toBe(nestedSum);
    });
  });

  describe('.getChangedRanges(previous)', () => {
    // Edits the old tree, re-parses, and checks that the changed range covers
    // the text whose structure differs between the two trees.
    it('reports the ranges of text whose syntactic meaning has changed', () => {
      let sourceCode = 'abcdefg + hij';
      tree = parser.parse(sourceCode)!;

      expect(tree.rootNode.toString()).toBe(
        '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))'
      );

      // Describe the insertion of ' + ' after 'abc' ... wait for re-parse below.
      sourceCode = 'abc + defg + hij';
      tree.edit({
        startIndex: 2,
        oldEndIndex: 2,
        newEndIndex: 5,
        startPosition: { row: 0, column: 2 },
        oldEndPosition: { row: 0, column: 2 },
        newEndPosition: { row: 0, column: 5 },
      });

      const tree2 = parser.parse(sourceCode, tree)!;
      // `tree2` is not covered by `afterEach`, so release it even when one of
      // the assertions below throws.
      try {
        expect(tree2.rootNode.toString()).toBe(
          '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))'
        );

        const ranges = tree.getChangedRanges(tree2);
        expect(ranges).toEqual([
          {
            startIndex: 0,
            endIndex: 'abc + defg'.length,
            startPosition: { row: 0, column: 0 },
            endPosition: { row: 0, column: 'abc + defg'.length },
          },
        ]);
      } finally {
        tree2.delete();
      }
    });

    // A plain object cast to `Tree` must be rejected at runtime.
    it('throws an exception if the argument is not a tree', () => {
      tree = parser.parse('abcdefg + hij')!;

      const notATree = {} as Tree;
      const compare = () => {
        tree.getChangedRanges(notATree);
      };
      expect(compare).toThrow(/Argument must be a Tree/);
    });
  });

  describe('.walk()', () => {
    let cursor: TreeCursor;

    // Release the cursor that the test assigned from `tree.walk()`.
    afterEach(() => {
      cursor.delete();
    });

    // Walks `a * b + c / d` down to the first identifier, across its siblings
    // and back up, then repeats part of the walk backwards on a second cursor
    // that was reset to the first cursor's position.
    it('returns a cursor that can be used to walk the tree', () => {
      // The source is a single row, so every expected column equals the
      // corresponding index and the row is always 0.
      const onRowZero = (
        nodeType: string,
        nodeIsNamed: boolean,
        startIndex: number,
        endIndex: number
      ): CursorState => ({
        nodeType,
        nodeIsNamed,
        startPosition: { row: 0, column: startIndex },
        endPosition: { row: 0, column: endIndex },
        startIndex,
        endIndex,
      });

      // Moves `walker` to its parent once per entry, checking the node type
      // reached after each step.
      const climb = (walker: TreeCursor, expectedTypes: string[]) => {
        for (const expectedType of expectedTypes) {
          expect(walker.gotoParent()).toBe(true);
          expect(walker.nodeType).toBe(expectedType);
        }
      };

      tree = parser.parse('a * b + c / d')!;
      cursor = tree.walk();
      assertCursorState(cursor, onRowZero('program', true, 0, 13));

      // Descend to the left-hand product `a * b`.
      expect(cursor.gotoFirstChild()).toBe(true);
      assertCursorState(cursor, onRowZero('expression_statement', true, 0, 13));

      expect(cursor.gotoFirstChild()).toBe(true);
      assertCursorState(cursor, onRowZero('binary_expression', true, 0, 13));

      expect(cursor.gotoFirstChild()).toBe(true);
      assertCursorState(cursor, onRowZero('binary_expression', true, 0, 5));

      // Visit `a`, `*` and `b` in turn.
      expect(cursor.gotoFirstChild()).toBe(true);
      expect(cursor.nodeText).toBe('a');
      assertCursorState(cursor, onRowZero('identifier', true, 0, 1));

      expect(cursor.gotoFirstChild()).toBe(false);
      expect(cursor.gotoNextSibling()).toBe(true);
      expect(cursor.nodeText).toBe('*');
      assertCursorState(cursor, onRowZero('*', false, 2, 3));

      expect(cursor.gotoNextSibling()).toBe(true);
      expect(cursor.nodeText).toBe('b');
      assertCursorState(cursor, onRowZero('identifier', true, 4, 5));

      // No further siblings: go back up and across to `+` and `c / d`.
      expect(cursor.gotoNextSibling()).toBe(false);
      expect(cursor.gotoParent()).toBe(true);
      assertCursorState(cursor, onRowZero('binary_expression', true, 0, 5));

      expect(cursor.gotoNextSibling()).toBe(true);
      assertCursorState(cursor, onRowZero('+', false, 6, 7));

      expect(cursor.gotoNextSibling()).toBe(true);
      assertCursorState(cursor, onRowZero('binary_expression', true, 8, 13));

      // A second cursor reset to the same position walks backwards.
      const copy = tree.walk();
      copy.resetTo(cursor);

      expect(copy.gotoPreviousSibling()).toBe(true);
      assertCursorState(copy, onRowZero('+', false, 6, 7));

      expect(copy.gotoPreviousSibling()).toBe(true);
      assertCursorState(copy, onRowZero('binary_expression', true, 0, 5));

      expect(copy.gotoLastChild()).toBe(true);
      assertCursorState(copy, onRowZero('identifier', true, 4, 5));

      // From `b`: one unchecked step up to `a * b`, then on to the root.
      expect(copy.gotoParent()).toBe(true);
      climb(copy, ['binary_expression', 'expression_statement', 'program']);
      expect(copy.gotoParent()).toBe(false);
      copy.delete();

      // The original cursor did not move while the copy was walking.
      climb(cursor, ['binary_expression', 'expression_statement', 'program']);
    });

    it('keeps track of the field name associated with each node', () => {
      // Checks the type of the node under the cursor together with the field
      // name the cursor reports for it (`null` meaning "no field").
      const expectNodeInField = (nodeType: string, fieldName: string | null): void => {
        expect(cursor.currentNode.type).toBe(nodeType);
        if (fieldName === null) {
          expect(cursor.currentFieldName).toBeNull();
        } else {
          expect(cursor.currentFieldName).toBe(fieldName);
        }
      };

      tree = parser.parse('a.b();')!;
      cursor = tree.walk();

      // Two levels below the root sits the call expression; it has no field.
      cursor.gotoFirstChild();
      cursor.gotoFirstChild();
      expectNodeInField('call_expression', null);

      cursor.gotoFirstChild();
      expectNodeInField('member_expression', 'function');

      cursor.gotoFirstChild();
      expectNodeInField('identifier', 'object');

      // Step over the sibling that sits between `object` and `property`.
      cursor.gotoNextSibling();
      cursor.gotoNextSibling();
      expectNodeInField('property_identifier', 'property');

      // Back up to the member expression, then across to the argument list.
      cursor.gotoParent();
      cursor.gotoNextSibling();
      expectNodeInField('arguments', 'arguments');
    });

    it('returns a cursor that can be reset anywhere in the tree', () => {
      // Expected cursor state on the `a * b` sub-expression and on its first operand.
      const productState = {
        nodeType: 'binary_expression',
        nodeIsNamed: true,
        startPosition: { row: 0, column: 0 },
        endPosition: { row: 0, column: 5 },
        startIndex: 0,
        endIndex: 5,
      };
      const firstOperandState = {
        nodeType: 'identifier',
        nodeIsNamed: true,
        startPosition: { row: 0, column: 0 },
        endPosition: { row: 0, column: 1 },
        startIndex: 0,
        endIndex: 1,
      };

      tree = parser.parse('a * b + c / d')!;
      cursor = tree.walk();

      // Descend three levels from the root to reach the node spanning bytes 0-5
      // and re-root the cursor there.
      const statement = tree.rootNode.firstChild;
      const product = statement!.firstChild!.firstChild!;
      cursor.reset(product);
      assertCursorState(cursor, productState);

      cursor.gotoFirstChild();
      assertCursorState(cursor, firstOperandState);

      // The node passed to `reset` is the cursor's root: one step up succeeds,
      // a second one does not.
      expect(cursor.gotoParent()).toBe(true);
      expect(cursor.gotoParent()).toBe(false);
    });
  });

  describe('.copy', () => {
    it('creates another tree that remains stable if the original tree is edited', () => {
      const original = 'abc + cde';
      tree = parser.parse(original)!;
      expect(tree.rootNode.toString()).toBe(
        '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))'
      );

      // Take the copy first, then edit only the original tree.
      const snapshot = tree.copy();
      const [edited, edit] = spliceInput(original, 3, 0, '123');
      expect(edited).toBe('abc123 + cde');
      tree.edit(edit);

      // Operands of the binary expression in the edited tree and in the copy.
      const editedLeft = tree.rootNode.firstChild!.firstChild!.firstChild;
      const copiedLeft = snapshot.rootNode.firstChild!.firstChild!.firstChild;
      const editedRight = tree.rootNode.firstChild!.firstChild!.lastChild;
      const copiedRight = snapshot.rootNode.firstChild!.firstChild!.lastChild;

      // The edited tree reflects the three inserted characters; the copy does not.
      expect(editedLeft!.endIndex).toBe(6);
      expect(copiedLeft!.endIndex).toBe(3);
      expect(editedRight!.startIndex).toBe(9);
      expect(copiedRight!.startIndex).toBe(6);

      snapshot.delete();
    });
  });
});

// Replaces `lengthRemoved` characters of `input` at `startIndex` with `newText`.
// Returns the new text together with an edit record describing the change in
// both index and (row, column) terms.
function spliceInput(input: string, startIndex: number, lengthRemoved: number, newText: string): [string, Edit] {
  const oldEndIndex = startIndex + lengthRemoved;
  const newEndIndex = startIndex + newText.length;

  // Text before the edit is shared by the old and the new input.
  const head = input.slice(0, startIndex);
  const updated = head + newText + input.slice(oldEndIndex);

  const edit = {
    startIndex,
    startPosition: getExtent(head),
    oldEndIndex,
    // Measured on the text as it was before the edit.
    oldEndPosition: getExtent(input.slice(0, oldEndIndex)),
    newEndIndex,
    // Measured on the text as it is after the edit.
    newEndPosition: getExtent(updated.slice(0, newEndIndex)),
  };
  return [updated, edit];
}

// Measures `text` as a zero-based (row, column) pair: `row` is the number of
// '\n' characters in it, `column` is the length of whatever follows the last one.
function getExtent(text: string): Point {
  const newlineCount = text.split('\n').length - 1;
  // `lastIndexOf` yields -1 when there is no newline, so the line starts at 0.
  const lineStart = text.lastIndexOf('\n') + 1;
  return { row: newlineCount, column: text.length - lineStart };
}

// Asserts that the cursor reports the expected type, named-ness, positions and
// indices, and that the node obtained from `cursor.currentNode` reports the same.
function assertCursorState(cursor: TreeCursor, params: CursorState): void {
  const { nodeType, nodeIsNamed, startPosition, endPosition, startIndex, endIndex } = params;

  // Accessors exposed directly on the cursor.
  expect(cursor.nodeType).toBe(nodeType);
  expect(cursor.nodeIsNamed).toBe(nodeIsNamed);
  expect(cursor.startPosition).toEqual(startPosition);
  expect(cursor.endPosition).toEqual(endPosition);
  expect(cursor.startIndex).toEqual(startIndex);
  expect(cursor.endIndex).toEqual(endIndex);

  // The same facts, read from the node the cursor currently points at.
  const current = cursor.currentNode;
  expect(current.type).toBe(nodeType);
  expect(current.isNamed).toBe(nodeIsNamed);
  expect(current.startPosition).toEqual(startPosition);
  expect(current.endPosition).toEqual(endPosition);
  expect(current.startIndex).toEqual(startIndex);
  expect(current.endIndex).toEqual(endIndex);
}



================================================
FILE: lib/include/tree_sitter/api.h
================================================
#ifndef TREE_SITTER_API_H_
#define TREE_SITTER_API_H_

#ifndef TREE_SITTER_HIDE_SYMBOLS
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC visibility push(default)
#endif
#endif

#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/****************************/
/* Section - ABI Versioning */
/****************************/

/**
 * The latest ABI version that is supported by the current version of the
 * library. When Languages are generated by the Tree-sitter CLI, they are
 * assigned an ABI version number that corresponds to the current CLI version.
 * The Tree-sitter library is generally backwards-compatible with languages
 * generated using older CLI versions, but is not forwards-compatible.
 */
#define TREE_SITTER_LANGUAGE_VERSION 15

/**
 * The earliest ABI version that is supported by the current version of the
 * library.
 */
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13

/*******************/
/* Section - Types */
/*******************/

// Numeric id of a parse state (see `ts_node_parse_state`).
typedef uint16_t TSStateId;
// Numeric id of a node type (see `ts_node_symbol`).
typedef uint16_t TSSymbol;
// Numeric id of a field (see `ts_node_child_by_field_id`).
typedef uint16_t TSFieldId;
// Forward declarations of the library's handle types; no struct layout is
// given at this point.
typedef struct TSLanguage TSLanguage;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef struct TSQuery TSQuery;
typedef struct TSQueryCursor TSQueryCursor;
typedef struct TSLookaheadIterator TSLookaheadIterator;

// This function signature reads one code point from the given string,
// returning the number of bytes consumed. It should write the code point
// to the `code_point` pointer, or write -1 if the input is invalid.
typedef uint32_t (*DecodeFunction)(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
);

// How the text handed to the parser is encoded (see `TSInput::encoding` and
// `ts_parser_parse_string_encoding`).
typedef enum TSInputEncoding {
  TSInputEncodingUTF8,
  TSInputEncodingUTF16LE,
  TSInputEncodingUTF16BE,
  // NOTE(review): presumably paired with `TSInput::decode` -- confirm.
  TSInputEncodingCustom
} TSInputEncoding;

typedef enum TSSymbolType {
  TSSymbolTypeRegular,
  TSSymbolTypeAnonymous,
  TSSymbolTypeSupertype,
  TSSymbolTypeAuxiliary,
} TSSymbolType;

// A position in a document expressed as a (row, column) pair.
typedef struct TSPoint {
  uint32_t row;
  uint32_t column;
} TSPoint;

// A span of a document, described both by its start/end points and by its
// start/end byte offsets. Used by `ts_parser_set_included_ranges` and
// `ts_tree_get_changed_ranges`, among others.
typedef struct TSRange {
  TSPoint start_point;
  TSPoint end_point;
  uint32_t start_byte;
  uint32_t end_byte;
} TSRange;

// Describes how the parser reads the text to parse. See `ts_parser_parse`
// for the contract of the `read` callback.
typedef struct TSInput {
  // Arbitrary pointer passed to each invocation of `read`.
  void *payload;
  // Returns a chunk of text at the given byte offset and (row, column)
  // position, writing its length to `bytes_read`; a zero length indicates the
  // end of the document.
  const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
  // How the text is encoded.
  TSInputEncoding encoding;
  // Function that reads one code point from the text (see `DecodeFunction`).
  // NOTE(review): presumably only consulted when `encoding` is
  // `TSInputEncodingCustom` -- confirm.
  DecodeFunction decode;
} TSInput;

// State handed to `TSParseOptions::progress_callback`.
// NOTE(review): field meanings below are inferred from their names -- confirm.
typedef struct TSParseState {
  // Presumably the `payload` supplied in `TSParseOptions`.
  void *payload;
  // Presumably how far into the input the parser currently is, in bytes.
  uint32_t current_byte_offset;
  // Presumably whether the parse has encountered an error so far.
  bool has_error;
} TSParseState;

// Options accepted by `ts_parser_parse_with_options`.
typedef struct TSParseOptions {
  // Caller-owned pointer; presumably surfaced to the callback through
  // `TSParseState::payload` -- confirm.
  void *payload;
  // Progress callback; parsing is cancelled when it returns true
  // (see `ts_parser_parse`).
  bool (*progress_callback)(TSParseState *state);
} TSParseOptions;

typedef enum TSLogType {
  TSLogTypeParse,
  TSLogTypeLex,
} TSLogType;

typedef struct TSLogger {
  void *payload;
  void (*log)(void *payload, TSLogType log_type, const char *buffer);
} TSLogger;

// Describes one edit to the source text, both in byte offsets and in
// (row, column) points. Passed to `ts_tree_edit` and `ts_node_edit`.
typedef struct TSInputEdit {
  // Byte offset at which the edit begins.
  uint32_t start_byte;
  // Byte offset at which the replaced text ended, before the edit.
  uint32_t old_end_byte;
  // Byte offset at which the replacement text ends, after the edit.
  uint32_t new_end_byte;
  // The same three locations expressed as (row, column) points.
  TSPoint start_point;
  TSPoint old_end_point;
  TSPoint new_end_point;
} TSInputEdit;

typedef struct TSNode {
  uint32_t context[4];
  const void *id;
  const TSTree *tree;
} TSNode;

typedef struct TSTreeCursor {
  const void *tree;
  const void *id;
  uint32_t context[3];
} TSTreeCursor;

typedef struct TSQueryCapture {
  TSNode node;
  uint32_t index;
} TSQueryCapture;

typedef enum TSQuantifier {
  TSQuantifierZero = 0, // must match the array initialization value
  TSQuantifierZeroOrOne,
  TSQuantifierZeroOrMore,
  TSQuantifierOne,
  TSQuantifierOneOrMore,
} TSQuantifier;

typedef struct TSQueryMatch {
  uint32_t id;
  uint16_t pattern_index;
  uint16_t capture_count;
  const TSQueryCapture *captures;
} TSQueryMatch;

typedef enum TSQueryPredicateStepType {
  TSQueryPredicateStepTypeDone,
  TSQueryPredicateStepTypeCapture,
  TSQueryPredicateStepTypeString,
} TSQueryPredicateStepType;

typedef struct TSQueryPredicateStep {
  TSQueryPredicateStepType type;
  uint32_t value_id;
} TSQueryPredicateStep;

typedef enum TSQueryError {
  TSQueryErrorNone = 0,
  TSQueryErrorSyntax,
  TSQueryErrorNodeType,
  TSQueryErrorField,
  TSQueryErrorCapture,
  TSQueryErrorStructure,
  TSQueryErrorLanguage,
} TSQueryError;

typedef struct TSQueryCursorState {
  void *payload;
  uint32_t current_byte_offset;
} TSQueryCursorState;

typedef struct TSQueryCursorOptions {
  void *payload;
  bool (*progress_callback)(TSQueryCursorState *state);
} TSQueryCursorOptions;

/**
 * The metadata associated with a language.
 *
 * Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
 * of the language. This version information should be used to signal if a given parser might
 * be incompatible with existing queries when upgrading between major versions, or minor versions
 * if it's in zerover.
 */
typedef struct TSLanguageMetadata {
  uint8_t major_version;
  uint8_t minor_version;
  uint8_t patch_version;
} TSLanguageMetadata;

/********************/
/* Section - Parser */
/********************/

/**
 * Create a new parser.
 */
TSParser *ts_parser_new(void);

/**
 * Delete the parser, freeing all of the memory that it used.
 */
void ts_parser_delete(TSParser *self);

/**
 * Get the parser's current language.
 */
const TSLanguage *ts_parser_language(const TSParser *self);

/**
 * Set the language that the parser should use for parsing.
 *
 * Returns a boolean indicating whether or not the language was successfully
 * assigned. True means assignment succeeded. False means there was a version
 * mismatch: the language was generated with an incompatible version of the
 * Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]
 * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and
 * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
 */
bool ts_parser_set_language(TSParser *self, const TSLanguage *language);

/**
 * Set the ranges of text that the parser should include when parsing.
 *
 * By default, the parser will always include entire documents. This function
 * allows you to parse only a *portion* of a document but still return a syntax
 * tree whose ranges match up with the document as a whole. You can also pass
 * multiple disjoint ranges.
 *
 * The second and third parameters specify the location and length of an array
 * of ranges. The parser does *not* take ownership of these ranges; it copies
 * the data, so it doesn't matter how these ranges are allocated.
 *
 * If `count` is zero, then the entire document will be parsed. Otherwise,
 * the given ranges must be ordered from earliest to latest in the document,
 * and they must not overlap. That is, the following must hold for all:
 *
 * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`
 *
 * If this requirement is not satisfied, the operation will fail, the ranges
 * will not be assigned, and this function will return `false`. On success,
 * this function returns `true`.
 */
bool ts_parser_set_included_ranges(
  TSParser *self,
  const TSRange *ranges,
  uint32_t count
);

/**
 * Get the ranges of text that the parser will include when parsing.
 *
 * The returned pointer is owned by the parser. The caller should not free it
 * or write to it. The length of the array will be written to the given
 * `count` pointer.
 */
const TSRange *ts_parser_included_ranges(
  const TSParser *self,
  uint32_t *count
);

/**
 * Use the parser to parse some source code and create a syntax tree.
 *
 * If you are parsing this document for the first time, pass `NULL` for the
 * `old_tree` parameter. Otherwise, if you have already parsed an earlier
 * version of this document and the document has since been edited, pass the
 * previous syntax tree so that the unchanged parts of it can be reused.
 * This will save time and memory. For this to work correctly, you must have
 * already edited the old syntax tree using the [`ts_tree_edit`] function in a
 * way that exactly matches the source code changes.
 *
 * The [`TSInput`] parameter lets you specify how to read the text. It has the
 * following four fields:
 * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset
 *    and (row, column) position. The function should return a pointer to the
 *    text and write its length to the [`bytes_read`] pointer. The parser does
 *    not take ownership of this buffer; it just borrows it until it has
 *    finished reading it. The function should write a zero value to the
 *    [`bytes_read`] pointer to indicate the end of the document.
 * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation
 *    of the [`read`] function.
 * 3. [`encoding`]: An indication of how the text is encoded. One of the
 *    [`TSInputEncoding`] values: `TSInputEncodingUTF8`,
 *    `TSInputEncodingUTF16LE`, `TSInputEncodingUTF16BE`, or
 *    `TSInputEncodingCustom`.
 * 4. [`decode`]: A [`DecodeFunction`] that reads one code point from the text.
 *    NOTE(review): presumably only consulted when [`encoding`] is
 *    `TSInputEncodingCustom` -- confirm.
 *
 * This function returns a syntax tree on success, and `NULL` on failure. There
 * are four possible reasons for failure:
 * 1. The parser does not have a language assigned. Check for this using the
 *    [`ts_parser_language`] function.
 * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
 *    the [`ts_parser_set_timeout_micros`] function. You can resume parsing from
 *    where the parser left off by calling [`ts_parser_parse`] again with the
 *    same arguments. Or you can start parsing from scratch by first calling
 *    [`ts_parser_reset`].
 * 3. Parsing was cancelled using a cancellation flag that was set by an
 *    earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing
 *    from where the parser left off by calling [`ts_parser_parse`] again with
 *    the same arguments.
 * 4. Parsing was cancelled due to the progress callback returning true. This callback
 *    is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct.
 *
 * [`read`]: TSInput::read
 * [`payload`]: TSInput::payload
 * [`encoding`]: TSInput::encoding
 * [`decode`]: TSInput::decode
 * [`bytes_read`]: TSInput::read
 */
TSTree *ts_parser_parse(
  TSParser *self,
  const TSTree *old_tree,
  TSInput input
);

/**
 * Use the parser to parse some source code and create a syntax tree, with some options.
 *
 * See [`ts_parser_parse`] for more details.
 *
 * See [`TSParseOptions`] for more details on the options.
 */
TSTree* ts_parser_parse_with_options(
  TSParser *self,
  const TSTree *old_tree,
  TSInput input,
  TSParseOptions parse_options
);

/**
 * Use the parser to parse some source code stored in one contiguous buffer.
 * The first two parameters are the same as in the [`ts_parser_parse`] function
 * above. The last two parameters indicate the location of the buffer and its
 * length in bytes.
 */
TSTree *ts_parser_parse_string(
  TSParser *self,
  const TSTree *old_tree,
  const char *string,
  uint32_t length
);

/**
 * Use the parser to parse some source code stored in one contiguous buffer with
 * a given encoding. The first four parameters work the same as in the
 * [`ts_parser_parse_string`] method above. The final parameter indicates how
 * the text is encoded, as one of the [`TSInputEncoding`] values.
 */
TSTree *ts_parser_parse_string_encoding(
  TSParser *self,
  const TSTree *old_tree,
  const char *string,
  uint32_t length,
  TSInputEncoding encoding
);

/**
 * Instruct the parser to start the next parse from the beginning.
 *
 * If the parser previously failed because of a timeout or a cancellation, then
 * by default, it will resume where it left off on the next call to
 * [`ts_parser_parse`] or other parsing functions. If you don't want to resume,
 * and instead intend to use this parser to parse some other document, you must
 * call [`ts_parser_reset`] first.
 */
void ts_parser_reset(TSParser *self);

/**
 * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Set the maximum duration in microseconds that parsing should be allowed to
 * take before halting.
 *
 * If parsing takes longer than this, it will halt early, returning NULL.
 * See [`ts_parser_parse`] for more information.
 */
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros);

/**
 * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Get the duration in microseconds that parsing is allowed to take.
 */
uint64_t ts_parser_timeout_micros(const TSParser *self);

/**
 * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Set the parser's current cancellation flag pointer.
 *
 * If a non-null pointer is assigned, then the parser will periodically read
 * from this pointer during parsing. If it reads a non-zero value, it will
 * halt early, returning NULL. See [`ts_parser_parse`] for more information.
 */
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);

/**
 * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Get the parser's current cancellation flag pointer.
 */
const size_t *ts_parser_cancellation_flag(const TSParser *self);

/**
 * Set the logger that a parser should use during parsing.
 *
 * The parser does not take ownership over the logger payload. If a logger was
 * previously assigned, the caller is responsible for releasing any memory
 * owned by the previous logger.
 */
void ts_parser_set_logger(TSParser *self, TSLogger logger);

/**
 * Get the parser's current logger.
 */
TSLogger ts_parser_logger(const TSParser *self);

/**
 * Set the file descriptor to which the parser should write debugging graphs
 * during parsing. The graphs are formatted in the DOT language. You may want
 * to pipe these graphs directly to a `dot(1)` process in order to generate
 * SVG output. You can turn off this logging by passing a negative number.
 */
void ts_parser_print_dot_graphs(TSParser *self, int fd);

/******************/
/* Section - Tree */
/******************/

/**
 * Create a shallow copy of the syntax tree. This is very fast.
 *
 * You need to copy a syntax tree in order to use it on more than one thread at
 * a time, as syntax trees are not thread safe.
 */
TSTree *ts_tree_copy(const TSTree *self);

/**
 * Delete the syntax tree, freeing all of the memory that it used.
 */
void ts_tree_delete(TSTree *self);

/**
 * Get the root node of the syntax tree.
 */
TSNode ts_tree_root_node(const TSTree *self);

/**
 * Get the root node of the syntax tree, but with its position
 * shifted forward by the given offset.
 */
TSNode ts_tree_root_node_with_offset(
  const TSTree *self,
  uint32_t offset_bytes,
  TSPoint offset_extent
);

/**
 * Get the language that was used to parse the syntax tree.
 */
const TSLanguage *ts_tree_language(const TSTree *self);

/**
 * Get the array of included ranges that was used to parse the syntax tree.
 *
 * The returned pointer must be freed by the caller.
 */
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length);

/**
 * Edit the syntax tree to keep it in sync with source code that has been
 * edited.
 *
 * You must describe the edit both in terms of byte offsets and in terms of
 * (row, column) coordinates.
 */
void ts_tree_edit(TSTree *self, const TSInputEdit *edit);

/**
 * Compare an old edited syntax tree to a new syntax tree representing the same
 * document, returning an array of ranges whose syntactic structure has changed.
 *
 * For this to work correctly, the old syntax tree must have been edited such
 * that its ranges match up to the new tree. Generally, you'll want to call
 * this function right after calling one of the [`ts_parser_parse`] functions.
 * You need to pass the old tree that was passed to parse, as well as the new
 * tree that was returned from that function.
 *
 * The returned ranges indicate areas where the hierarchical structure of syntax
 * nodes (from root to leaf) has changed between the old and new trees. Characters
 * outside these ranges have identical ancestor nodes in both trees.
 *
 * Note that the returned ranges may be slightly larger than the exact changed areas,
 * but Tree-sitter attempts to make them as small as possible.
 *
 * The returned array is allocated using `malloc` and the caller is responsible
 * for freeing it using `free`. The length of the array will be written to the
 * given `length` pointer.
 */
TSRange *ts_tree_get_changed_ranges(
  const TSTree *old_tree,
  const TSTree *new_tree,
  uint32_t *length
);

/**
 * Write a DOT graph describing the syntax tree to the given file.
 */
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor);

/******************/
/* Section - Node */
/******************/

/**
 * Get the node's type as a null-terminated string.
 */
const char *ts_node_type(TSNode self);

/**
 * Get the node's type as a numerical id.
 */
TSSymbol ts_node_symbol(TSNode self);

/**
 * Get the node's language.
 */
const TSLanguage *ts_node_language(TSNode self);

/**
 * Get the node's type as it appears in the grammar ignoring aliases as a
 * null-terminated string.
 */
const char *ts_node_grammar_type(TSNode self);

/**
 * Get the node's type as a numerical id as it appears in the grammar ignoring
 * aliases. This should be used in [`ts_language_next_state`] instead of
 * [`ts_node_symbol`].
 */
TSSymbol ts_node_grammar_symbol(TSNode self);

/**
 * Get the node's start byte.
 */
uint32_t ts_node_start_byte(TSNode self);

/**
 * Get the node's start position in terms of rows and columns.
 */
TSPoint ts_node_start_point(TSNode self);

/**
 * Get the node's end byte.
 */
uint32_t ts_node_end_byte(TSNode self);

/**
 * Get the node's end position in terms of rows and columns.
 */
TSPoint ts_node_end_point(TSNode self);

/**
 * Get an S-expression representing the node as a string.
 *
 * This string is allocated with `malloc` and the caller is responsible for
 * freeing it using `free`.
 */
char *ts_node_string(TSNode self);

/**
 * Check if the node is null. Functions like [`ts_node_child`] and
 * [`ts_node_next_sibling`] will return a null node to indicate that no such node
 * was found.
 */
bool ts_node_is_null(TSNode self);

/**
 * Check if the node is *named*. Named nodes correspond to named rules in the
 * grammar, whereas *anonymous* nodes correspond to string literals in the
 * grammar.
 */
bool ts_node_is_named(TSNode self);

/**
 * Check if the node is *missing*. Missing nodes are inserted by the parser in
 * order to recover from certain kinds of syntax errors.
 */
bool ts_node_is_missing(TSNode self);

/**
 * Check if the node is *extra*. Extra nodes represent things like comments,
 * which are not required by the grammar, but can appear anywhere.
 */
bool ts_node_is_extra(TSNode self);

/**
 * Check if a syntax node has been edited.
 */
bool ts_node_has_changes(TSNode self);

/**
 * Check if the node is a syntax error or contains any syntax errors.
 */
bool ts_node_has_error(TSNode self);

/**
 * Check if the node is a syntax error.
 */
bool ts_node_is_error(TSNode self);

/**
 * Get this node's parse state.
 */
TSStateId ts_node_parse_state(TSNode self);

/**
 * Get the parse state after this node.
 */
TSStateId ts_node_next_parse_state(TSNode self);

/**
 * Get the node's immediate parent.
 * Prefer [`ts_node_child_with_descendant`] for
 * iterating over the node's ancestors.
 */
TSNode ts_node_parent(TSNode self);

/**
 * Get the node that contains `descendant`.
 *
 * Note that this can return `descendant` itself.
 */
TSNode ts_node_child_with_descendant(TSNode self, TSNode descendant);

/**
 * Get the node's child at the given index, where zero represents the first
 * child.
 */
TSNode ts_node_child(TSNode self, uint32_t child_index);

/**
 * Get the field name for node's child at the given index, where zero represents
 * the first child. Returns NULL, if no field is found.
 */
const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index);

/**
 * Get the field name for node's named child at the given index, where zero
 * represents the first named child. Returns NULL, if no field is found.
 */
const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index);

/**
 * Get the node's number of children.
 */
uint32_t ts_node_child_count(TSNode self);

/**
 * Get the node's *named* child at the given index.
 *
 * See also [`ts_node_is_named`].
 */
TSNode ts_node_named_child(TSNode self, uint32_t child_index);

/**
 * Get the node's number of *named* children.
 *
 * See also [`ts_node_is_named`].
 */
uint32_t ts_node_named_child_count(TSNode self);

/**
 * Get the node's child with the given field name.
 */
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
);

/**
 * Get the node's child with the given numerical field id.
 *
 * You can convert a field name to an id using the
 * [`ts_language_field_id_for_name`] function.
 */
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id);

/**
 * Get the node's next / previous sibling.
 */
TSNode ts_node_next_sibling(TSNode self);
TSNode ts_node_prev_sibling(TSNode self);

/**
 * Get the node's next / previous *named* sibling.
 */
TSNode ts_node_next_named_sibling(TSNode self);
TSNode ts_node_prev_named_sibling(TSNode self);

/**
 * Get the node's first child that contains or starts after the given byte offset.
 */
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte);

/**
 * Get the node's first named child that contains or starts after the given byte offset.
 */
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte);

/**
 * Get the node's number of descendants, including one for the node itself.
 */
uint32_t ts_node_descendant_count(TSNode self);

/**
 * Get the smallest node within this node that spans the given range of bytes
 * or (row, column) positions.
 */
TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end);
TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end);

/**
 * Get the smallest named node within this node that spans the given range of
 * bytes or (row, column) positions.
 */
TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end);
TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end);

/**
 * Edit the node to keep it in-sync with source code that has been edited.
 *
 * This function is only rarely needed. When you edit a syntax tree with the
 * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree
 * afterward will already reflect the edit. You only need to use [`ts_node_edit`]
 * when you have a [`TSNode`] instance that you want to keep and continue to use
 * after an edit.
 */
void ts_node_edit(TSNode *self, const TSInputEdit *edit);

/**
 * Check if two nodes are identical.
 */
bool ts_node_eq(TSNode self, TSNode other);

/************************/
/* Section - TreeCursor */
/************************/

/**
 * Create a new tree cursor starting from the given node.
 *
 * A tree cursor allows you to walk a syntax tree more efficiently than is
 * possible using the [`TSNode`] functions. It is a mutable object that is always
 * on a certain syntax node, and can be moved imperatively to different nodes.
 *
 * Note that the given node is considered the root of the cursor,
 * and the cursor cannot walk outside this node.
 */
TSTreeCursor ts_tree_cursor_new(TSNode node);

/**
 * Delete a tree cursor, freeing all of the memory that it used.
 */
void ts_tree_cursor_delete(TSTreeCursor *self);

/**
 * Re-initialize a tree cursor to start at the given node.
 *
 * The given node does not have to be the node the cursor was constructed
 * with. Parent information above the given node is lost (compare
 * [`ts_tree_cursor_reset_to`]), so the given node is considered the root of
 * the cursor afterwards.
 */
void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node);

/**
 * Re-initialize a tree cursor to the same position as another cursor.
 *
 * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and
 * allows reusing already created cursors.
 */
void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src);

/**
 * Get the tree cursor's current node.
 */
TSNode ts_tree_cursor_current_node(const TSTreeCursor *self);

/**
 * Get the field name of the tree cursor's current node.
 *
 * This returns `NULL` if the current node doesn't have a field.
 * See also [`ts_node_child_by_field_name`].
 */
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self);

/**
 * Get the field id of the tree cursor's current node.
 *
 * This returns zero if the current node doesn't have a field.
 * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`].
 */
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self);

/**
 * Move the cursor to the parent of its current node.
 *
 * This returns `true` if the cursor successfully moved, and returns `false`
 * if there was no parent node (the cursor was already on the root node).
 *
 * Note that the node the cursor was constructed with is considered the root
 * of the cursor, and the cursor cannot walk outside this node.
 */
bool ts_tree_cursor_goto_parent(TSTreeCursor *self);

/**
 * Move the cursor to the next sibling of its current node.
 *
 * This returns `true` if the cursor successfully moved, and returns `false`
 * if there was no next sibling node.
 *
 * Note that the node the cursor was constructed with is considered the root
 * of the cursor, and the cursor cannot walk outside this node.
 */
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self);

/**
 * Move the cursor to the previous sibling of its current node.
 *
 * This returns `true` if the cursor successfully moved, and returns `false` if
 * there was no previous sibling node.
 *
 * Note that this function may be slower than
 * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In
 * the worst case, this will need to iterate through all the children up to the
 * previous sibling node to recalculate its position. Also note that the node the cursor
 * was constructed with is considered the root of the cursor, and the cursor cannot
 * walk outside this node.
 */
bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self);

/**
 * Move the cursor to the first child of its current node.
 *
 * This returns `true` if the cursor successfully moved, and returns `false`
 * if there were no children.
 */
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self);

/**
 * Move the cursor to the last child of its current node.
 *
 * This returns `true` if the cursor successfully moved, and returns `false` if
 * there were no children.
 *
 * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]
 * because it needs to iterate through all the children to compute the child's
 * position.
 */
bool ts_tree_cursor_goto_last_child(TSTreeCursor *self);

/**
 * Move the cursor to the node that is the nth descendant of
 * the original node that the cursor was constructed with, where
 * zero represents the original node itself.
 */
void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index);

/**
 * Get the index of the cursor's current node out of all of the
 * descendants of the original node that the cursor was constructed with.
 */
uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self);

/**
 * Get the depth of the cursor's current node relative to the original
 * node that the cursor was constructed with.
 */
uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self);

/**
 * Move the cursor to the first child of its current node that contains or starts after
 * the given byte offset or point.
 *
 * This returns the index of the child node if one was found, and returns -1
 * if no such child was found.
 */
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte);
int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point);

TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor);

/*******************/
/* Section - Query */
/*******************/

/**
 * Create a new query from a string containing one or more S-expression
 * patterns. The query is associated with a particular language, and can
 * only be run on syntax nodes parsed with that language.
 *
 * If all of the given patterns are valid, this returns a [`TSQuery`].
 * If a pattern is invalid, this returns `NULL`, and provides two pieces
 * of information about the problem:
 * 1. The byte offset of the error is written to the `error_offset` parameter.
 * 2. The type of error is written to the `error_type` parameter.
 */
TSQuery *ts_query_new(
  const TSLanguage *language,
  const char *source,
  uint32_t source_len,
  uint32_t *error_offset,
  TSQueryError *error_type
);

/**
 * Delete a query, freeing all of the memory that it used.
 */
void ts_query_delete(TSQuery *self);

/**
 * Get the number of patterns, captures, or string literals in the query.
 */
uint32_t ts_query_pattern_count(const TSQuery *self);
uint32_t ts_query_capture_count(const TSQuery *self);
uint32_t ts_query_string_count(const TSQuery *self);

/**
 * Get the byte offset where the given pattern starts in the query's source.
 *
 * This can be useful when combining queries by concatenating their source
 * code strings.
 */
uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index);

/**
 * Get the byte offset where the given pattern ends in the query's source.
 *
 * This can be useful when combining queries by concatenating their source
 * code strings.
 */
uint32_t ts_query_end_byte_for_pattern(const TSQuery *self, uint32_t pattern_index);

/**
 * Get all of the predicates for the given pattern in the query.
 *
 * The predicates are represented as a single array of steps. There are three
 * types of steps in this array, which correspond to the three legal values for
 * the `type` field:
 * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names
 *    of captures. Their `value_id` can be used with the
 *   [`ts_query_capture_name_for_id`] function to obtain the name of the capture.
 * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal
 *    strings. Their `value_id` can be used with the
 *    [`ts_query_string_value_for_id`] function to obtain their string value.
 * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
 *    that represent the end of an individual predicate. If a pattern has two
 *    predicates, then there will be two steps with this `type` in the array.
 */
const TSQueryPredicateStep *ts_query_predicates_for_pattern(
  const TSQuery *self,
  uint32_t pattern_index,
  uint32_t *step_count
);

/**
 * Check if the given pattern in the query has a single root node.
 */
bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index);

/**
 * Check if the given pattern in the query is 'non local'.
 *
 * A non-local pattern has multiple root nodes and can match within a
 * repeating sequence of nodes, as specified by the grammar. Non-local
 * patterns disable certain optimizations that would otherwise be possible
 * when executing a query on a specific range of a syntax tree.
 */
bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index);

/**
 * Check if a given pattern is guaranteed to match once a given step is reached.
 * The step is specified by its byte offset in the query's source code.
 */
bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset);

/**
 * Get the name and length of one of the query's captures, or one of the
 * query's string literals. Each capture and string is associated with a
 * numeric id based on the order that it appeared in the query's source.
 */
const char *ts_query_capture_name_for_id(
  const TSQuery *self,
  uint32_t index,
  uint32_t *length
);

/**
 * Get the quantifier of the query's captures. Each capture is associated
 * with a numeric id based on the order that it appeared in the query's source.
 */
TSQuantifier ts_query_capture_quantifier_for_id(
  const TSQuery *self,
  uint32_t pattern_index,
  uint32_t capture_index
);

const char *ts_query_string_value_for_id(
  const TSQuery *self,
  uint32_t index,
  uint32_t *length
);

/**
 * Disable a certain capture within a query.
 *
 * This prevents the capture from being returned in matches, and also avoids
 * any resource usage associated with recording the capture. Currently, there
 * is no way to undo this.
 */
void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length);

/**
 * Disable a certain pattern within a query.
 *
 * This prevents the pattern from matching and removes most of the overhead
 * associated with the pattern. Currently, there is no way to undo this.
 */
void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index);

/**
 * Create a new cursor for executing a given query.
 *
 * The cursor stores the state that is needed to iteratively search
 * for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
 * to start running a given query on a given syntax node. Then, there are
 * two options for consuming the results of the query:
 * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
 *    *matches* in the order that they were found. Each match contains the
 *    index of the pattern that matched, and an array of captures. Because
 *    multiple patterns can match the same set of nodes, one match may contain
 *    captures that appear *before* some of the captures from a previous match.
 * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
 *    individual *captures* in the order that they appear. This is useful if
 *    you don't care about which pattern matched, and just want a single ordered
 *    sequence of captures.
 *
 * If you don't care about consuming all of the results, you can stop calling
 * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
 * You can then start executing another query on another node by calling
 * [`ts_query_cursor_exec`] again.
 */
TSQueryCursor *ts_query_cursor_new(void);

/**
 * Delete a query cursor, freeing all of the memory that it used.
 */
void ts_query_cursor_delete(TSQueryCursor *self);

/**
 * Start running a given query on a given node.
 */
void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node);

/**
 * Start running a given query on a given node, with some options.
 */
void ts_query_cursor_exec_with_options(
  TSQueryCursor *self,
  const TSQuery *query,
  TSNode node,
  const TSQueryCursorOptions *query_options
);

/**
 * Manage the maximum number of in-progress matches allowed by this query
 * cursor.
 *
 * Query cursors have an optional maximum capacity for storing lists of
 * in-progress captures. If this capacity is exceeded, then the
 * earliest-starting match will silently be dropped to make room for further
 * matches. This maximum capacity is optional — by default, query cursors allow
 * any number of pending matches, dynamically allocating new space for them as
 * needed as the query is executed.
 */
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self);
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self);
void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit);

/**
 * @deprecated use [`ts_query_cursor_exec_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Set the maximum duration in microseconds that query execution should be allowed to
 * take before halting.
 *
 * If query execution takes longer than this, it will halt early, returning NULL.
 * See [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] for more information.
 */
void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros);

/**
 * @deprecated use [`ts_query_cursor_exec_with_options`] and pass in a callback instead, this will be removed in 0.26.
 *
 * Get the duration in microseconds that query execution is allowed to take.
 *
 * This is set via [`ts_query_cursor_set_timeout_micros`].
 */
uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self);

/**
 * Set the range of bytes in which the query will be executed.
 *
 * The query cursor will return matches that intersect with the given byte range.
 * This means that a match may be returned even if some of its captures fall
 * outside the specified range, as long as at least part of the match
 * overlaps with the range.
 *
 * For example, if a query pattern matches a node that spans a larger area
 * than the specified range, but part of that node intersects with the range,
 * the entire match will be returned.
 *
 * This will return `false` if the start byte is greater than the end byte, otherwise
 * it will return `true`.
 */
bool ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte);

/**
 * Set the range of (row, column) positions in which the query will be executed.
 *
 * The query cursor will return matches that intersect with the given point range.
 * This means that a match may be returned even if some of its captures fall
 * outside the specified range, as long as at least part of the match
 * overlaps with the range.
 *
 * For example, if a query pattern matches a node that spans a larger area
 * than the specified range, but part of that node intersects with the range,
 * the entire match will be returned.
 *
 * This will return `false` if the start point is greater than the end point, otherwise
 * it will return `true`.
 */
bool ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point);

/**
 * Advance to the next match of the currently running query.
 *
 * If there is a match, write it to `*match` and return `true`.
 * Otherwise, return `false`.
 */
bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match);
void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id);

/**
 * Advance to the next capture of the currently running query.
 *
 * If there is a capture, write its match to `*match` and its index within
 * the match's capture list to `*capture_index`. Otherwise, return `false`.
 */
bool ts_query_cursor_next_capture(
  TSQueryCursor *self,
  TSQueryMatch *match,
  uint32_t *capture_index
);

/**
 * Set the maximum start depth for a query cursor.
 *
 * This prevents cursors from exploring children nodes at a certain depth.
 * Note if a pattern includes many children, then they will still be checked.
 *
 * A max start depth of zero can be used as a special case: it helps to
 * destructure a subtree by staying on a node and using captures for the
 * parts of interest. Note that a max start depth of zero only limits the
 * search depth for a pattern's root node; other nodes that are part of the
 * pattern may be searched at any depth, as defined by the pattern structure.
 *
 * Set to `UINT32_MAX` to remove the maximum start depth.
 */
void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth);

/**********************/
/* Section - Language */
/**********************/

/**
 * Get another reference to the given language.
 */
const TSLanguage *ts_language_copy(const TSLanguage *self);

/**
 * Free any dynamically-allocated resources for this language, if
 * this is the last reference.
 */
void ts_language_delete(const TSLanguage *self);

/**
 * Get the number of distinct node types in the language.
 */
uint32_t ts_language_symbol_count(const TSLanguage *self);

/**
 * Get the number of valid states in this language.
*/
uint32_t ts_language_state_count(const TSLanguage *self);

/**
 * Get the numerical id for the given node type string.
 */
TSSymbol ts_language_symbol_for_name(
  const TSLanguage *self,
  const char *string,
  uint32_t length,
  bool is_named
);

/**
 * Get the number of distinct field names in the language.
 */
uint32_t ts_language_field_count(const TSLanguage *self);

/**
 * Get the field name string for the given numerical id.
 */
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id);

/**
 * Get the numerical id for the given field name string.
 */
TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length);

/**
 * Get a list of all supertype symbols for the language.
*/
const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length);

/**
 * Get a list of all subtype symbol ids for a given supertype symbol.
 *
 * See [`ts_language_supertypes`] for fetching all supertype symbols.
 */
const TSSymbol *ts_language_subtypes(
  const TSLanguage *self,
  TSSymbol supertype,
  uint32_t *length
);

/**
 * Get a node type string for the given numerical id.
 */
const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol);

/**
 * Check whether the given node type id belongs to named nodes, anonymous nodes,
 * or hidden nodes.
 *
 * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API.
 */
TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);

/**
 * @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.
 *
 * Get the ABI version number for this language. This version number is used
 * to ensure that languages were generated by a compatible version of
 * Tree-sitter.
 *
 * See also [`ts_parser_set_language`].
 */
uint32_t ts_language_version(const TSLanguage *self);

/**
 * Get the ABI version number for this language. This version number is used
 * to ensure that languages were generated by a compatible version of
 * Tree-sitter.
 *
 * See also [`ts_parser_set_language`].
 */
uint32_t ts_language_abi_version(const TSLanguage *self);

/**
 * Get the metadata for this language. This information is generated by the
 * CLI, and relies on the language author providing the correct metadata in
 * the language's `tree-sitter.json` file.
 *
 * See also [`TSLanguageMetadata`].
 */
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self);

/**
 * Get the next parse state. Combine this with lookahead iterators to generate
 * completion suggestions or valid symbols in error nodes. Use
 * [`ts_node_grammar_symbol`] for valid symbols.
*/
TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);

/**
 * Get the name of this language. This returns `NULL` in older parsers.
 */
const char *ts_language_name(const TSLanguage *self);

/********************************/
/* Section - Lookahead Iterator */
/********************************/

/**
 * Create a new lookahead iterator for the given language and parse state.
 *
 * This returns `NULL` if state is invalid for the language.
 *
 * Repeatedly using [`ts_lookahead_iterator_next`] and
 * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the
 * given parse state. Newly created lookahead iterators will contain the `ERROR`
 * symbol.
 *
 * Lookahead iterators can be useful to generate suggestions and improve syntax
 * error diagnostics. To get symbols valid in an ERROR node, use the lookahead
 * iterator on its first leaf node state. For `MISSING` nodes, a lookahead
 * iterator created on the previous non-extra leaf node may be appropriate.
*/
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state);

/**
 * Delete a lookahead iterator freeing all the memory used.
*/
void ts_lookahead_iterator_delete(TSLookaheadIterator *self);

/**
 * Reset the lookahead iterator to another state.
 *
 * This returns `true` if the iterator was reset to the given state and `false`
 * otherwise.
*/
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state);

/**
 * Reset the lookahead iterator.
 *
 * This returns `true` if the language was set successfully and `false`
 * otherwise.
*/
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state);

/**
 * Get the current language of the lookahead iterator.
*/
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self);

/**
 * Advance the lookahead iterator to the next symbol.
 *
 * This returns `true` if there is a new symbol and `false` otherwise.
*/
bool ts_lookahead_iterator_next(TSLookaheadIterator *self);

/**
 * Get the current symbol of the lookahead iterator.
*/
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self);

/**
 * Get the current symbol type of the lookahead iterator as a null terminated
 * string.
*/
const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self);

/*************************************/
/* Section - WebAssembly Integration */
/*************************************/

typedef struct wasm_engine_t TSWasmEngine;
typedef struct TSWasmStore TSWasmStore;

/**
 * The category of failure stored in the `kind` field of a [`TSWasmError`].
 *
 * `TSWasmErrorKindNone` is zero. The remaining values presumably name the
 * stage that failed (parsing, compiling, or instantiating a Wasm module, or
 * allocating memory) — confirm against the Wasm store implementation.
 */
typedef enum {
  TSWasmErrorKindNone = 0,
  TSWasmErrorKindParse,
  TSWasmErrorKindCompile,
  TSWasmErrorKindInstantiate,
  TSWasmErrorKindAllocate,
} TSWasmErrorKind;

/**
 * Error details for the Wasm functions that take a `TSWasmError *error`
 * parameter ([`ts_wasm_store_new`] and [`ts_wasm_store_load_language`]).
 *
 * `kind` classifies the failure and `message` points to accompanying text.
 * NOTE(review): ownership of `message` is not visible in this header —
 * confirm whether the caller is expected to free it.
 */
typedef struct {
  TSWasmErrorKind kind;
  char *message;
} TSWasmError;

/**
 * Create a Wasm store.
 */
TSWasmStore *ts_wasm_store_new(
  TSWasmEngine *engine,
  TSWasmError *error
);

/**
 * Free the memory associated with the given Wasm store.
 */
void ts_wasm_store_delete(TSWasmStore *);

/**
 * Create a language from a buffer of Wasm. The resulting language behaves
 * like any other Tree-sitter language, except that in order to use it with
 * a parser, that parser must have a Wasm store. Note that the language
 * can be used with any Wasm store, it doesn't need to be the same store that
 * was used to originally load it.
 */
const TSLanguage *ts_wasm_store_load_language(
  TSWasmStore *,
  const char *name,
  const char *wasm,
  uint32_t wasm_len,
  TSWasmError *error
);

/**
 * Get the number of languages instantiated in the given wasm store.
 */
size_t ts_wasm_store_language_count(const TSWasmStore *);

/**
 * Check if the language came from a Wasm module. If so, then in order to use
 * this language with a Parser, that parser must have a Wasm store assigned.
 */
bool ts_language_is_wasm(const TSLanguage *);

/**
 * Assign the given Wasm store to the parser. A parser must have a Wasm store
 * in order to use Wasm languages.
 */
void ts_parser_set_wasm_store(TSParser *, TSWasmStore *);

/**
 * Remove the parser's current Wasm store and return it. This returns NULL if
 * the parser doesn't have a Wasm store.
 */
TSWasmStore *ts_parser_take_wasm_store(TSParser *);

/**********************************/
/* Section - Global Configuration */
/**********************************/

/**
 * Set the allocation functions used by the library.
 *
 * By default, Tree-sitter uses the standard libc allocation functions,
 * but aborts the process when an allocation fails. This function lets
 * you supply alternative allocation functions at runtime.
 *
 * If you pass `NULL` for any parameter, Tree-sitter will switch back to
 * its default implementation of that function.
 *
 * If you call this function after the library has already been used, then
 * you must ensure that either:
 *  1. All the existing objects have been freed.
 *  2. The new allocator shares its state with the old one, so it is capable
 *     of freeing memory that was allocated by the old allocator.
 */
void ts_set_allocator(
  void *(*new_malloc)(size_t),
  void *(*new_calloc)(size_t, size_t),
  void *(*new_realloc)(void *, size_t),
  void (*new_free)(void *)
);

#ifdef __cplusplus
}
#endif

#ifndef TREE_SITTER_HIDE_SYMBOLS
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC visibility pop
#endif
#endif

#endif  // TREE_SITTER_API_H_



================================================
FILE: lib/src/alloc.c
================================================
#include "alloc.h"
#include "tree_sitter/api.h"
#include <stdlib.h>

// Default `malloc` hook: allocate `size` bytes with the libc allocator and
// abort the process if a non-empty request cannot be satisfied.
static void *ts_malloc_default(size_t size) {
  void *memory = malloc(size);
  // A NULL result for a zero-byte request is not treated as a failure; only
  // a NULL result for a non-empty request is.
  if (memory == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    abort();
  }
  return memory;
}

// Default `calloc` hook: allocate a zero-initialized buffer for `count`
// elements of `size` bytes each, aborting the process if a non-empty request
// cannot be satisfied.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  // A zero-byte request (either `count` or `size` is zero) may legitimately
  // yield NULL, so it must not be reported as an allocation failure. This
  // mirrors the `size > 0` guard used by the malloc and realloc hooks.
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}

// Default `realloc` hook: resize `buffer` to `size` bytes with the libc
// allocator and abort the process if a non-empty request cannot be satisfied.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  // A NULL result is only treated as a failure when bytes were requested.
  if (resized == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    abort();
  }
  return resized;
}

// Allow clients to override allocation functions dynamically.
//
// These function pointers hold the allocation functions currently in effect.
// They start out pointing at the aborting wrappers above (and at libc `free`)
// and are reassigned by `ts_set_allocator`.
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
TS_PUBLIC void (*ts_current_free)(void *) = free;

// Install the given allocation functions as the ones used by the library.
// Each argument is resolved on its own: passing NULL for one of them selects
// the built-in default for that function only.
void ts_set_allocator(
  void *(*new_malloc)(size_t size),
  void *(*new_calloc)(size_t count, size_t size),
  void *(*new_realloc)(void *ptr, size_t size),
  void (*new_free)(void *ptr)
) {
  if (new_malloc == NULL) new_malloc = ts_malloc_default;
  if (new_calloc == NULL) new_calloc = ts_calloc_default;
  if (new_realloc == NULL) new_realloc = ts_realloc_default;
  if (new_free == NULL) new_free = free;

  ts_current_malloc = new_malloc;
  ts_current_calloc = new_calloc;
  ts_current_realloc = new_realloc;
  ts_current_free = new_free;
}



================================================
FILE: lib/src/alloc.h
================================================
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Visibility marker for the allocator hook variables declared below.
// On Windows, or when TREE_SITTER_HIDDEN_SYMBOLS is defined, it expands to
// nothing; otherwise it requests default symbol visibility.
// NOTE(review): the public API header guards its visibility pragma with
// TREE_SITTER_HIDE_SYMBOLS, whereas this header checks
// TREE_SITTER_HIDDEN_SYMBOLS — confirm whether the differing spelling is
// intentional.
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif

// The allocation functions currently in effect. They are defined in alloc.c
// and reassigned by `ts_set_allocator`.
TS_PUBLIC extern void *(*ts_current_malloc)(size_t size);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size);
TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size);
TS_PUBLIC extern void (*ts_current_free)(void *ptr);

// Allow clients to override allocation functions.
//
// `ts_malloc`, `ts_calloc`, `ts_realloc` and `ts_free` each default to the
// corresponding `ts_current_*` function pointer. Any of them that is already
// defined as a macro before this header is included is left as it is.
#ifndef ts_malloc
#define ts_malloc  ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc  ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free    ts_current_free
#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_



================================================
FILE: lib/src/array.h
================================================
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"
#include "./ts_assert.h"

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Silence unused-variable diagnostics (MSVC warning 4101, GCC/Clang
// -Wunused-variable) for the code that follows. The `push` saves the previous
// diagnostic state; the matching `pop` is not visible in this part of the
// file — presumably it appears at the end of the header.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

/// A growable, type-generic array: an anonymous struct holding a pointer to
/// a buffer of `T` elements, the number of elements in use (`size`), and the
/// number of elements the buffer has room for (`capacity`).
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array in place to the empty state: no buffer, zero size and
/// zero capacity. Any buffer the array previously held is not freed.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array. This expands to a brace-enclosed initializer list,
/// so it is meant for initializing a declaration rather than for assignment.
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
/// The index is checked against the array's size with `ts_assert`.
#define array_get(self, _index) \
  (ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free
/// the array's buffer; its capacity is left unchanged.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than or equal to the array's current capacity, this has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free the buffer that holds this array's elements and reset the array to
/// the empty state. Note that this does not free any memory that the
/// individual elements themselves point to.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
/// The buffer is grown first, so `contents` is only indexed after any
/// reallocation has taken place.
#define array_push(self, element)                            \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
/// `count` appears several times in the expansion, so pass an expression
/// without side effects.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from one array to the end of another.
#define array_push_all(self, other)                                       \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer. If `contents` is NULL, the new elements are
/// zero-filled instead.
#define array_extend(self, count, contents)                    \
  _array__splice(                                               \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count,  contents                                        \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer. If `new_contents` is NULL, the inserted elements
/// are zero-filled instead.
#define array_splice(self, _index, old_count, new_count, new_contents)  \
  _array__splice(                                                       \
    (Array *)(self), array_elem_size(self), _index,                \
    old_count, new_count, new_contents                                 \
  )

/// Insert one `element` into the array at the given `index`.
/// The address of `element` is taken, so it must be an lvalue.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
/// The array must not be empty; this macro performs no check.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
/// The elements are copied byte-wise using the element size of `self`.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another. Only the `contents`, `size` and `capacity`
/// fields are exchanged; no elements are copied.
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
///
/// Nothing is inserted if the search reports that an equal element already
/// exists. The address of `value` is taken, so it must be an lvalue.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// Nothing is inserted if the search reports that an element with an equal
/// field value already exists. `value` must be an lvalue, because
/// `array_insert` takes its address.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

/// Type-erased form of `Array(T)`. The public macros cast their array
/// argument to this type and pass the element size separately to the helper
/// functions below.
typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
///
/// Releases the array's buffer and resets it to the empty state. An array
/// without a buffer is left untouched.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;

  ts_free(self->contents);
  self->contents = NULL;
  self->capacity = 0;
  self->size = 0;
}

/// This is not what you're looking for, see `array_erase`.
///
/// Removes the element at `index` by shifting every later element down by
/// one slot, then shrinks the size by one. The capacity is unchanged.
static inline void _array__erase(Array *self, size_t element_size,
                                uint32_t index) {
  ts_assert(index < self->size);
  char *removed = (char *)self->contents + index * element_size;
  uint32_t trailing_count = self->size - index - 1;
  // The source and destination ranges overlap, hence `memmove`.
  memmove(removed, removed + element_size, trailing_count * element_size);
  self->size--;
}

/// This is not what you're looking for, see `array_reserve`.
///
/// Ensures the buffer has room for at least `new_capacity` elements of
/// `element_size` bytes each. The capacity never shrinks.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;

  size_t byte_count = new_capacity * element_size;
  // An array that has no buffer yet gets a fresh allocation; otherwise the
  // existing buffer is resized.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}

/// This is not what you're looking for, see `array_assign`.
///
/// Makes `self` hold a byte-wise copy of the elements of `other`, growing
/// the buffer of `self` if it is too small. `element_size` is the size in
/// bytes of one element.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty, both `contents` pointers may be NULL (an array
  // that never allocated). Passing NULL to `memcpy` is undefined even for a
  // zero-length copy, so skip the call when there is nothing to copy.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}

/// This is not what you're looking for, see `array_swap`.
///
/// Exchanges the two array headers (buffer pointer, size and capacity).
static inline void _array__swap(Array *self, Array *other) {
  Array saved_self = *self;
  *self = *other;
  *other = saved_self;
}

/// Private helper behind `array_push` and `array_grow_by`; use those instead.
///
/// Ensures there is capacity for `count` more elements. The array's size is
/// not changed here. When more room is needed, the capacity is at least
/// doubled, with a minimum of 8 elements.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) return;

  uint32_t doubled = self->capacity * 2;
  uint32_t target = doubled < 8 ? 8 : doubled;
  if (target < required) target = required;
  _array__reserve(self, element_size, target);
}

/// This is not what you're looking for, see `array_splice`.
///
/// Replaces the `old_count` elements starting at `index` with `new_count`
/// elements. The new slots are copied from `elements`, or zero-filled when
/// `elements` is NULL. Elements located after the replaced range are moved
/// to follow the new elements, and the size changes by
/// `new_count - old_count`.
static inline void _array__splice(Array *self, size_t element_size,
                                 uint32_t index, uint32_t old_count,
                                 uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  // The range being replaced must lie within the current contents.
  ts_assert(old_end <= self->size);

  _array__reserve(self, element_size, new_size);

  // Fetch the pointer only after reserving, since the storage may have moved.
  char *contents = (char *)self->contents;
  // Shift the tail (everything after the replaced range) to its new offset.
  // `memmove` is used because the source and destination may overlap.
  if (self->size > old_end) {
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      // No source data was given: zero-initialize the new slots.
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}

/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from index `start` onwards. `compare` is
/// invoked as `compare(&element suffix, needle)`, where `suffix` is appended
/// textually to the element expression (for example a `.field` accessor), and
/// must return a negative, zero, or positive integer.
///
/// On completion, `*_exists` tells whether an element comparing equal to
/// `needle` was found. If so, `*_index` is its index; otherwise `*_index` is
/// the position at which `needle` would have to be inserted to keep the order.
/// When there is nothing to search, `*_index` is `start` and `*_exists` is false.
///
/// The expansion declares locals named `size`, `comparison`, `half_size` and
/// `mid_index`, so the arguments must not refer to variables with those names.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro above.
///
/// Evaluates to -1, 0 or 1 depending on whether `*a` is less than, equal to,
/// or greater than `b` after both are converted to `int`. The result is built
/// from two comparisons rather than a subtraction, because the difference of
/// two `int` values can overflow, which is undefined behavior and can yield
/// the wrong sign. Both arguments are evaluated twice.
#define _compare_int(a, b) (((int)*(a) > (int)(b)) - ((int)*(a) < (int)(b)))

#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_ARRAY_H_



================================================
FILE: lib/src/atomic.h
================================================
#ifndef TREE_SITTER_ATOMIC_H_
#define TREE_SITTER_ATOMIC_H_

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#ifdef __TINYC__

// TinyCC variant: reads `*p` with a plain volatile load.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t current = *p;
  return current;
}

// TinyCC variant: increments `*p` with ordinary (non-atomic) operations and
// returns the value read back afterwards.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}

// TinyCC variant: decrements `*p` with ordinary (non-atomic) operations and
// returns the value read back afterwards.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}

#elif defined(_WIN32)

#include <windows.h>

// Windows variant: reads `*p` with a plain volatile load.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t current = *p;
  return current;
}

// Windows variant: increments `*p` through `InterlockedIncrement` and
// returns the value that call reports.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedIncrement(counter);
}

// Windows variant: decrements `*p` through `InterlockedDecrement` and
// returns the value that call reports.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedDecrement(counter);
}

#else

// Compiler-builtin variant: loads `*p` with `__atomic_load_n` (relaxed
// ordering) when the `__atomic` builtins are available, and otherwise with a
// `__sync_fetch_and_add` of zero.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t loaded;
#ifdef __ATOMIC_RELAXED
  loaded = __atomic_load_n(p, __ATOMIC_RELAXED);
#else
  loaded = __sync_fetch_and_add((volatile size_t *)p, 0);
#endif
  return loaded;
}

// Compiler-builtin variant: atomically adds one to `*p` and returns the new
// value. Uses `__atomic_add_fetch` with sequentially consistent ordering when
// available, and `__sync_add_and_fetch` otherwise.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  uint32_t updated;
  #ifdef __ATOMIC_RELAXED
    updated = __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
  #else
    updated = __sync_add_and_fetch(p, 1U);
  #endif
  return updated;
}

// Compiler-builtin variant: atomically subtracts one from `*p` and returns
// the new value. Uses `__atomic_sub_fetch` with sequentially consistent
// ordering when available, and `__sync_sub_and_fetch` otherwise.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  uint32_t updated;
  #ifdef __ATOMIC_RELAXED
    updated = __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
  #else
    updated = __sync_sub_and_fetch(p, 1U);
  #endif
  return updated;
}

#endif

#endif  // TREE_SITTER_ATOMIC_H_



================================================
FILE: lib/src/clock.h
================================================
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t TSDuration;

#ifdef _WIN32

// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.

#include <windows.h>
typedef uint64_t TSClock;

// Convert microseconds into performance-counter ticks, using the frequency
// reported by `QueryPerformanceFrequency`.
static inline TSDuration duration_from_micros(uint64_t micros) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  uint64_t scaled = micros * (uint64_t)ticks_per_second.QuadPart;
  return scaled / 1000000;
}

// Convert performance-counter ticks into microseconds, using the frequency
// reported by `QueryPerformanceFrequency`.
static inline uint64_t duration_to_micros(TSDuration self) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  uint64_t scaled = self * 1000000;
  return scaled / (uint64_t)ticks_per_second.QuadPart;
}

// The null clock value (zero), as recognized by `clock_is_null`.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}

// Read the current performance-counter value.
static inline TSClock clock_now(void) {
  LARGE_INTEGER counter;
  QueryPerformanceCounter(&counter);
  TSClock now = (uint64_t)counter.QuadPart;
  return now;
}

// The clock value lying `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline += duration;
  return deadline;
}

// Whether `self` is the null clock value (zero).
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// Whether `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}

#elif defined(CLOCK_MONOTONIC)

// POSIX with monotonic clock support (Linux, macOS)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.

#include <time.h>
typedef struct timespec TSClock;

// Durations are stored as microseconds on this platform, so the value is
// passed through unchanged.
static inline TSDuration duration_from_micros(uint64_t micros) {
  TSDuration duration = micros;
  return duration;
}

// Durations are stored as microseconds on this platform, so the value is
// passed through unchanged.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t micros = self;
  return micros;
}

// Read the current time from `CLOCK_MONOTONIC`.
static inline TSClock clock_now(void) {
  TSClock now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  return now;
}

// The null clock value: zero seconds and zero nanoseconds.
static inline TSClock clock_null(void) {
  TSClock none = {0, 0};
  return none;
}

// The point in time lying `duration` microseconds after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  // Split the duration into whole seconds and the remaining nanoseconds.
  deadline.tv_sec += duration / 1000000;
  deadline.tv_nsec += (duration % 1000000) * 1000;
  // Carry one second if the nanosecond field reached a full second.
  if (deadline.tv_nsec >= 1000000000) {
    deadline.tv_sec += 1;
    deadline.tv_nsec -= 1000000000;
  }
  return deadline;
}

// Whether `self` is the null clock value (both fields zero).
static inline bool clock_is_null(TSClock self) {
  return self.tv_sec == 0 && self.tv_nsec == 0;
}

// Whether `self` is strictly later than `other`: seconds are compared first,
// and nanoseconds only decide when the seconds are equal.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) return self.tv_sec > other.tv_sec;
  return self.tv_nsec > other.tv_nsec;
}

#else

// POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.

#include <time.h>
typedef uint64_t TSClock;

// Convert microseconds into process clock ticks (`CLOCKS_PER_SEC` per second).
static inline TSDuration duration_from_micros(uint64_t micros) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return micros * ticks_per_second / 1000000;
}

// Convert process clock ticks (`CLOCKS_PER_SEC` per second) into microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return self * 1000000 / ticks_per_second;
}

// The null clock value (zero), as recognized by `clock_is_null`.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}

// Read the current process clock value via `clock()`.
static inline TSClock clock_now(void) {
  clock_t ticks = clock();
  return (uint64_t)ticks;
}

// The clock value lying `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline += duration;
  return deadline;
}

// Whether `self` is the null clock value (zero).
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// Whether `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}

#endif

#endif  // TREE_SITTER_CLOCK_H_



================================================
FILE: lib/src/error_costs.h
================================================
#ifndef TREE_SITTER_ERROR_COSTS_H_
#define TREE_SITTER_ERROR_COSTS_H_

// Parse state value that denotes the error state; subtree parse states are
// compared against it elsewhere in the library.
#define ERROR_STATE 0
// Error cost weights.
// NOTE(review): the meaning of each weight is inferred from its name
// (recovery, missing tree, skipped tree / line / character) - confirm
// against the code that computes error costs.
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1

#endif



================================================
FILE: lib/src/get_changed_ranges.c
================================================
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include "./ts_assert.h"

// #define DEBUG_GET_CHANGED_RANGES

// Record the range from `start` to `end` in `self`.
//
// If the range begins at or before the end of the most recently recorded
// range, that range's end is moved to `end` instead of adding a new entry.
// Otherwise a new range is appended, unless it would be empty.
static void ts_range_array_add(
  TSRangeArray *self,
  Length start,
  Length end
) {
  if (self->size > 0) {
    TSRange *previous = array_back(self);
    bool touches_previous = start.bytes <= previous->end_byte;
    if (touches_previous) {
      previous->end_point = end.extent;
      previous->end_byte = end.bytes;
      return;
    }
  }

  // Ranges that cover no bytes are not recorded.
  if (start.bytes >= end.bytes) return;

  TSRange appended = { start.extent, end.extent, start.bytes, end.bytes };
  array_push(self, appended);
}

// Report whether any range in `self`, starting the scan at `start_index`,
// overlaps the byte span from `start_byte` to `end_byte`.
//
// The scan stops at the first range that ends after `start_byte`: the result
// is true exactly when that range also begins before `end_byte`.
bool ts_range_array_intersects(
  const TSRangeArray *self,
  unsigned start_index,
  uint32_t start_byte,
  uint32_t end_byte
) {
  unsigned index = start_index;
  while (index < self->size) {
    const TSRange *candidate = array_get(self, index);
    index++;
    // Ranges that end at or before the span cannot overlap it.
    if (candidate->end_byte <= start_byte) continue;
    return candidate->start_byte < end_byte;
  }
  return false;
}

// Compute the regions that are covered by exactly one of the two range lists
// and append them to `differences` (via `ts_range_array_add`).
//
// Both lists are walked in parallel by always advancing to the nearest
// upcoming boundary (the start or end of a range) in either list. A region
// between two consecutive boundaries is recorded whenever the position is
// inside a range of one list but not of the other.
//
// NOTE(review): this presumably requires each list to be sorted by byte
// offset and free of overlaps - confirm with the callers.
void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
) {
  unsigned new_index = 0;
  unsigned old_index = 0;
  Length current_position = length_zero();
  bool in_old_range = false;
  bool in_new_range = false;

  while (old_index < old_range_count || new_index < new_range_count) {
    // These pointers may refer to one-past-the-end of a list; they are only
    // dereferenced in the branches below where the index is known to be valid.
    const TSRange *old_range = &old_ranges[old_index];
    const TSRange *new_range = &new_ranges[new_index];

    // Next boundary in the old list: the end of the current range when inside
    // one, else the start of the next range, else "never" once exhausted.
    Length next_old_position;
    if (in_old_range) {
      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
    } else if (old_index < old_range_count) {
      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
    } else {
      next_old_position = LENGTH_MAX;
    }

    // Same for the new list.
    Length next_new_position;
    if (in_new_range) {
      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
    } else if (new_index < new_range_count) {
      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
    } else {
      next_new_position = LENGTH_MAX;
    }

    if (next_old_position.bytes < next_new_position.bytes) {
      // The old list's boundary comes first.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_old_position);
      }
      if (in_old_range) old_index++;
      current_position = next_old_position;
      in_old_range = !in_old_range;
    } else if (next_new_position.bytes < next_old_position.bytes) {
      // The new list's boundary comes first.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_new_range) new_index++;
      current_position = next_new_position;
      in_new_range = !in_new_range;
    } else {
      // Both lists have a boundary at the same byte: cross both at once.
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_old_range) old_index++;
      if (in_new_range) new_index++;
      in_old_range = !in_old_range;
      in_new_range = !in_new_range;
      current_position = next_new_position;
    }
  }
}

// Traversal state used to walk one syntax tree while computing changed ranges.
typedef struct {
  // Cursor whose stack holds the entries from the root down to the current node.
  TreeCursor cursor;
  // Language used to look up aliases for nodes on the stack.
  const TSLanguage *language;
  // Depth counter for visible nodes: starts at 1 and is adjusted as the
  // iterator enters and leaves visible nodes.
  unsigned visible_depth;
  // True while the iterator is positioned in the padding that precedes the
  // current node, rather than in the node's own content.
  bool in_padding;
  // Most recent external token recorded from subtrees the iterator moved past.
  Subtree prev_external_token;
} Iterator;

// Create an iterator positioned at the root `tree`.
//
// The given cursor's stack is cleared and reused to hold the root entry, and
// the cursor is then copied by value into the returned iterator.
static Iterator iterator_new(
  TreeCursor *cursor,
  const Subtree *tree,
  const TSLanguage *language
) {
  TreeCursorEntry root_entry = {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };
  array_clear(&cursor->stack);
  array_push(&cursor->stack, root_entry);

  // Copy the cursor only after pushing, so the copy sees the updated stack.
  Iterator iterator = {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
    .prev_external_token = NULL_SUBTREE,
  };
  return iterator;
}

// Whether the iterator has run out of nodes, i.e. its stack is empty.
static bool iterator_done(Iterator *self) {
  uint32_t remaining_entries = self->cursor.stack.size;
  return remaining_entries == 0;
}

// Start of the region the iterator currently covers: the start of the
// padding when in padding, otherwise the start of the node's content.
static Length iterator_start_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (self->in_padding) return top->position;
  return length_add(top->position, ts_subtree_padding(*top->subtree));
}

// End of the region the iterator currently covers: the end of the padding
// when in padding, otherwise the end of the node's content.
static Length iterator_end_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  Length content_start = length_add(top->position, ts_subtree_padding(*top->subtree));
  if (self->in_padding) return content_start;
  return length_add(content_start, ts_subtree_size(*top->subtree));
}

static bool iterator_tree_is_visible(const Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  if (ts_subtree_visible(*entry.subtree)) return true;
  if (self->cursor.stack.size > 1) {
    Subtree parent = *array_get(&self->cursor.stack, self->cursor.stack.size - 2)->subtree;
    return ts_language_alias_at(
      self->language,
      parent.ptr->production_id,
      entry.structural_child_index
    ) != 0;
  }
  return false;
}

// Find the innermost visible (or aliased) node on the iterator's stack and
// report it through the output parameters: the subtree in `*tree`, its alias
// in `*alias_symbol`, and the byte offset of its stack entry in `*start_byte`.
//
// When the iterator is in padding, the top-of-stack node is skipped and the
// search starts at its parent. If no node qualifies, `*tree` and
// `*start_byte` are left unchanged.
static void iterator_get_visible_state(
  const Iterator *self,
  Subtree *tree,
  TSSymbol *alias_symbol,
  uint32_t *start_byte
) {
  uint32_t i = self->cursor.stack.size - 1;

  if (self->in_padding) {
    if (i == 0) return;
    i--;
  }

  // Walk from the starting entry up to the root (index 0, inclusive). The
  // condition `i + 1 > 0` ends the loop once the unsigned index wraps around
  // after being decremented past zero.
  for (; i + 1 > 0; i--) {
    TreeCursorEntry entry = *array_get(&self->cursor.stack, i);

    // Every entry except the root may be aliased by its parent's production.
    if (i > 0) {
      const Subtree *parent = array_get(&self->cursor.stack, i - 1)->subtree;
      *alias_symbol = ts_language_alias_at(
        self->language,
        parent->ptr->production_id,
        entry.structural_child_index
      );
    }

    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
      *tree = *entry.subtree;
      *start_byte = entry.position.bytes;
      break;
    }
  }
}

// Move up to the parent by dropping the top stack entry. Does nothing when
// the iterator is already done.
static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;

  // Leaving a visible node whose content had been entered reduces the depth.
  bool leaving_entered_visible_node = !self->in_padding && iterator_tree_is_visible(self);
  if (leaving_entered_visible_node) self->visible_depth -= 1;

  // The padding flag is dropped unless the node being left is a first child.
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (top->child_index > 0) self->in_padding = false;

  self->cursor.stack.size -= 1;
}

// Descend from the current node toward the byte offset `goal_position`.
//
// At each level, the first child that ends after `goal_position` is pushed
// onto the stack. If that child is visible, the descent stops and true is
// returned: `in_padding` is set when the goal lies before the child's content,
// otherwise `visible_depth` is incremented. If the child is not visible, the
// descent continues inside it.
//
// Returns false when the iterator is in padding, or when no visible
// descendant is found; invisible entries pushed along the way stay on the
// stack in that case.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
  if (self->in_padding) return false;

  bool did_descend = false;
  do {
    did_descend = false;
    TreeCursorEntry entry = *array_back(&self->cursor.stack);
    Length position = entry.position;
    uint32_t structural_child_index = 0;
    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
      const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
      // `child_left` is where the child's content starts (after its padding),
      // `child_right` is where the child ends.
      Length child_left = length_add(position, ts_subtree_padding(*child));
      Length child_right = length_add(child_left, ts_subtree_size(*child));

      if (child_right.bytes > goal_position) {
        array_push(&self->cursor.stack, ((TreeCursorEntry) {
          .subtree = child,
          .position = position,
          .child_index = i,
          .structural_child_index = structural_child_index,
        }));

        if (iterator_tree_is_visible(self)) {
          if (child_left.bytes > goal_position) {
            self->in_padding = true;
          } else {
            self->visible_depth++;
          }
          return true;
        }

        // Invisible child: keep descending from it in the next round.
        did_descend = true;
        break;
      }

      // The child ends at or before the goal: step over it, keeping track of
      // the structural index and of the last external token passed.
      position = child_right;
      if (!ts_subtree_extra(*child)) structural_child_index++;
      Subtree last_external_token = ts_subtree_last_external_token(*child);
      if (last_external_token.ptr) {
        self->prev_external_token = last_external_token;
      }
    }
  } while (did_descend);

  return false;
}

// Advance the iterator to the next region of the tree.
//
// When in padding, the iterator moves from the padding into the node itself
// (or descends into it when the node is not visible). Otherwise it leaves the
// current node and moves to the next sibling, popping up through ancestors
// until one with a following sibling is found. The iterator becomes done when
// the stack empties.
static void iterator_advance(Iterator *self) {
  if (self->in_padding) {
    self->in_padding = false;
    if (iterator_tree_is_visible(self)) {
      self->visible_depth++;
    } else {
      iterator_descend(self, 0);
    }
    return;
  }

  for (;;) {
    if (iterator_tree_is_visible(self)) self->visible_depth--;
    TreeCursorEntry entry = array_pop(&self->cursor.stack);
    if (iterator_done(self)) return;

    const Subtree *parent = array_back(&self->cursor.stack)->subtree;
    uint32_t child_index = entry.child_index + 1;
    // Remember the last external token of the subtree being left behind.
    Subtree last_external_token = ts_subtree_last_external_token(*entry.subtree);
    if (last_external_token.ptr) {
      self->prev_external_token = last_external_token;
    }
    if (ts_subtree_child_count(*parent) > child_index) {
      // The next sibling starts where the node being left ends.
      Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
      uint32_t structural_child_index = entry.structural_child_index;
      if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
      const Subtree *next_child = &ts_subtree_children(*parent)[child_index];

      array_push(&self->cursor.stack, ((TreeCursorEntry) {
        .subtree = next_child,
        .position = position,
        .child_index = child_index,
        .structural_child_index = structural_child_index,
      }));

      if (iterator_tree_is_visible(self)) {
        // A visible sibling with padding is first visited as padding.
        if (ts_subtree_padding(*next_child).bytes > 0) {
          self->in_padding = true;
        } else {
          self->visible_depth++;
        }
      } else {
        iterator_descend(self, 0);
      }
      break;
    }
  }
}

// Result of comparing the current nodes of two iterators.
typedef enum {
  // The nodes are different; the region is reported as changed.
  IteratorDiffers,
  // The nodes look alike on the surface but may differ internally, so their
  // children need to be compared.
  IteratorMayDiffer,
  // The nodes are considered identical.
  IteratorMatches,
} IteratorComparison;

// Compare the innermost visible nodes of the two iterators.
//
// Returns `IteratorMatches` when neither iterator has a visible node, or when
// the two nodes agree on every property checked below. Returns
// `IteratorDiffers` when only one iterator has a visible node, or when the
// nodes' symbols or aliases differ. Returns `IteratorMayDiffer` when the
// symbols agree but some other property suggests the contents may differ.
static IteratorComparison iterator_compare(
  const Iterator *old_iter,
  const Iterator *new_iter
) {
  Subtree old_tree = NULL_SUBTREE;
  Subtree new_tree = NULL_SUBTREE;
  uint32_t old_start = 0;
  uint32_t new_start = 0;
  TSSymbol old_alias_symbol = 0;
  TSSymbol new_alias_symbol = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  // Either tree may still be null here, so handle that before reading any
  // property of the subtrees: a null subtree is never handed to
  // `ts_subtree_symbol` or the other accessors.
  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;

  TSSymbol old_symbol = ts_subtree_symbol(old_tree);
  TSSymbol new_symbol = ts_subtree_symbol(new_tree);
  if (old_alias_symbol != new_alias_symbol || old_symbol != new_symbol) return IteratorDiffers;

  uint32_t old_size = ts_subtree_size(old_tree).bytes;
  uint32_t new_size = ts_subtree_size(new_tree).bytes;
  TSStateId old_state = ts_subtree_parse_state(old_tree);
  TSStateId new_state = ts_subtree_parse_state(new_tree);
  bool old_has_external_tokens = ts_subtree_has_external_tokens(old_tree);
  bool new_has_external_tokens = ts_subtree_has_external_tokens(new_tree);
  uint32_t old_error_cost = ts_subtree_error_cost(old_tree);
  uint32_t new_error_cost = ts_subtree_error_cost(new_tree);

  // Any of these conditions means the nodes cannot be assumed identical
  // without looking at their children.
  if (
    old_start != new_start ||
    old_symbol == ts_builtin_sym_error ||
    old_size != new_size ||
    old_state == TS_TREE_STATE_NONE ||
    new_state == TS_TREE_STATE_NONE ||
    ((old_state == ERROR_STATE) != (new_state == ERROR_STATE)) ||
    old_error_cost != new_error_cost ||
    old_has_external_tokens != new_has_external_tokens ||
    ts_subtree_has_changes(old_tree) ||
    (
      old_has_external_tokens &&
      !ts_subtree_external_scanner_state_eq(old_iter->prev_external_token, new_iter->prev_external_token)
    )
  ) {
    return IteratorMayDiffer;
  }

  return IteratorMatches;
}

#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper: print the symbol name of the node on top of the stack, a
// padding marker, the visible depth, and the start and end points of the
// region the iterator currently covers.
static inline void iterator_print_state(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  const char *padding_marker = self->in_padding ? "(p)" : "   ";
  const char *symbol_name = ts_language_symbol_name(
    self->language,
    ts_subtree_symbol(*top->subtree)
  );
  TSPoint start_point = iterator_start_position(self).extent;
  TSPoint end_point = iterator_end_position(self).extent;
  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    symbol_name, padding_marker,
    self->visible_depth,
    start_point.row, start_point.column,
    end_point.row, end_point.column
  );
}
#endif

// Compute the ranges in which `old_tree` and `new_tree` differ.
//
// Both trees are walked in lockstep with two iterators built on the given
// cursors. At each step the current nodes are compared; regions where they
// differ are collected, and regions where they may differ are explored by
// descending into the children. Nodes that otherwise match are still explored
// when they intersect one of the `included_range_differences`.
//
// The resulting array is stored in `*ranges` and its length is returned. The
// cursors are written back on return so that their stack storage can be reused.
unsigned ts_subtree_get_changed_ranges(
  const Subtree *old_tree, const Subtree *new_tree,
  TreeCursor *cursor1, TreeCursor *cursor2,
  const TSLanguage *language,
  const TSRangeArray *included_range_differences,
  TSRange **ranges
) {
  TSRangeArray results = array_new();

  Iterator old_iter = iterator_new(cursor1, old_tree, language);
  Iterator new_iter = iterator_new(cursor2, new_tree, language);

  unsigned included_range_difference_index = 0;

  // If the two roots start at different offsets, the gap between the two
  // starting points is itself a changed range.
  Length position = iterator_start_position(&old_iter);
  Length next_position = iterator_start_position(&new_iter);
  if (position.bytes < next_position.bytes) {
    ts_range_array_add(&results, position, next_position);
    position = next_position;
  } else if (position.bytes > next_position.bytes) {
    ts_range_array_add(&results, next_position, position);
    next_position = position;
  }

  do {
    #ifdef DEBUG_GET_CHANGED_RANGES
    printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
    iterator_print_state(&old_iter);
    printf("\tvs\t");
    iterator_print_state(&new_iter);
    puts("");
    #endif

    // Compare the old and new subtrees.
    IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);

    // Even if the two subtrees appear to be identical, they could differ
    // internally if they contain a range of text that was previously
    // excluded from the parse, and is now included, or vice-versa.
    if (comparison == IteratorMatches && ts_range_array_intersects(
      included_range_differences,
      included_range_difference_index,
      position.bytes,
      iterator_end_position(&old_iter).bytes
    )) {
      comparison = IteratorMayDiffer;
    }

    bool is_changed = false;
    switch (comparison) {
      // If the subtrees are definitely identical, move to the end
      // of both subtrees.
      case IteratorMatches:
        next_position = iterator_end_position(&old_iter);
        break;

      // If the subtrees might differ internally, descend into both
      // subtrees, finding the first child that spans the current position.
      case IteratorMayDiffer:
        if (iterator_descend(&old_iter, position.bytes)) {
          if (!iterator_descend(&new_iter, position.bytes)) {
            is_changed = true;
            next_position = iterator_end_position(&old_iter);
          }
        } else if (iterator_descend(&new_iter, position.bytes)) {
          is_changed = true;
          next_position = iterator_end_position(&new_iter);
        } else {
          next_position = length_min(
            iterator_end_position(&old_iter),
            iterator_end_position(&new_iter)
          );
        }
        break;

      // If the subtrees are different, record a change and then move
      // to the end of both subtrees.
      case IteratorDiffers:
        is_changed = true;
        next_position = length_min(
          iterator_end_position(&old_iter),
          iterator_end_position(&new_iter)
        );
        break;
    }

    // Ensure that both iterators are caught up to the current position.
    while (
      !iterator_done(&old_iter) &&
      iterator_end_position(&old_iter).bytes <= next_position.bytes
    ) iterator_advance(&old_iter);
    while (
      !iterator_done(&new_iter) &&
      iterator_end_position(&new_iter).bytes <= next_position.bytes
    ) iterator_advance(&new_iter);

    // Ensure that both iterators are at the same depth in the tree.
    while (old_iter.visible_depth > new_iter.visible_depth) {
      iterator_ascend(&old_iter);
    }
    while (new_iter.visible_depth > old_iter.visible_depth) {
      iterator_ascend(&new_iter);
    }

    if (is_changed) {
      #ifdef DEBUG_GET_CHANGED_RANGES
      printf(
        "  change: [[%u, %u] - [%u, %u]]\n",
        position.extent.row + 1, position.extent.column,
        next_position.extent.row + 1, next_position.extent.column
      );
      #endif

      ts_range_array_add(&results, position, next_position);
    }

    position = next_position;

    // Keep track of the current position in the included range differences
    // array in order to avoid scanning the entire array on each iteration.
    while (included_range_difference_index < included_range_differences->size) {
      const TSRange *range = array_get(included_range_differences,
        included_range_difference_index
      );
      if (range->end_byte <= position.bytes) {
        included_range_difference_index++;
      } else {
        break;
      }
    }
  } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));

  // If the trees have different total sizes, the region between the end of
  // the shorter tree and the end of the longer one is also a change.
  Length old_size = ts_subtree_total_size(*old_tree);
  Length new_size = ts_subtree_total_size(*new_tree);
  if (old_size.bytes < new_size.bytes) {
    ts_range_array_add(&results, old_size, new_size);
  } else if (new_size.bytes < old_size.bytes) {
    ts_range_array_add(&results, new_size, old_size);
  }

  // Hand the (possibly reallocated) cursor stacks back to the caller, and
  // pass the contents of the results array out through `ranges`.
  *cursor1 = old_iter.cursor;
  *cursor2 = new_iter.cursor;
  *ranges = results.contents;
  return results.size;
}



================================================
FILE: lib/src/get_changed_ranges.h
================================================
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
#define TREE_SITTER_GET_CHANGED_RANGES_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./tree_cursor.h"
#include "./subtree.h"

// A growable array of `TSRange` values.
typedef Array(TSRange) TSRangeArray;

// Compare two lists of ranges and append to `differences` the regions that
// are covered by exactly one of the two lists.
void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
);

// Report whether any range in `self`, scanning from `start_index` onwards,
// overlaps the byte span from `start_byte` to `end_byte`.
bool ts_range_array_intersects(
  const TSRangeArray *self, unsigned start_index,
  uint32_t start_byte, uint32_t end_byte
);

// Compute the ranges in which `old_tree` and `new_tree` differ. The two
// cursors provide reusable stack storage for the traversal and are updated
// on return. The resulting array is stored in `*ranges`; the return value is
// the number of ranges in it.
unsigned ts_subtree_get_changed_ranges(
  const Subtree *old_tree, const Subtree *new_tree,
  TreeCursor *cursor1, TreeCursor *cursor2,
  const TSLanguage *language,
  const TSRangeArray *included_range_differences,
  TSRange **ranges
);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_GET_CHANGED_RANGES_H_



================================================
FILE: lib/src/host.h
================================================

// Determine endian and pointer size based on known defines.
// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
// to override this.

// TS_BIG_ENDIAN: 1 when the target is detected as big-endian, 0 otherwise.
// Detection uses the compiler-provided `__BYTE_ORDER__` macro, plus a special
// case for Apple compilers targeting PowerPC. A value already defined by the
// build (e.g. via -D) is left alone.
#if !defined(TS_BIG_ENDIAN)
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
  || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
#define TS_BIG_ENDIAN 1
#else
#define TS_BIG_ENDIAN 0
#endif
#endif

// TS_PTR_SIZE: pointer width in bits, 32 when `UINTPTR_MAX` equals
// 0xFFFFFFFF and 64 otherwise. A value already defined by the build (e.g.
// via -D) is left alone.
// NOTE(review): `UINTPTR_MAX` comes from <stdint.h>, which this header does
// not include itself. If it is undefined at this point, the comparison falls
// through to 64 - confirm that every includer pulls in <stdint.h> first.
#if !defined(TS_PTR_SIZE)
#if UINTPTR_MAX == 0xFFFFFFFF
#define TS_PTR_SIZE 32
#else
#define TS_PTR_SIZE 64
#endif
#endif



================================================
FILE: lib/src/language.c
================================================
#include "./language.h"
#include "./wasm_store.h"
#include "tree_sitter/api.h"
#include <string.h>

// Return `self` as a new reference. For Wasm-backed languages the language
// is retained first; for all other languages (and for NULL) nothing is done.
const TSLanguage *ts_language_copy(const TSLanguage *self) {
  bool is_wasm_language = self != NULL && ts_language_is_wasm(self);
  if (is_wasm_language) ts_wasm_language_retain(self);
  return self;
}

// Give up a reference to `self`. Only Wasm-backed languages are released;
// for all other languages (and for NULL) this is a no-op.
void ts_language_delete(const TSLanguage *self) {
  if (!self || !ts_language_is_wasm(self)) return;
  ts_wasm_language_release(self);
}

// Total number of symbols in the language: its symbols plus its aliases.
uint32_t ts_language_symbol_count(const TSLanguage *self) {
  uint32_t total = self->symbol_count;
  total += self->alias_count;
  return total;
}

// Number of parse states in the language.
uint32_t ts_language_state_count(const TSLanguage *self) {
  uint32_t state_total = self->state_count;
  return state_total;
}

// Return the language's list of supertype symbols and store its length in
// `*length`. Languages whose ABI version predates
// `LANGUAGE_VERSION_WITH_RESERVED_WORDS` yield NULL and a length of zero.
const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    *length = 0;
    return NULL;
  }
  *length = self->supertype_count;
  return self->supertype_symbols;
}

// Return the list of subtypes of `supertype` and store its length in
// `*length`. Yields NULL and a length of zero when the language's ABI version
// predates `LANGUAGE_VERSION_WITH_RESERVED_WORDS`, or when the symbol is not
// marked as a supertype.
const TSSymbol *ts_language_subtypes(
  const TSLanguage *self,
  TSSymbol supertype,
  uint32_t *length
) {
  bool has_supertype_tables = self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS;
  if (has_supertype_tables && ts_language_symbol_metadata(self, supertype).supertype) {
    const TSMapSlice *slice = &self->supertype_map_slices[supertype];
    *length = slice->length;
    return &self->supertype_map_entries[slice->index];
  }

  *length = 0;
  return NULL;
}

// The language's ABI version (same value as `ts_language_abi_version`).
uint32_t ts_language_version(const TSLanguage *self) {
  uint32_t version = self->abi_version;
  return version;
}

// The ABI version the language was generated for.
uint32_t ts_language_abi_version(const TSLanguage *self) {
  uint32_t version = self->abi_version;
  return version;
}

// Pointer to the language's metadata, or NULL when the language's ABI
// version predates `LANGUAGE_VERSION_WITH_RESERVED_WORDS`.
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) return NULL;
  return &self->metadata;
}

// The language's name, or NULL when the language's ABI version predates
// `LANGUAGE_VERSION_WITH_RESERVED_WORDS`.
const char *ts_language_name(const TSLanguage *self) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) return NULL;
  return self->name;
}

// Number of fields defined by the language.
uint32_t ts_language_field_count(const TSLanguage *self) {
  uint32_t field_total = self->field_count;
  return field_total;
}

// Fill `result` with the parse actions for `symbol` in `state`.
//
// The built-in error symbols have no actions: the result is empty and not
// reusable. Any other symbol must be a token; its actions are located through
// `ts_language_lookup`.
void ts_language_table_entry(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol,
  TableEntry *result
) {
  bool is_error_symbol =
    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
  if (is_error_symbol) {
    result->actions = NULL;
    result->is_reusable = false;
    result->action_count = 0;
    return;
  }

  ts_assert(symbol < self->token_count);
  uint32_t action_index = ts_language_lookup(self, state, symbol);
  const TSParseActionEntry *header = &self->parse_actions[action_index];
  // The header entry holds the count and reusable flag; the actions
  // themselves follow immediately after it.
  result->actions = (const TSParseAction *)(header + 1);
  result->is_reusable = header->entry.reusable;
  result->action_count = header->entry.count;
}

// Return the lexer mode for the given parse state.
//
// For ABI versions below 15 the table is read as an array of `TSLexMode`
// and converted, with the reserved word set id set to zero.
TSLexerMode ts_language_lex_mode_for_state(
   const TSLanguage *self,
   TSStateId state
) {
  if (self->abi_version >= 15) return self->lex_modes[state];

  const TSLexMode *legacy_modes = (const TSLexMode *)self->lex_modes;
  TSLexMode legacy = legacy_modes[state];
  TSLexerMode converted = {
    .lex_state = legacy.lex_state,
    .external_lex_state = legacy.external_lex_state,
    .reserved_word_set_id = 0,
  };
  return converted;
}

// Report whether `symbol` belongs to the reserved word set that applies in
// the given parse state. States whose reserved word set id is zero have no
// reserved words.
bool ts_language_is_reserved_word(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TSLexerMode mode = ts_language_lex_mode_for_state(self, state);
  if (mode.reserved_word_set_id <= 0) return false;

  // Each set occupies a slot of `max_reserved_word_set_size` entries; a zero
  // entry ends a set that is shorter than its slot.
  unsigned first = mode.reserved_word_set_id * self->max_reserved_word_set_size;
  unsigned limit = first + self->max_reserved_word_set_size;
  unsigned cursor = first;
  while (cursor < limit) {
    TSSymbol word = self->reserved_words[cursor++];
    if (word == symbol) return true;
    if (word == 0) break;
  }
  return false;
}

// Return the metadata for `symbol`. The two built-in error symbols are not
// in the language's table and get fixed metadata: the error symbol is
// visible and named, the error-repeat symbol is neither.
TSSymbolMetadata ts_language_symbol_metadata(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) {
    TSSymbolMetadata error_metadata = {.visible = true, .named = true};
    return error_metadata;
  }
  if (symbol == ts_builtin_sym_error_repeat) {
    TSSymbolMetadata repeat_metadata = {.visible = false, .named = false};
    return repeat_metadata;
  }
  return self->symbol_metadata[symbol];
}

// Map `symbol` through the language's public symbol map. The built-in error
// symbol maps to itself.
TSSymbol ts_language_public_symbol(
  const TSLanguage *self,
  TSSymbol symbol
) {
  return symbol == ts_builtin_sym_error
    ? symbol
    : self->public_symbol_map[symbol];
}

// Return the state reached from `state` on `symbol`.
//
// Error symbols yield 0. For tokens, the last action is consulted: a shift
// yields its target state (or `state` itself for an extra), anything else
// yields 0. Symbols that are not tokens are resolved through
// `ts_language_lookup`.
TSStateId ts_language_next_state(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }
  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t action_count;
  const TSParseAction *actions = ts_language_actions(self, state, symbol, &action_count);
  if (action_count == 0) return 0;

  TSParseAction last_action = actions[action_count - 1];
  if (last_action.type != TSParseActionTypeShift) return 0;
  return last_action.shift.extra ? state : last_action.shift.state;
}

// Return the name of `symbol`: fixed strings for the two built-in error
// symbols, the language's own name for an in-range symbol, NULL otherwise.
const char *ts_language_symbol_name(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) return "ERROR";
  if (symbol == ts_builtin_sym_error_repeat) return "_ERROR";

  bool in_range = symbol < ts_language_symbol_count(self);
  return in_range ? self->symbol_names[symbol] : NULL;
}

TSSymbol ts_language_symbol_for_name(
  const TSLanguage *self,
  const char *string,
  uint32_t length,
  bool is_named
) {
  if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error;
  uint16_t count = (uint16_t)ts_language_symbol_count(self);
  for (TSSymbol i = 0; i < count; i++) {
    TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
    if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
    const char *symbol_name = self->symbol_names[i];
    if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
      return self->public_symbol_map[i];
    }
  }
  return 0;
}

// Classify `symbol` from its metadata: visible symbols are regular (named) or
// anonymous (unnamed); invisible ones are supertypes or auxiliary.
TSSymbolType ts_language_symbol_type(
  const TSLanguage *self,
  TSSymbol symbol
) {
  TSSymbolMetadata info = ts_language_symbol_metadata(self, symbol);
  if (info.visible) {
    return info.named ? TSSymbolTypeRegular : TSSymbolTypeAnonymous;
  }
  return info.supertype ? TSSymbolTypeSupertype : TSSymbolTypeAuxiliary;
}

// Return the name stored for field `id`, or NULL when the language has no
// fields or `id` is greater than the field count.
const char *ts_language_field_name_for_id(
  const TSLanguage *self,
  TSFieldId id
) {
  uint32_t field_count = ts_language_field_count(self);
  if (field_count == 0 || id > field_count) return NULL;
  return self->field_names[id];
}

// Find the id of the field whose name is exactly the first `name_length`
// bytes of `name`. Field ids start at 1; 0 is returned when no field matches.
//
// The scan stops as soon as `name` compares lower than a field name.
// NOTE(review): this early exit assumes `field_names` is sorted in ascending
// byte order — confirm against the table generator.
TSFieldId ts_language_field_id_for_name(
  const TSLanguage *self,
  const char *name,
  uint32_t name_length
) {
  uint16_t count = (uint16_t)ts_language_field_count(self);
  for (TSSymbol i = 1; i < count + 1; i++) {
    // strncmp only promises a negative, zero or positive result, so test the
    // sign instead of comparing against the literal -1.
    int order = strncmp(name, self->field_names[i], name_length);
    if (order == 0) {
      // Prefix matches; accept only if the field name ends at the same length.
      if (self->field_names[i][name_length] == 0) return i;
    } else if (order < 0) {
      return 0;
    }
  }
  return 0;
}

// Allocate a lookahead iterator positioned before the first symbol of `state`.
// Returns NULL when `state` is not a valid state of the language.
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
  if (self->state_count <= state) return NULL;
  LookaheadIterator *created = ts_malloc(sizeof(LookaheadIterator));
  *created = ts_language_lookaheads(self, state);
  return (TSLookaheadIterator *)created;
}

// Release an iterator by handing its pointer to `ts_free`.
void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
  ts_free(self);
}

// Re-point the iterator at another state of the language it already uses.
// Returns false, leaving the iterator untouched, when `state` is out of range.
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
  LookaheadIterator *target = (LookaheadIterator *)self;
  const TSLanguage *language = target->language;
  if (language->state_count <= state) return false;
  *target = ts_language_lookaheads(language, state);
  return true;
}

// Return the language the iterator was created or last reset with.
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
  return ((const LookaheadIterator *)self)->language;
}

// Re-point the iterator at `state` of `language`. Returns false, leaving the
// iterator untouched, when `state` is out of range for that language.
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
  bool state_exists = state < language->state_count;
  if (state_exists) {
    LookaheadIterator *target = (LookaheadIterator *)self;
    *target = ts_language_lookaheads(language, state);
  }
  return state_exists;
}

// Public wrapper: step the iterator and report whether a symbol is available.
bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
  return ts_lookahead_iterator__next((LookaheadIterator *)self);
}

// Return the symbol the iterator currently rests on.
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
  return ((const LookaheadIterator *)self)->symbol;
}

const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
  const LookaheadIterator *iterator = (const LookaheadIterator *)self;
  return ts_language_symbol_name(iterator->language, iterator->symbol);
}



================================================
FILE: lib/src/language.h
================================================
#ifndef TREE_SITTER_LANGUAGE_H_
#define TREE_SITTER_LANGUAGE_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./subtree.h"
#include "./parser.h"

#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)

#define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14

// Result of a parse-table lookup, filled in by `ts_language_table_entry`.
typedef struct {
  const TSParseAction *actions;  // first of the entry's actions
  uint32_t action_count;         // number of actions reachable through `actions`
  bool is_reusable;              // NOTE(review): set by ts_language_table_entry; meaning not visible here
} TableEntry;

// Cursor over the symbols that have a table value in one parse state. It is
// created by `ts_language_lookaheads` and stepped by
// `ts_lookahead_iterator__next`.
typedef struct {
  const TSLanguage *language;  // language whose tables are being walked
  const uint16_t *data;        // current read position in the (small) parse table
  const uint16_t *group_end;   // small states: one past the last symbol of the current group
  TSStateId state;
  uint16_t table_value;        // table value of the current symbol (shared by its group)
  uint16_t section_index;
  uint16_t group_count;        // small states: number of groups not yet entered
  bool is_small_state;         // true when the state is >= the language's large_state_count

  // Description of the current symbol.
  const TSParseAction *actions;  // actions of a terminal symbol
  TSSymbol symbol;               // current symbol; UINT16_MAX before the first step
  TSStateId next_state;          // successor state of a non-terminal, 0 for terminals
  uint16_t action_count;         // length of `actions`, 0 for non-terminals
} LookaheadIterator;

void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result);
TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state);
bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol);
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol);
TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol);

// Fetch the parse actions for `symbol` in `state`. The number of actions is
// written to `*count` and the action array is returned.
static inline const TSParseAction *ts_language_actions(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol,
  uint32_t *count
) {
  TableEntry found;
  ts_language_table_entry(self, state, symbol, &found);
  const TSParseAction *actions = found.actions;
  *count = found.action_count;
  return actions;
}

// Whether the first action listed for `symbol` in `state` is a reduction.
static inline bool ts_language_has_reduce_action(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TableEntry found;
  ts_language_table_entry(self, state, symbol, &found);
  if (found.action_count == 0) return false;
  return found.actions[0].type == TSParseActionTypeReduce;
}

// Look up the table value stored for `symbol` in `state`.
//
// The value is a successor state for non-terminal symbols and an index into
// the actions table for terminal symbols. 'Large' states are stored as dense
// rows and are read directly. 'Small' states are stored as groups of symbols
// that share one value, so the groups are searched for the symbol; 0 is
// returned when no group contains it.
static inline uint16_t ts_language_lookup(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (state < self->large_state_count) {
    return self->parse_table[state * self->symbol_count + symbol];
  }

  uint32_t offset = self->small_parse_table_map[state - self->large_state_count];
  const uint16_t *cursor = &self->small_parse_table[offset];
  uint16_t group_total = *cursor;
  cursor++;

  // Group layout: [shared value, symbol count, symbol...]
  for (unsigned group = 0; group < group_total; group++) {
    uint16_t shared_value = cursor[0];
    uint16_t member_count = cursor[1];
    const uint16_t *members = cursor + 2;
    for (unsigned k = 0; k < member_count; k++) {
      if (members[k] == symbol) return shared_value;
    }
    cursor = members + member_count;
  }
  return 0;
}

// Whether the parse table holds a non-zero value for `symbol` in `state`.
static inline bool ts_language_has_actions(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  return ts_language_lookup(self, state, symbol) != 0;
}

// Build an iterator over the symbols that are valid in `state`. The iterator
// starts *before* the first symbol; call `ts_lookahead_iterator__next` to
// reach it.
//
// 'Large' states are walked one table cell per symbol, so the cursor starts
// one cell before the state's row. 'Small' states list their valid symbols
// explicitly in groups, so the cursor starts on the group count and
// `group_end` is placed right after it.
static inline LookaheadIterator ts_language_lookaheads(
  const TSLanguage *self,
  TSStateId state
) {
  LookaheadIterator iterator = {
    .language = self,
    .group_end = NULL,
    .group_count = 0,
    .is_small_state = state >= self->large_state_count,
    .symbol = UINT16_MAX,
    .next_state = 0,
  };

  if (iterator.is_small_state) {
    uint32_t offset = self->small_parse_table_map[state - self->large_state_count];
    const uint16_t *header = &self->small_parse_table[offset];
    iterator.data = header;
    iterator.group_end = header + 1;
    iterator.group_count = *header;
  } else {
    iterator.data = &self->parse_table[state * self->symbol_count] - 1;
  }
  return iterator;
}

// Advance the iterator to the next symbol that has a table value in its
// state. Returns false once the state's symbols are exhausted; otherwise
// `symbol`, and `actions`/`action_count` or `next_state`, describe the symbol.
static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
  // For small parse states, valid symbols are listed explicitly,
  // grouped by their value. There's no need to look up the actions
  // again until moving to the next group.
  if (self->is_small_state) {
    self->data++;
    if (self->data == self->group_end) {
      // The current group is finished: stop, or read the next group's header
      // (shared table value, then symbol count) and land on its first symbol.
      if (self->group_count == 0) return false;
      self->group_count--;
      self->table_value = *(self->data++);
      unsigned symbol_count = *(self->data++);
      self->group_end = self->data + symbol_count;
      self->symbol = *self->data;
    } else {
      // Still inside the same group: the fields derived from the table value
      // on entering the group are left as they are.
      self->symbol = *self->data;
      return true;
    }
  }

  // For large parse states, iterate through every symbol until one
  // is found that has valid actions.
  else {
    do {
      self->data++;
      self->symbol++;
      if (self->symbol >= self->language->symbol_count) return false;
      self->table_value = *self->data;
    } while (!self->table_value);
  }

  // Depending on whether the symbol is terminal or non-terminal, the table value
  // either represents a list of actions or a successor state.
  if (self->symbol < self->language->token_count) {
    // The entry at `table_value` carries the count; the actions follow it.
    const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
    self->action_count = entry->entry.count;
    self->actions = (const TSParseAction *)(entry + 1);
    self->next_state = 0;
  } else {
    self->action_count = 0;
    self->next_state = self->table_value;
  }
  return true;
}

// Whether `state` is a "primary state". A false result means that some other
// state behaves identically to this one with respect to query analysis.
// Languages whose ABI version predates primary state ids treat every state as
// primary.
static inline bool ts_language_state_is_primary(
  const TSLanguage *self,
  TSStateId state
) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_PRIMARY_STATES) return true;
  return self->primary_state_ids[state] == state;
}

// Return the row of per-external-token flags for `external_scanner_state`.
// State 0 has no row and yields NULL.
static inline const bool *ts_language_enabled_external_tokens(
  const TSLanguage *self,
  unsigned external_scanner_state
) {
  if (external_scanner_state == 0) return NULL;
  // Rows are `external_token_count` entries wide.
  return &self->external_scanner.states[self->external_token_count * external_scanner_state];
}

// Return the alias sequence of a production, or NULL for production id 0.
// Sequences are stored back to back, `max_alias_sequence_length` entries each.
static inline const TSSymbol *ts_language_alias_sequence(
  const TSLanguage *self,
  uint32_t production_id
) {
  if (production_id == 0) return NULL;
  return &self->alias_sequences[production_id * self->max_alias_sequence_length];
}

// Return the alias recorded for child `child_index` of a production, or 0
// for production id 0.
static inline TSSymbol ts_language_alias_at(
  const TSLanguage *self,
  uint32_t production_id,
  uint32_t child_index
) {
  if (production_id == 0) return 0;
  uint32_t sequence_start = production_id * self->max_alias_sequence_length;
  return self->alias_sequences[sequence_start + child_index];
}

// Report the half-open range [*start, *end) of field map entries belonging to
// a production. Both are set to NULL when the language has no fields.
static inline void ts_language_field_map(
  const TSLanguage *self,
  uint32_t production_id,
  const TSFieldMapEntry **start,
  const TSFieldMapEntry **end
) {
  const TSFieldMapEntry *first = NULL;
  const TSFieldMapEntry *past_last = NULL;

  if (self->field_count != 0) {
    TSMapSlice slice = self->field_map_slices[production_id];
    first = &self->field_map_entries[slice.index];
    past_last = first + slice.length;
  }

  *start = first;
  *end = past_last;
}

// Report the half-open range [*start, *end) of symbols that
// `original_symbol` can appear as. By default this is the single entry for
// the symbol in `public_symbol_map`; if `alias_map` has a record for the
// symbol, the range covers that record's entries instead.
//
// `alias_map` is a sequence of records [symbol, count, count entries...]
// ended by a 0 symbol. The scan stops at the first record whose symbol is
// greater than the one looked for.
static inline void ts_language_aliases_for_symbol(
  const TSLanguage *self,
  TSSymbol original_symbol,
  const TSSymbol **start,
  const TSSymbol **end
) {
  *start = &self->public_symbol_map[original_symbol];
  *end = *start + 1;

  unsigned position = 0;
  for (;;) {
    TSSymbol record_symbol = self->alias_map[position];
    position++;
    if (record_symbol == 0 || record_symbol > original_symbol) return;

    uint16_t record_length = self->alias_map[position];
    position++;
    if (record_symbol == original_symbol) {
      *start = &self->alias_map[position];
      *end = &self->alias_map[position + record_length];
      return;
    }
    position += record_length;
  }
}

// Write the name of `symbol` to `f`, escaped for use inside a double-quoted
// DOT string: quotes and backslashes get a backslash prefix, newlines and tabs
// are written as the two-character sequences \n and \t.
//
// Nothing is written when the symbol has no name.
static inline void ts_language_write_symbol_as_dot_string(
  const TSLanguage *self,
  FILE *f,
  TSSymbol symbol
) {
  const char *name = ts_language_symbol_name(self, symbol);
  // ts_language_symbol_name returns NULL for symbols outside the language's
  // range; iterating over that would dereference a null pointer.
  if (!name) return;

  for (const char *chr = name; *chr; chr++) {
    switch (*chr) {
      case '"':
      case '\\':
        fputc('\\', f);
        fputc(*chr, f);
        break;
      case '\n':
        fputs("\\n", f);
        break;
      case '\t':
        fputs("\\t", f);
        break;
      default:
        fputc(*chr, f);
        break;
    }
  }
}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_LANGUAGE_H_



================================================
FILE: lib/src/length.h
================================================
#ifndef TREE_SITTER_LENGTH_H_
#define TREE_SITTER_LENGTH_H_

#include <stdlib.h>
#include <stdbool.h>
#include "./point.h"
#include "tree_sitter/api.h"

// A distance or position in source text, tracked both as a byte count and as
// a row/column point.
typedef struct {
  uint32_t bytes;
  TSPoint extent;
} Length;

// Sentinel for "no length": zero bytes with a non-zero column, which is the
// combination `length_is_undefined` tests for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Every component set to its maximum value.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};

// True for lengths that have zero bytes but a non-zero column, the shape of
// the LENGTH_UNDEFINED sentinel.
static inline bool length_is_undefined(Length length) {
  if (length.bytes != 0) return false;
  return length.extent.column != 0;
}

// Return whichever length has fewer bytes; `len2` wins ties.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) return len1;
  return len2;
}

// Add two lengths: bytes are summed, extents are combined with `point_add`.
static inline Length length_add(Length len1, Length len2) {
  Length sum = len1;
  sum.bytes += len2.bytes;
  sum.extent = point_add(len1.extent, len2.extent);
  return sum;
}

// Subtract `len2` from `len1`. The byte count is clamped at zero instead of
// wrapping; the extents are combined with `point_sub`.
static inline Length length_sub(Length len1, Length len2) {
  Length difference;
  if (len1.bytes >= len2.bytes) {
    difference.bytes = len1.bytes - len2.bytes;
  } else {
    difference.bytes = 0;
  }
  difference.extent = point_sub(len1.extent, len2.extent);
  return difference;
}

static inline Length length_zero(void) {
  Length result = {0, {0, 0}};
  return result;
}

// Subtract `len2` from `len1`, yielding the zero length (bytes and extent)
// unless `len1` has strictly more bytes.
static inline Length length_saturating_sub(Length len1, Length len2) {
  return len1.bytes > len2.bytes ? length_sub(len1, len2) : length_zero();
}

#endif



================================================
FILE: lib/src/lexer.c
================================================
#include "./length.h"
#include "./lexer.h"
#include "./unicode.h"

#include "tree_sitter/api.h"

#include <stdarg.h>
#include <stdio.h>

// Format a lexer debug message into `self->debug_buffer` and pass it to the
// logger callback, if one is installed. Characters in the printable ASCII
// range (32..126) are printed as a character, anything else as a number.
//
// `message` must be a string literal, because it is concatenated with the
// format suffix, and a variable named `self` must be in scope. The expansion
// is a bare `if` block; the call sites in this file are written without a
// trailing semicolon.
#define LOG(message, character)              \
  if (self->logger.log) {                    \
    snprintf(                                \
      self->debug_buffer,                    \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
      32 <= character && character < 127 ?   \
        message " character:'%c'" :          \
        message " character:%d",             \
      character                              \
    );                                       \
    self->logger.log(                        \
      self->logger.payload,                  \
      TSLogTypeLex,                          \
      self->debug_buffer                     \
    );                                       \
  }

// Code point compared against the decoded lookahead to recognise a byte order
// mark at byte 0 of the input.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;

// The range installed when the caller supplies no included ranges: it starts
// at byte 0 / point (0, 0) and ends at the maximum representable position.
static const TSRange DEFAULT_RANGE = {
  .start_point = {
    .row = 0,
    .column = 0,
  },
  .end_point = {
    .row = UINT32_MAX,
    .column = UINT32_MAX,
  },
  .start_byte = 0,
  .end_byte = UINT32_MAX
};

/**
 * Stores a known column value and flags the column data as valid.
 * @param self The lexer state.
 * @param val The column value to record.
 */
static void ts_lexer__set_column_data(Lexer *self, uint32_t val) {
  self->column_data.value = val;
  self->column_data.valid = true;
}

/**
 * Adds one to the column value, but only while the column data is valid.
 * @param self The lexer state.
 */
static void ts_lexer__increment_column_data(Lexer *self) {
  if (!self->column_data.valid) return;
  self->column_data.value++;
}

/**
 * Flags the column data as invalid and resets its value to zero.
 * @param self The lexer state.
 */
static void ts_lexer__invalidate_column_data(Lexer *self) {
  self->column_data.value = 0;
  self->column_data.valid = false;
}

// Whether the lexer is at EOF. EOF is encoded by the included-range index
// being equal to the number of included ranges, i.e. every range has been
// consumed.
static bool ts_lexer__eof(const TSLexer *_self) {
  const Lexer *lexer = (const Lexer *)_self;
  return lexer->included_range_count == lexer->current_included_range_index;
}

// Forget the cached chunk of source code. Used when the lexer's position has
// moved outside of it.
static void ts_lexer__clear_chunk(Lexer *self) {
  self->chunk_start = 0;
  self->chunk_size = 0;
  self->chunk = NULL;
}

// Ask the input callback for the chunk of source code that begins at the
// current position. An empty chunk puts the lexer into the EOF state.
static void ts_lexer__get_chunk(Lexer *self) {
  uint32_t byte_offset = self->current_position.bytes;
  self->chunk_start = byte_offset;
  self->chunk = self->input.read(
    self->input.payload,
    byte_offset,
    self->current_position.extent,
    &self->chunk_size
  );

  if (self->chunk_size == 0) {
    self->chunk = NULL;
    self->current_included_range_index = self->included_range_count;
  }
}

// Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position.
//
// On return, `data.lookahead` holds the decoded character and
// `lookahead_size` the number of bytes it occupies.
static void ts_lexer__get_lookahead(Lexer *self) {
  uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
  uint32_t size = self->chunk_size - position_in_chunk;

  // Nothing left in the chunk: report a NUL lookahead of size 1.
  if (size == 0) {
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
    return;
  }

  // Pick the decoder for the input's encoding; any encoding other than the
  // three built-in ones uses the decode function supplied with the input.
  const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
  DecodeFunction decode =
    self->input.encoding == TSInputEncodingUTF8    ? ts_decode_utf8     :
    self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le :
    self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode;

  self->lookahead_size = decode(chunk, size, &self->data.lookahead);

  // If this chunk ended in the middle of a multi-byte character,
  // try again with a fresh chunk.
  // NOTE(review): the refetched chunk is passed to `decode` without checking
  // whether the callback returned an empty chunk (NULL pointer, size 0) —
  // confirm the decoders tolerate that.
  if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
    ts_lexer__get_chunk(self);
    chunk = (const uint8_t *)self->chunk;
    size = self->chunk_size;
    self->lookahead_size = decode(chunk, size, &self->data.lookahead);
  }

  // An undecodable sequence is consumed one byte at a time.
  if (self->data.lookahead == TS_DECODE_ERROR) {
    self->lookahead_size = 1;
  }
}

// Move the lexer to `position`, snapping forward to the start of the next
// included range when the position falls in a gap between ranges, or to the
// EOF state when it lies past every range. The lookahead is cleared and is
// not re-read here.
static void ts_lexer_goto(Lexer *self, Length position) {
  // Cached column data only describes the old position.
  if (position.bytes != self->current_position.bytes) {
    ts_lexer__invalidate_column_data(self);
  }

  self->current_position = position;

  // Move to the first valid position at or after the given position.
  // Empty ranges (end_byte == start_byte) are never selected.
  bool found_included_range = false;
  for (unsigned i = 0; i < self->included_range_count; i++) {
    TSRange *included_range = &self->included_ranges[i];
    if (
      included_range->end_byte > self->current_position.bytes &&
      included_range->end_byte > included_range->start_byte
    ) {
      if (included_range->start_byte >= self->current_position.bytes) {
        self->current_position = (Length) {
          .bytes = included_range->start_byte,
          .extent = included_range->start_point,
        };
      }

      self->current_included_range_index = i;
      found_included_range = true;
      break;
    }
  }

  if (found_included_range) {
    // If the current position is outside of the current chunk of text,
    // then clear out the current chunk of text.
    if (self->chunk && (
      self->current_position.bytes < self->chunk_start ||
      self->current_position.bytes >= self->chunk_start + self->chunk_size
    )) {
      ts_lexer__clear_chunk(self);
    }

    // A lookahead size of 0 marks the lookahead as not yet decoded.
    self->lookahead_size = 0;
    self->data.lookahead = '\0';
  }

  // If the given position is beyond all of the included ranges, move to the
  // EOF state - past the end of the included ranges.
  else {
    self->current_included_range_index = self->included_range_count;
    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
    self->current_position = (Length) {
      .bytes = last_included_range->end_byte,
      .extent = last_included_range->end_point,
    };
    ts_lexer__clear_chunk(self);
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
  }
}

/**
 * Actually advances the lexer. Does not log anything.
 *
 * Consumes the current lookahead, skips over any included ranges that are
 * exhausted or empty, and then decodes the next lookahead (or enters the EOF
 * state when no range is left).
 * @param self The lexer state.
 * @param skip Whether to mark the consumed codepoint as whitespace.
 */
static void ts_lexer__do_advance(Lexer *self, bool skip) {
  // Step over the current lookahead, updating the byte offset, the row/column
  // extent and the cached column data.
  if (self->lookahead_size) {
    if (self->data.lookahead == '\n') {
      self->current_position.extent.row++;
      self->current_position.extent.column = 0;
      ts_lexer__set_column_data(self, 0);
    } else {
      // A byte order mark at byte 0 does not count towards the column data.
      bool is_bom = self->current_position.bytes == 0 &&
        self->data.lookahead == BYTE_ORDER_MARK;
      if (!is_bom) ts_lexer__increment_column_data(self);
      self->current_position.extent.column += self->lookahead_size;
    }
    self->current_position.bytes += self->lookahead_size;
  }

  // If the position has reached the end of the current included range (or the
  // range is empty), jump to the start of the next one. Running out of ranges
  // leaves `current_range` NULL.
  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
  while (
    self->current_position.bytes >= current_range->end_byte ||
    current_range->end_byte == current_range->start_byte
  ) {
    if (self->current_included_range_index < self->included_range_count) {
      self->current_included_range_index++;
    }
    if (self->current_included_range_index < self->included_range_count) {
      current_range++;
      self->current_position = (Length) {
        current_range->start_byte,
        current_range->start_point,
      };
    } else {
      current_range = NULL;
      break;
    }
  }

  // Skipped characters move the start of the token forward.
  if (skip) self->token_start_position = self->current_position;

  if (current_range) {
    // Fetch a new chunk if the position left the cached one, then decode.
    if (
      self->current_position.bytes < self->chunk_start ||
      self->current_position.bytes >= self->chunk_start + self->chunk_size
    ) {
      ts_lexer__get_chunk(self);
    }
    ts_lexer__get_lookahead(self);
  } else {
    // EOF: no chunk, NUL lookahead of size 1.
    ts_lexer__clear_chunk(self);
    self->data.lookahead = '\0';
    self->lookahead_size = 1;
  }
}

// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed. Logs the character being skipped or
// consumed first. Does nothing when the lexer holds no chunk.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
  Lexer *self = (Lexer *)_self;
  if (!self->chunk) return;

  // LOG expands to a complete `if` block, hence no trailing semicolons.
  if (skip) {
    LOG("skip", self->data.lookahead)
  } else {
    LOG("consume", self->data.lookahead)
  }

  ts_lexer__do_advance(self, skip);
}

// Record the current position as the end of the token being matched. May be
// called repeatedly; a later call for a longer match overwrites the earlier
// one.
static void ts_lexer__mark_end(TSLexer *_self) {
  Lexer *self = (Lexer *)_self;

  if (!ts_lexer__eof(&self->data)) {
    uint32_t range_index = self->current_included_range_index;
    TSRange *current_range = &self->included_ranges[range_index];

    // Sitting exactly on the first byte of a later included range means the
    // token really stopped at the *end* of the range before it.
    bool at_later_range_start =
      range_index > 0 &&
      self->current_position.bytes == current_range->start_byte;
    if (at_later_range_start) {
      TSRange *previous_range = current_range - 1;
      self->token_end_position = (Length) {
        .bytes = previous_range->end_byte,
        .extent = previous_range->end_point,
      };
      return;
    }
  }

  self->token_end_position = self->current_position;
}

// Return the column value tracked in `column_data` for the current position.
// When the cached value is invalid, it is recomputed by going back to the
// start of the line and advancing character by character to the original
// position. Also records that the column was requested.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
  Lexer *self = (Lexer *)_self;

  self->did_get_column = true;

  if (!self->column_data.valid) {
    // Record current position
    uint32_t goal_byte = self->current_position.bytes;

    // Back up to the beginning of the line
    Length start_of_col = {
      self->current_position.bytes - self->current_position.extent.column,
      {self->current_position.extent.row, 0},
    };
    ts_lexer_goto(self, start_of_col);
    // Start counting from 0; each advance below increments the value.
    ts_lexer__set_column_data(self, 0);
    ts_lexer__get_chunk(self);

    if (!ts_lexer__eof(_self)) {
      ts_lexer__get_lookahead(self);

      // Advance to the recorded position
      while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
        ts_lexer__do_advance(self, false);
        if (ts_lexer__eof(_self)) break;
      }
    }
  }

  return self->column_data.value;
}

// Whether the lexer sits on the first byte of its current included range,
// i.e. on a boundary between disjoint included ranges of source code. Exposed
// through the lexer API because some languages' external scanners need to
// perform custom actions at these boundaries. Always false at EOF.
static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
  const Lexer *self = (const Lexer *)_self;
  if (self->current_included_range_index >= self->included_range_count) {
    return false;
  }
  TSRange *range = &self->included_ranges[self->current_included_range_index];
  return range->start_byte == self->current_position.bytes;
}

// printf-style logging hook of the lexer: formats the message into the
// debug buffer and forwards it to the logger callback as a lex-type message.
// Does nothing when no logger is installed.
static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) {
  Lexer *self = (Lexer *)_self;
  if (!self->logger.log) return;

  va_list args;
  va_start(args, fmt);
  vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args);
  va_end(args);

  self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer);
}

// Initialize a lexer: install the callback table, reset all position, chunk,
// logger and column state, and install the default included range.
void ts_lexer_init(Lexer *self) {
  *self = (Lexer) {
    .data = {
      // The lexer's methods are stored as struct fields so that generated
      // parsers can call them without needing to be linked against this
      // library.
      .advance = ts_lexer__advance,
      .mark_end = ts_lexer__mark_end,
      .get_column = ts_lexer__get_column,
      .is_at_included_range_start = ts_lexer__is_at_included_range_start,
      .eof = ts_lexer__eof,
      .log = ts_lexer__log,
      .lookahead = 0,
      .result_symbol = 0,
    },
    .chunk = NULL,
    .chunk_size = 0,
    .chunk_start = 0,
    .current_position = {0, {0, 0}},
    .logger = {
      .payload = NULL,
      .log = NULL
    },
    .included_ranges = NULL,
    .included_range_count = 0,
    .current_included_range_index = 0,
    .did_get_column = false,
    .column_data = {
      .valid = false,
      .value = 0
    }
  };
  // Passing no ranges installs a single default range.
  ts_lexer_set_included_ranges(self, NULL, 0);
}

// Free the lexer's included-ranges array. The Lexer struct itself is not freed.
void ts_lexer_delete(Lexer *self) {
  ts_free(self->included_ranges);
}

// Install a new input. The cached chunk belongs to the old input and is
// dropped, then the lexer re-seeks to its current position.
void ts_lexer_set_input(Lexer *self, TSInput input) {
  self->input = input;
  ts_lexer__clear_chunk(self);
  ts_lexer_goto(self, self->current_position);
}

// Move the lexer to the given position. Nothing happens when the lexer is
// already at that byte offset.
void ts_lexer_reset(Lexer *self, Length position) {
  if (self->current_position.bytes == position.bytes) return;
  ts_lexer_goto(self, position);
}

// Prepare the lexer for matching a new token at the current position.
void ts_lexer_start(Lexer *self) {
  self->did_get_column = false;
  self->data.result_symbol = 0;
  self->token_end_position = LENGTH_UNDEFINED;
  self->token_start_position = self->current_position;

  if (ts_lexer__eof(&self->data)) return;

  // Make sure a chunk and a decoded lookahead are available.
  if (self->chunk_size == 0) ts_lexer__get_chunk(self);
  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);

  if (self->current_position.bytes != 0) return;

  // At the very start of the input: skip over a byte order mark, and start
  // the column data at 0.
  if (self->data.lookahead == BYTE_ORDER_MARK) {
    ts_lexer__advance(&self->data, true);
  }
  ts_lexer__set_column_data(self, 0);
}

// Finish matching a token: settle its end position and raise
// `*lookahead_end_byte` to cover every byte the lexer has looked at.
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
  // A token whose end was never marked ends at the current position.
  if (length_is_undefined(self->token_end_position)) {
    ts_lexer__mark_end(&self->data);
  }

  // Marking the end on an included range boundary moves the end back to the
  // end of the preceding range. Pull the start back so that it does not lie
  // after the end.
  if (self->token_start_position.bytes > self->token_end_position.bytes) {
    self->token_start_position = self->token_end_position;
  }

  // The lexer has looked one byte past its current position. To recognise a
  // byte sequence as invalid UTF8 or UTF16, the decoder may additionally have
  // read up to 4 bytes, which therefore influence the result as well.
  bool saw_invalid_character = self->data.lookahead == TS_DECODE_ERROR;
  uint32_t scanned_end_byte = self->current_position.bytes + 1;
  if (saw_invalid_character) scanned_end_byte += 4;

  if (*lookahead_end_byte < scanned_end_byte) {
    *lookahead_end_byte = scanned_end_byte;
  }
}

// Wrapper around the lexer's `mark_end` callback implementation that takes a
// `Lexer` directly.
void ts_lexer_mark_end(Lexer *self) {
  ts_lexer__mark_end(&self->data);
}

// Replace the lexer's included ranges with a copy of `ranges`, then re-seek
// to the current position. With no ranges given, a single default range is
// installed.
//
// Returns false, changing nothing, if a range starts before the end of the
// range preceding it or ends before its own start.
bool ts_lexer_set_included_ranges(
  Lexer *self,
  const TSRange *ranges,
  uint32_t count
) {
  if (!ranges || count == 0) {
    ranges = &DEFAULT_RANGE;
    count = 1;
  } else {
    uint32_t minimum_start = 0;
    for (const TSRange *range = ranges; range != ranges + count; range++) {
      bool starts_too_early = range->start_byte < minimum_start;
      bool is_inverted = range->end_byte < range->start_byte;
      if (starts_too_early || is_inverted) return false;
      minimum_start = range->end_byte;
    }
  }

  size_t byte_size = count * sizeof(TSRange);
  self->included_ranges = ts_realloc(self->included_ranges, byte_size);
  memcpy(self->included_ranges, ranges, byte_size);
  self->included_range_count = count;
  ts_lexer_goto(self, self->current_position);
  return true;
}

// Return the lexer's own included-ranges array (not a copy) and write the
// number of ranges to `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
  *count = self->included_range_count;
  return self->included_ranges;
}

#undef LOG



================================================
FILE: lib/src/lexer.h
================================================
#ifndef TREE_SITTER_LEXER_H_
#define TREE_SITTER_LEXER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./length.h"
#include "./subtree.h"
#include "tree_sitter/api.h"
#include "./parser.h"

// Cached column counter of the lexer. `value` is only meaningful while
// `valid` is true.
typedef struct {
  uint32_t value;
  bool valid;
} ColumnData;

// Full lexer state. `data` is the first member, so a `TSLexer *` handed to
// the callbacks can be cast back to a `Lexer *`.
typedef struct {
  TSLexer data;                 // callback table, lookahead and result symbol
  Length current_position;      // position of the lookahead character
  Length token_start_position;
  Length token_end_position;    // LENGTH_UNDEFINED until the end is marked

  TSRange *included_ranges;     // array owned by the lexer
  const char *chunk;            // most recent chunk from the input callback, or NULL
  TSInput input;
  TSLogger logger;

  uint32_t included_range_count;
  uint32_t current_included_range_index;  // equals included_range_count at EOF
  uint32_t chunk_start;         // byte offset at which `chunk` begins
  uint32_t chunk_size;          // size of `chunk` in bytes
  uint32_t lookahead_size;      // bytes occupied by the lookahead; 0 = not decoded yet
  bool did_get_column;          // set when the column is requested; cleared at token start
  ColumnData column_data;

  char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];  // scratch space for log messages
} Lexer;

void ts_lexer_init(Lexer *self);
void ts_lexer_delete(Lexer *self);
void ts_lexer_set_input(Lexer *self, TSInput input);
void ts_lexer_reset(Lexer *self, Length position);
void ts_lexer_start(Lexer *self);
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte);
void ts_lexer_mark_end(Lexer *self);
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_LEXER_H_



================================================
FILE: lib/src/lib.c
================================================
#include "./alloc.c"
#include "./get_changed_ranges.c"
#include "./language.c"
#include "./lexer.c"
#include "./node.c"
#include "./parser.c"
#include "./query.c"
#include "./stack.c"
#include "./subtree.c"
#include "./tree_cursor.c"
#include "./tree.c"
#include "./wasm_store.c"



================================================
FILE: lib/src/node.c
================================================
#include <stdbool.h>
#include "./point.h"
#include "./subtree.h"
#include "./tree.h"
#include "./language.h"

// Cursor over the direct children of a node's subtree.
typedef struct {
  Subtree parent;                   // subtree whose children are visited (null subtree if it has none)
  const TSTree *tree;               // tree the produced nodes belong to
  Length position;                  // running position used for the next child
  uint32_t child_index;             // index of the next child to produce
  uint32_t structural_child_index;  // like child_index, but not counting extra children
  const TSSymbol *alias_sequence;   // parent production's aliases, or NULL
} NodeChildIterator;

static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous);

// TSNode - constructors

// Build a node handle for `subtree` within `tree`. The start byte, start row,
// start column and alias are packed, in that order, into the node's context
// slots.
TSNode ts_node_new(
  const TSTree *tree,
  const Subtree *subtree,
  Length position,
  TSSymbol alias
) {
  TSNode node = {
    {position.bytes, position.extent.row, position.extent.column, alias},
    subtree,
    tree,
  };
  return node;
}

// The null node: no tree, no subtree, zero position, no alias.
static inline TSNode ts_node__null(void) {
  return ts_node_new(NULL, NULL, length_zero(), 0);
}

// TSNode - accessors

// The node's start byte, stored in context slot 0 by `ts_node_new`.
uint32_t ts_node_start_byte(TSNode self) {
  return self.context[0];
}

// The node's start point, rebuilt from context slots 1 and 2, which
// `ts_node_new` fills with the row and column.
TSPoint ts_node_start_point(TSNode self) {
  TSPoint start = {self.context[1], self.context[2]};
  return start;
}

// The alias symbol stored in context slot 3 by `ts_node_new` (0 = no alias).
static inline uint32_t ts_node__alias(const TSNode *self) {
  return self->context[3];
}

// The node's `id` field holds a pointer to its Subtree; load it by value.
static inline Subtree ts_node__subtree(TSNode self) {
  return *(const Subtree *)self.id;
}

// NodeChildIterator

static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
  Subtree subtree = ts_node__subtree(*node);
  if (ts_subtree_child_count(subtree) == 0) {
    return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
  }
  const TSSymbol *alias_sequence = ts_language_alias_sequence(
    node->tree->language,
    subtree.ptr->production_id
  );
  return (NodeChildIterator) {
    .tree = node->tree,
    .parent = subtree,
    .position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
    .child_index = 0,
    .structural_child_index = 0,
    .alias_sequence = alias_sequence,
  };
}

// True once every child of the parent has been produced. The parent pointer
// is dereferenced, so callers must check for a null parent first.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  bool exhausted = self->child_index == self->parent.ptr->child_count;
  return exhausted;
}

// Produces the next child into `*result` and advances the iterator.
// Returns false (leaving `*result` untouched) when the iterator has no
// parent or all children have been produced.
static inline bool ts_node_child_iterator_next(
  NodeChildIterator *self,
  TSNode *result
) {
  if (!self->parent.ptr) return false;
  if (ts_node_child_iterator_done(self)) return false;

  const Subtree *current = &ts_subtree_children(self->parent)[self->child_index];

  // Only non-extra children consume a slot in the alias sequence.
  TSSymbol alias = 0;
  if (!ts_subtree_extra(*current)) {
    if (self->alias_sequence) {
      alias = self->alias_sequence[self->structural_child_index];
    }
    self->structural_child_index += 1;
  }

  // The first child's padding is not added: the iterator starts at the
  // parent's start position.
  if (self->child_index != 0) {
    self->position = length_add(self->position, ts_subtree_padding(*current));
  }

  *result = ts_node_new(self->tree, current, self->position, alias);

  // Leave the position at the end of the child just produced.
  self->position = length_add(self->position, ts_subtree_size(*current));
  self->child_index += 1;
  return true;
}

// TSNode - private

// Decides whether a node counts as a child for the public API.
// With `include_anonymous`, any visible or aliased node counts. Otherwise the
// node must be named: by its alias's metadata if aliased, else by being both
// visible and named itself.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  Subtree subtree = ts_node__subtree(self);

  if (include_anonymous) {
    return ts_subtree_visible(subtree) || ts_node__alias(&self);
  }

  TSSymbol alias = ts_node__alias(&self);
  if (!alias) {
    return ts_subtree_visible(subtree) && ts_subtree_named(subtree);
  }
  return ts_language_symbol_metadata(self.tree->language, alias).named;
}

// Returns the precomputed number of visible children (or named children when
// `include_anonymous` is false); 0 for a subtree without children.
static inline uint32_t ts_node__relevant_child_count(
  TSNode self,
  bool include_anonymous
) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return include_anonymous
    ? subtree.ptr->visible_child_count
    : subtree.ptr->named_child_count;
}

// Returns the `child_index`-th relevant child of `self`, looking through
// irrelevant (hidden) children as if their relevant children belonged to
// `self` directly. Returns the null node if the index is out of range.
static inline TSNode ts_node__child(
  TSNode self,
  uint32_t child_index,
  bool include_anonymous
) {
  TSNode parent = self;
  uint32_t remaining = child_index;

  for (;;) {
    bool descended = false;
    uint32_t seen = 0;
    TSNode candidate;
    NodeChildIterator iterator = ts_node_iterate_children(&parent);

    while (ts_node_child_iterator_next(&iterator, &candidate)) {
      if (ts_node__is_relevant(candidate, include_anonymous)) {
        if (seen == remaining) return candidate;
        seen++;
        continue;
      }

      // An irrelevant child contributes its own relevant children. If the
      // target index falls among them, restart the search inside it.
      uint32_t nested_index = remaining - seen;
      uint32_t nested_count = ts_node__relevant_child_count(candidate, include_anonymous);
      if (nested_index < nested_count) {
        parent = candidate;
        remaining = nested_index;
        descended = true;
        break;
      }
      seen += nested_count;
    }

    if (!descended) break;
  }

  return ts_node__null();
}

// Returns true if `other` is reachable from `self` by walking only through
// trailing children that span zero total bytes. The scan runs from the last
// child backward and stops at the first child with a non-zero byte length.
static bool ts_subtree_has_trailing_empty_descendant(
  Subtree self,
  Subtree other
) {
  for (unsigned remaining = ts_subtree_child_count(self); remaining > 0; remaining--) {
    Subtree child = ts_subtree_children(self)[remaining - 1];
    if (ts_subtree_total_bytes(child) > 0) return false;
    if (child.ptr == other.ptr) return true;
    if (ts_subtree_has_trailing_empty_descendant(child, other)) return true;
  }
  return false;
}

// Finds the closest relevant node that precedes `self` at the same level,
// looking through irrelevant wrapper nodes. "Relevant" is decided by
// ts_node__is_relevant() with `include_anonymous`. Returns the null node if
// there is no such sibling.
//
// The search starts at ts_node_parent(self) and repeatedly scans one node's
// children, either descending toward `self` or into an earlier irrelevant
// child that has relevant children of its own.
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
  Subtree self_subtree = ts_node__subtree(self);
  bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
  uint32_t target_end_byte = ts_node_end_byte(self);

  TSNode node = ts_node_parent(self);
  // Best candidate remembered from a level above the one being scanned.
  TSNode earlier_node = ts_node__null();
  bool earlier_node_is_relevant = false;

  while (!ts_node_is_null(node)) {
    // Best candidate among the children of `node` scanned so far.
    TSNode earlier_child = ts_node__null();
    bool earlier_child_is_relevant = false;
    bool found_child_containing_target = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&node);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // Reached `self` itself: everything before it has been considered.
      if (child.id == self.id) break;
      // `iterator.position` is the end of `child`; a child ending past the
      // target's end is treated as the one containing the target.
      if (iterator.position.bytes > target_end_byte) {
        found_child_containing_target = true;
        break;
      }

      // A child ending exactly at the target's end contains the target,
      // unless the target is empty and is not one of the child's trailing
      // empty descendants.
      if (iterator.position.bytes == target_end_byte &&
          (!self_is_empty ||
           ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
        found_child_containing_target = true;
        break;
      }

      // Remember the latest child that is relevant, or that is irrelevant
      // but has relevant children.
      if (ts_node__is_relevant(child, include_anonymous)) {
        earlier_child = child;
        earlier_child_is_relevant = true;
      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
        earlier_child = child;
        earlier_child_is_relevant = false;
      }
    }

    if (found_child_containing_target) {
      // Keep this level's candidate as a fallback, then descend toward `self`.
      if (!ts_node_is_null(earlier_child)) {
        earlier_node = earlier_child;
        earlier_node_is_relevant = earlier_child_is_relevant;
      }
      node = child;
    } else if (earlier_child_is_relevant) {
      return earlier_child;
    } else if (!ts_node_is_null(earlier_child)) {
      // Irrelevant child with relevant children: search inside it.
      node = earlier_child;
    } else if (earlier_node_is_relevant) {
      return earlier_node;
    } else {
      // Fall back to the remembered candidate (possibly null, which ends the
      // loop) and clear it so it is not visited again.
      node = earlier_node;
      earlier_node = ts_node__null();
      earlier_node_is_relevant = false;
    }
  }

  return ts_node__null();
}

// Finds the closest relevant node that follows `self` at the same level,
// looking through irrelevant wrapper nodes. "Relevant" is decided by
// ts_node__is_relevant() with `include_anonymous`. Returns the null node if
// there is no such sibling.
//
// The search starts at ts_node_parent(self) and repeatedly scans one node's
// children, either descending toward `self` or into a later irrelevant child
// that has relevant children of its own.
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
  uint32_t target_end_byte = ts_node_end_byte(self);

  TSNode node = ts_node_parent(self);
  // Best candidate remembered from a level above the one being scanned.
  TSNode later_node = ts_node__null();
  bool later_node_is_relevant = false;

  while (!ts_node_is_null(node)) {
    // First suitable child of `node` found after the target.
    TSNode later_child = ts_node__null();
    bool later_child_is_relevant = false;
    TSNode child_containing_target = ts_node__null();

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&node);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // `iterator.position` is the end of `child`; skip children that end at
      // or before the target's end.
      if (iterator.position.bytes <= target_end_byte) continue;
      uint32_t start_byte = ts_node_start_byte(self);
      uint32_t child_start_byte = ts_node_start_byte(child);

      // A child contains the target if it starts at or before the target's
      // start; for an empty target it must start strictly before.
      bool is_empty = start_byte == target_end_byte;
      bool contains_target = is_empty ?
        child_start_byte < start_byte :
        child_start_byte <= start_byte;

      if (contains_target) {
        // Do not record `self` itself as the containing child.
        if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
          child_containing_target = child;
        }
      } else if (ts_node__is_relevant(child, include_anonymous)) {
        later_child = child;
        later_child_is_relevant = true;
        break;
      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
        later_child = child;
        later_child_is_relevant = false;
        break;
      }
    }

    if (!ts_node_is_null(child_containing_target)) {
      // Keep this level's candidate as a fallback, then descend toward `self`.
      if (!ts_node_is_null(later_child)) {
        later_node = later_child;
        later_node_is_relevant = later_child_is_relevant;
      }
      node = child_containing_target;
    } else if (later_child_is_relevant) {
      return later_child;
    } else if (!ts_node_is_null(later_child)) {
      // Irrelevant child with relevant children: search inside it.
      node = later_child;
    } else if (later_node_is_relevant) {
      return later_node;
    } else {
      // Fall back to the remembered candidate (possibly null, which ends the
      // loop).
      // NOTE(review): unlike ts_node__prev_sibling, the remembered candidate
      // is not cleared here — confirm this cannot revisit the same node.
      node = later_node;
    }
  }

  return ts_node__null();
}

// Returns the first relevant child of `self` whose end byte is greater than
// `goal`, looking through irrelevant children that have visible children.
// Returns the null node if no such child exists.
static inline TSNode ts_node__first_child_for_byte(
  TSNode self,
  uint32_t goal,
  bool include_anonymous
) {
  TSNode node = self;
  bool did_descend = true;

  // Saved iterator of an enclosing level, so the scan can resume among the
  // following siblings if a descent finds nothing. Only one level is kept.
  NodeChildIterator last_iterator;
  bool has_last_iterator = false;

  while (did_descend) {
    did_descend = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&node);
  loop:
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (ts_node_end_byte(child) > goal) {
        if (ts_node__is_relevant(child, include_anonymous)) {
          return child;
        } else if (ts_node_child_count(child) > 0) {
          // NOTE(review): this compares the iterator's index within the
          // current parent against the *child's* own child count — confirm
          // that this is the intended "more siblings remain" test.
          if (iterator.child_index < ts_subtree_child_count(ts_node__subtree(child))) {
            last_iterator = iterator;
            has_last_iterator = true;
          }
          did_descend = true;
          node = child;
          break;
        }
      }
    }

    // Nothing found at this level: resume the saved enclosing scan once.
    if (!did_descend && has_last_iterator) {
      iterator = last_iterator;
      has_last_iterator = false;
      goto loop;
    }
  }

  return ts_node__null();
}

// Returns the smallest relevant node within `self` that spans the byte range
// [range_start, range_end]. Falls back to `self` when no relevant descendant
// spans it, and returns the null node when the range is inverted.
static inline TSNode ts_node__descendant_for_byte_range(
  TSNode self,
  uint32_t range_start,
  uint32_t range_end,
  bool include_anonymous
) {
  if (range_start > range_end) {
    return ts_node__null();
  }

  TSNode current = self;
  TSNode deepest_relevant = self;

  for (bool descended = true; descended;) {
    descended = false;

    TSNode candidate;
    NodeChildIterator it = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&it, &candidate)) {
      uint32_t candidate_end = it.position.bytes;
      uint32_t candidate_start = ts_node_start_byte(candidate);

      // The candidate must reach at least the end of the range.
      if (candidate_end < range_end) continue;

      // It must also end after the start of the range; an empty candidate
      // only needs to end at or after it.
      bool candidate_is_empty = candidate_start == candidate_end;
      bool ends_too_early = candidate_is_empty
        ? candidate_end < range_start
        : candidate_end <= range_start;
      if (ends_too_early) continue;

      // If it starts after the range starts, neither it nor any later
      // sibling can cover the range.
      if (range_start < candidate_start) break;

      current = candidate;
      if (ts_node__is_relevant(current, include_anonymous)) {
        deepest_relevant = current;
      }
      descended = true;
      break;
    }
  }

  return deepest_relevant;
}

// Returns the smallest relevant node within `self` that spans the point range
// [range_start, range_end]. Falls back to `self` when no relevant descendant
// spans it, and returns the null node when the range is inverted.
static inline TSNode ts_node__descendant_for_point_range(
  TSNode self,
  TSPoint range_start,
  TSPoint range_end,
  bool include_anonymous
) {
  if (point_gt(range_start, range_end)) {
    return ts_node__null();
  }

  TSNode current = self;
  TSNode deepest_relevant = self;

  for (bool descended = true; descended;) {
    descended = false;

    TSNode candidate;
    NodeChildIterator it = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&it, &candidate)) {
      TSPoint candidate_end = it.position.extent;
      TSPoint candidate_start = ts_node_start_point(candidate);

      // The candidate must reach at least the end of the range.
      if (point_lt(candidate_end, range_end)) continue;

      // It must also end after the start of the range; an empty candidate
      // only needs to end at or after it.
      bool candidate_is_empty = point_eq(candidate_start, candidate_end);
      bool ends_too_early = candidate_is_empty
        ? point_lt(candidate_end, range_start)
        : point_lte(candidate_end, range_start);
      if (ends_too_early) continue;

      // If it starts after the range starts, neither it nor any later
      // sibling can cover the range.
      if (point_lt(range_start, candidate_start)) break;

      current = candidate;
      if (ts_node__is_relevant(current, include_anonymous)) {
        deepest_relevant = current;
      }
      descended = true;
      break;
    }
  }

  return deepest_relevant;
}

// TSNode - public

// Returns the node's end byte: its start byte plus its subtree's byte size.
uint32_t ts_node_end_byte(TSNode self) {
  uint32_t start = ts_node_start_byte(self);
  uint32_t length = ts_subtree_size(ts_node__subtree(self)).bytes;
  return start + length;
}

// Returns the node's end point: its start point advanced by its subtree's
// extent.
TSPoint ts_node_end_point(TSNode self) {
  TSPoint start = ts_node_start_point(self);
  TSPoint extent = ts_subtree_size(ts_node__subtree(self)).extent;
  return point_add(start, extent);
}

// Returns the node's public symbol, preferring its alias over the subtree's
// own symbol.
TSSymbol ts_node_symbol(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_public_symbol(self.tree->language, symbol);
}

// Returns the node's type name, preferring its alias over the subtree's own
// symbol.
const char *ts_node_type(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_symbol_name(self.tree->language, symbol);
}

// Returns the language of the tree this node belongs to.
const TSLanguage *ts_node_language(TSNode self) {
  const TSLanguage *language = self.tree->language;
  return language;
}

// Returns the subtree's own symbol, ignoring any alias on the node.
TSSymbol ts_node_grammar_symbol(TSNode self) {
  TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self));
  return symbol;
}

// Returns the name of the subtree's own symbol, ignoring any alias.
const char *ts_node_grammar_type(TSNode self) {
  return ts_language_symbol_name(
    self.tree->language,
    ts_subtree_symbol(ts_node__subtree(self))
  );
}

// Returns a string rendering of the node's subtree, produced by
// ts_subtree_string() with the node's alias and that alias's visibility.
char *ts_node_string(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  bool alias_is_visible =
    ts_language_symbol_metadata(self.tree->language, alias).visible;
  return ts_subtree_string(
    ts_node__subtree(self),
    alias,
    alias_is_visible,
    self.tree->language,
    false
  );
}

// Two nodes are equal when they share both the tree and the subtree pointer.
bool ts_node_eq(TSNode self, TSNode other) {
  if (self.tree != other.tree) return false;
  return self.id == other.id;
}

// A node is null when it refers to no subtree.
bool ts_node_is_null(TSNode self) {
  return !self.id;
}

// Reports the `extra` flag of the node's subtree.
bool ts_node_is_extra(TSNode self) {
  bool extra = ts_subtree_extra(ts_node__subtree(self));
  return extra;
}

// A node is named if its alias is a named symbol, or, without an alias, if
// its subtree is named.
bool ts_node_is_named(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_named(ts_node__subtree(self));
}

// Reports the `missing` flag of the node's subtree.
bool ts_node_is_missing(TSNode self) {
  bool missing = ts_subtree_missing(ts_node__subtree(self));
  return missing;
}

// Reports whether the node's subtree is flagged as having changes.
bool ts_node_has_changes(TSNode self) {
  bool changed = ts_subtree_has_changes(ts_node__subtree(self));
  return changed;
}

// A node has an error when its subtree's error cost is greater than zero.
bool ts_node_has_error(TSNode self) {
  return 0 < ts_subtree_error_cost(ts_node__subtree(self));
}

// A node is an error node when its public symbol is the built-in error symbol.
bool ts_node_is_error(TSNode self) {
  return ts_node_symbol(self) == ts_builtin_sym_error;
}

// Returns the subtree's visible descendant count plus one for the node itself.
uint32_t ts_node_descendant_count(TSNode self) {
  uint32_t below = ts_subtree_visible_descendant_count(ts_node__subtree(self));
  return below + 1;
}

// Returns the parse state recorded on the node's subtree.
TSStateId ts_node_parse_state(TSNode self) {
  TSStateId state = ts_subtree_parse_state(ts_node__subtree(self));
  return state;
}

// Returns the state reached from the node's parse state on the node's grammar
// symbol, or TS_TREE_STATE_NONE if the node has no recorded parse state.
TSStateId ts_node_next_parse_state(TSNode self) {
  uint16_t state = ts_node_parse_state(self);
  if (state != TS_TREE_STATE_NONE) {
    const TSLanguage *language = self.tree->language;
    uint16_t symbol = ts_node_grammar_symbol(self);
    return ts_language_next_state(language, state, symbol);
  }
  return TS_TREE_STATE_NONE;
}

// Returns the closest ancestor of `self` found by walking down from the root
// with ts_node_child_with_descendant(). Returns the null node when `self` is
// the root.
TSNode ts_node_parent(TSNode self) {
  TSNode ancestor = ts_tree_root_node(self.tree);
  if (ancestor.id == self.id) return ts_node__null();

  // Step down until the next step would be `self` itself or does not exist.
  TSNode next = ts_node_child_with_descendant(ancestor, self);
  while (!ts_node_is_null(next) && next.id != self.id) {
    ancestor = next;
    next = ts_node_child_with_descendant(ancestor, self);
  }

  return ancestor;
}

// Returns the relevant node (visible or aliased) directly below `self` that
// contains `descendant`, looking through irrelevant intermediate nodes.
// Returns `descendant` itself if it is reached first, and the null node if no
// child of `self` contains it.
//
// Note that `self` is reused as the loop variable: each iterator step
// overwrites it with the child being examined.
TSNode ts_node_child_with_descendant(TSNode self, TSNode descendant) {
  uint32_t start_byte = ts_node_start_byte(descendant);
  uint32_t end_byte = ts_node_end_byte(descendant);
  bool is_empty = start_byte == end_byte;

  // Outer loop: keep descending while the chosen child is not relevant.
  do {
    NodeChildIterator iter = ts_node_iterate_children(&self);
    // Inner loop: advance through the children of the current node.
    do {
      // Give up when the children run out or a child starts after the
      // descendant's start.
      if (
        !ts_node_child_iterator_next(&iter, &self)
        || ts_node_start_byte(self) > start_byte
      ) {
        return ts_node__null();
      }
      if (self.id == descendant.id) {
        return self;
      }

      // If the descendant is empty, and the end byte is within `self`,
      // we check whether `self` contains it or not.
      if (is_empty && iter.position.bytes >= end_byte && ts_node_child_count(self) > 0) {
        TSNode child = ts_node_child_with_descendant(self, descendant);
        // If the child is not null, return self if it's relevant, else return the child
        if (!ts_node_is_null(child)) {
          return ts_node__is_relevant(self, true) ? self : child;
        }
      }
      // Keep scanning while this child ends before the descendant's end (at
      // or before it, for an empty descendant) or has no visible children.
    } while ((is_empty ? iter.position.bytes <= end_byte : iter.position.bytes < end_byte) || ts_node_child_count(self) == 0);
  } while (!ts_node__is_relevant(self, true));

  return self;
}

// Returns the child at `child_index`, counting anonymous nodes too.
TSNode ts_node_child(TSNode self, uint32_t child_index) {
  TSNode child = ts_node__child(self, child_index, true);
  return child;
}

// Returns the child at `child_index`, counting named nodes only.
TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
  TSNode child = ts_node__child(self, child_index, false);
  return child;
}

// Returns the first child of `self` that is assigned to field `field_id`.
// Returns the null node when `field_id` is zero, when `self` has no visible
// children, or when no child carries the field.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
// Re-entry point for the tail call below; `self` may have been replaced by
// one of its hidden children.
recur:
  if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();

  // Fetch the field map entries for this node's production.
  const TSFieldMapEntry *field_map, *field_map_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &field_map,
    &field_map_end
  );
  if (field_map == field_map_end) return ts_node__null();

  // The field mappings are sorted by their field id. Scan all
  // the mappings to find the ones for the given field id.
  while (field_map->field_id < field_id) {
    field_map++;
    if (field_map == field_map_end) return ts_node__null();
  }
  while (field_map_end[-1].field_id > field_id) {
    field_map_end--;
    if (field_map == field_map_end) return ts_node__null();
  }

  TSNode child;
  NodeChildIterator iterator = ts_node_iterate_children(&self);
  while (ts_node_child_iterator_next(&iterator, &child)) {
    // Field map entries refer to structural (non-extra) children only.
    if (!ts_subtree_extra(ts_node__subtree(child))) {
      // The iterator has already advanced past `child`, hence the `- 1`.
      uint32_t index = iterator.structural_child_index - 1;
      if (index < field_map->child_index) continue;

      // Hidden nodes' fields are "inherited" by their visible parent.
      if (field_map->inherited) {

        // If this is the *last* possible child node for this field,
        // then perform a tail call to avoid recursion.
        if (field_map + 1 == field_map_end) {
          self = child;
          goto recur;
        }

        // Otherwise, descend into this child, but if it doesn't contain
        // the field, continue searching subsequent children.
        else {
          TSNode result = ts_node_child_by_field_id(child, field_id);
          if (result.id) return result;
          field_map++;
          if (field_map == field_map_end) return ts_node__null();
        }
      }

      else if (ts_node__is_relevant(child, true)) {
        return child;
      }

      // If the field refers to a hidden node with visible children,
      // return the first visible child.
      else if (ts_node_child_count(child) > 0 ) {
        return ts_node_child(child, 0);
      }

      // Otherwise, continue searching subsequent children.
      else {
        field_map++;
        if (field_map == field_map_end) return ts_node__null();
      }
    }
  }

  return ts_node__null();
}

// Looks up the field name that the production of `self` assigns directly
// (not by inheritance) to the child at `structural_child_index`.
// Returns NULL when that child has no such field.
static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) {
  const TSFieldMapEntry *entry, *entries_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &entry,
    &entries_end
  );

  while (entry != entries_end) {
    if (entry->child_index == structural_child_index && !entry->inherited) {
      return self.tree->language->field_names[entry->field_id];
    }
    entry++;
  }
  return NULL;
}

// Shared implementation of ts_node_field_name_for_child() and
// ts_node_field_name_for_named_child().
//
// Returns the field name of the `child_index`-th relevant child of `self`,
// where "relevant" is decided by ts_node__is_relevant() with
// `include_anonymous`. Irrelevant (hidden) children are looked through, and a
// field name found on such a hidden child is inherited by the target unless
// the target has a field name of its own.
//
// Returns NULL if the child is an extra node, has no field name, or does not
// exist.
static inline const char *ts_node__field_name_for_child(
  TSNode self,
  uint32_t child_index,
  bool include_anonymous
) {
  TSNode result = self;
  bool did_descend = true;
  const char *inherited_field_name = NULL;

  while (did_descend) {
    did_descend = false;

    TSNode child;
    uint32_t index = 0;
    NodeChildIterator iterator = ts_node_iterate_children(&result);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (ts_node__is_relevant(child, include_anonymous)) {
        if (index == child_index) {
          if (ts_node_is_extra(child)) {
            return NULL;
          }
          // The iterator has already advanced past `child`, hence the `- 1`.
          const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
          if (field_name) return field_name;
          return inherited_field_name;
        }
        index++;
      } else {
        uint32_t grandchild_index = child_index - index;
        uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
        if (grandchild_index < grandchild_count) {
          // The target lies inside this hidden child: remember the hidden
          // child's field name, then continue the search inside it.
          const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
          if (field_name) inherited_field_name = field_name;

          did_descend = true;
          result = child;
          child_index = grandchild_index;
          break;
        }
        index += grandchild_count;
      }
    }
  }

  return NULL;
}

// Returns the field name of the child at `child_index`, counting anonymous
// nodes too, or NULL if there is none.
const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) {
  return ts_node__field_name_for_child(self, child_index, true);
}

// Returns the field name of the named child at `named_child_index`, or NULL
// if there is none.
const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index) {
  return ts_node__field_name_for_child(self, named_child_index, false);
}

// Resolves the field name (`name_length` bytes of `name`) to a field id in
// the node's language, then delegates to ts_node_child_by_field_id().
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
) {
  TSFieldId id = ts_language_field_id_for_name(self.tree->language, name, name_length);
  return ts_node_child_by_field_id(self, id);
}

// Returns the precomputed number of visible children; 0 for a subtree
// without children.
uint32_t ts_node_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->visible_child_count;
}

// Returns the precomputed number of named children; 0 for a subtree without
// children.
uint32_t ts_node_named_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->named_child_count;
}

// Returns the next sibling, counting anonymous nodes too.
TSNode ts_node_next_sibling(TSNode self) {
  TSNode sibling = ts_node__next_sibling(self, true);
  return sibling;
}

// Returns the next sibling, counting named nodes only.
TSNode ts_node_next_named_sibling(TSNode self) {
  TSNode sibling = ts_node__next_sibling(self, false);
  return sibling;
}

// Returns the previous sibling, counting anonymous nodes too.
TSNode ts_node_prev_sibling(TSNode self) {
  TSNode sibling = ts_node__prev_sibling(self, true);
  return sibling;
}

// Returns the previous sibling, counting named nodes only.
TSNode ts_node_prev_named_sibling(TSNode self) {
  TSNode sibling = ts_node__prev_sibling(self, false);
  return sibling;
}

// Returns the first child (anonymous nodes included) whose end byte is
// greater than `byte`.
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
  TSNode child = ts_node__first_child_for_byte(self, byte, true);
  return child;
}

// Returns the first named child whose end byte is greater than `byte`.
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
  TSNode child = ts_node__first_child_for_byte(self, byte, false);
  return child;
}

// Returns the smallest node (anonymous nodes included) spanning the byte
// range [start, end].
TSNode ts_node_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  TSNode descendant = ts_node__descendant_for_byte_range(self, start, end, true);
  return descendant;
}

// Returns the smallest named node spanning the byte range [start, end].
TSNode ts_node_named_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  TSNode descendant = ts_node__descendant_for_byte_range(self, start, end, false);
  return descendant;
}

// Returns the smallest node (anonymous nodes included) spanning the point
// range [start, end].
TSNode ts_node_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  TSNode descendant = ts_node__descendant_for_point_range(self, start, end, true);
  return descendant;
}

// Returns the smallest named node spanning the point range [start, end].
TSNode ts_node_named_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  TSNode descendant = ts_node__descendant_for_point_range(self, start, end, false);
  return descendant;
}

// Adjusts the node's stored start position to account for `edit`.
//  - A node starting at or after the old end of the edit is shifted by the
//    edit's change in length.
//  - A node starting strictly inside the edited range is moved to the new end
//    of the edit.
//  - A node starting at or before the start of the edit is left where it is.
void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
  uint32_t byte = ts_node_start_byte(*self);
  TSPoint point = ts_node_start_point(*self);

  bool starts_after_edit = byte >= edit->old_end_byte;
  bool starts_inside_edit = !starts_after_edit && byte > edit->start_byte;

  if (starts_inside_edit) {
    byte = edit->new_end_byte;
    point = edit->new_end_point;
  } else if (starts_after_edit) {
    byte = edit->new_end_byte + (byte - edit->old_end_byte);
    point = point_add(edit->new_end_point, point_sub(point, edit->old_end_point));
  }

  self->context[0] = byte;
  self->context[1] = point.row;
  self->context[2] = point.column;
}



================================================
FILE: lib/src/parser.h
================================================
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

// Built-in symbol values: the error symbol is the largest TSSymbol value and
// the end symbol is zero.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// NOTE(review): presumably the size in bytes of the buffer passed to an
// external scanner's serialize callback — confirm against the caller.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

// Fallback declarations of the basic types, compiled only when
// TREE_SITTER_API_H_ is not defined (presumably the include guard of
// tree_sitter/api.h, which would otherwise provide them — confirm).
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
// Three-part version number attached to a language.
typedef struct TSLanguageMetadata {
  uint8_t major_version;
  uint8_t minor_version;
  uint8_t patch_version;
} TSLanguageMetadata;
#endif

// One entry of a production's field map: it associates a field with one
// structural child of the production.
typedef struct {
  TSFieldId field_id;   // field this entry belongs to
  uint8_t child_index;  // structural (non-extra) child index within the production
  bool inherited;       // true if the field comes from inside a hidden child
} TSFieldMapEntry;

// Used to index the field and supertype maps: a slice is described by the
// `index` of its first entry and its `length`.
typedef struct {
  uint16_t index;
  uint16_t length;
} TSMapSlice;

// Per-symbol flags stored in TSLanguage.symbol_metadata.
typedef struct {
  bool visible;    // whether the symbol is visible
  bool named;      // whether the symbol is named (as opposed to anonymous)
  bool supertype;  // whether the symbol is a supertype
} TSSymbolMetadata;

// Interface handed to a language's lex functions and external scanner.
typedef struct TSLexer TSLexer;

struct TSLexer {
  int32_t lookahead;                                  // current lookahead character, read by the lexer macros
  TSSymbol result_symbol;                             // symbol of the recognized token, set by ACCEPT_TOKEN
  void (*advance)(TSLexer *, bool);                   // advance one character; the macros pass their `skip` flag as the bool
  void (*mark_end)(TSLexer *);                        // called by ACCEPT_TOKEN after recording the result symbol
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);    // printf-style variadic logging hook
};

// Kinds of parse action; the value is stored in the `type` member of
// TSParseAction.
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

// A single parse action. Every variant begins with a `uint8_t type`, so the
// action kind can be read through the bare `type` member regardless of which
// variant was written.
typedef union {
  // Data for a shift action.
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  // Data for a reduce action.
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

// Lex state pair without a reserved-word set.
// NOTE(review): TSLanguage.lex_modes uses TSLexerMode instead; presumably
// this type is kept for parsers generated against an older layout — confirm.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

// Element type of TSLanguage.lex_modes: the internal lex state, the external
// lex state, and the id of a reserved-word set.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
  uint16_t reserved_word_set_id;
} TSLexerMode;

// Element type of TSLanguage.parse_actions. Each element is either a parse
// action or an `entry` header.
// NOTE(review): presumably a header is followed by `count` action elements —
// confirm against the code that reads parse_actions.
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

// A range of characters; set_contains() treats both `start` and `end` as
// inclusive.
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;

// Static description of a grammar: size counters, parse tables, symbol and
// field metadata, lexing functions, external scanner hooks, reserved words,
// supertype maps and version metadata.
struct TSLanguage {
  uint32_t abi_version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;  // indexed by field id
  const TSMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexerMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // Hooks and tables for an external scanner.
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
  const char *name;
  const TSSymbol *reserved_words;
  uint16_t max_reserved_word_set_size;
  uint32_t supertype_count;
  const TSSymbol *supertype_symbols;
  const TSMapSlice *supertype_map_slices;
  const TSSymbol *supertype_map_entries;
  TSLanguageMetadata metadata;
};

// Returns true if `lookahead` lies within one of the `len` inclusive ranges
// in `ranges`. The lookup is a binary search, so it relies on the ranges
// being sorted in ascending order and not overlapping.
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  // An empty set contains nothing. Without this guard the final check below
  // would read ranges[0], which is out of bounds for an empty array.
  if (len == 0) return false;

  uint32_t index = 0;
  uint32_t size = len - index;
  // Narrow the window [index, index + size) down to a single range.
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      index = mid_index;
    }
    size -= half_size;
  }
  // One candidate is left; test it directly.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}

/*
 *  Lexer Macros
 */

// Marks the declaration that follows as intentionally unused: a warning 4101
// suppression under MSVC, and the `unused` attribute elsewhere.
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

// Opens a lex function body. Declares `result`, `skip`, `eof` and
// `lookahead`, then jumps to `start`. The `next_state` label advances the
// lexer (passing the current `skip` flag) and falls through to `start`,
// which clears `skip` and reloads `lookahead`. The macros below expect a
// variable named `lexer` and an assignable `state` to be in scope.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

// Consumes the current character and continues in `state_value`.
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Table-driven ADVANCE: the arguments are (character, state) pairs. If the
// lookahead equals one of the characters, continues in the paired state;
// otherwise execution falls through. Entries are stored as uint16_t.
#define ADVANCE_MAP(...)                                              \
  {                                                                   \
    static const uint16_t map[] = { __VA_ARGS__ };                    \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
      if (map[i] == lookahead) {                                      \
        state = map[i + 1];                                           \
        goto next_state;                                              \
      }                                                               \
    }                                                                 \
  }

// Like ADVANCE, but sets `skip` so the lexer is advanced with skip = true.
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

// Records `symbol_value` as the recognized token and marks the token end.
// Expands to three statements without enclosing braces, so it must not be
// used as the unbraced body of an `if` or loop.
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

// Leaves the lex function, returning whether a token was accepted.
#define END_STATE() return result;

/*
 *  Parse Table Macros
 */

// Converts a state id to an index relative to LARGE_STATE_COUNT, which must
// be defined by the file that uses this macro.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

// Identity macros; they expand to their argument unchanged.
#define STATE(id) id

#define ACTIONS(id) id

// The macros below expand to brace-enclosed initializers whose nesting
// matches a TSParseActionEntry holding a TSParseAction.

// Shift to `state_value`.
#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value)          \
    }                                 \
  }}

// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value),         \
      .repetition = true              \
    }                                 \
  }}

// Shift with the `extra` flag set and no target state given.
#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

// Reduce `children` children to `symbol_name`, with the given dynamic
// precedence and production id.
#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{                                                       \
    .reduce = {                                            \
      .type = TSParseActionTypeReduce,                     \
      .symbol = symbol_name,                               \
      .child_count = children,                             \
      .dynamic_precedence = precedence,                    \
      .production_id = prod_id                             \
    },                                                     \
  }}

// Recover action; only the action type is set.
#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

// Accept action; only the action type is set.
#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_PARSER_H_



================================================
FILE: lib/src/point.h
================================================
#ifndef TREE_SITTER_POINT_H_
#define TREE_SITTER_POINT_H_

#include "tree_sitter/api.h"

// The point whose two fields are both zero.
#define POINT_ZERO ((TSPoint) {0, 0})
// The point whose two fields are both UINT32_MAX.
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})

static inline TSPoint point__new(unsigned row, unsigned column) {
  TSPoint result = {row, column};
  return result;
}

// Add the relative point `b` to `a`. When `b` spans at least one row, the
// rows are summed and the column is taken from `b` alone; otherwise the row
// of `a` is kept and the columns are summed.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  bool crosses_rows = b.row > 0;
  if (!crosses_rows) {
    return point__new(a.row, a.column + b.column);
  }
  return point__new(a.row + b.row, b.column);
}

// Subtract `b` from `a`. If `a` is on a later row, the row difference is
// returned together with the column of `a`. Otherwise the result is on row 0
// with the column difference, clamped to 0 when `b.column` exceeds `a.column`.
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  if (a.row <= b.row) {
    unsigned column_delta = 0;
    if (a.column >= b.column) {
      column_delta = a.column - b.column;
    }
    return point__new(0, column_delta);
  }
  return point__new(a.row - b.row, a.column);
}

// True when `a` is before or equal to `b`, comparing rows first and then
// columns.
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column <= b.column;
}

// True when `a` is strictly before `b`, comparing rows first and then
// columns.
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column < b.column;
}

// True when `a` is strictly after `b`, comparing rows first and then columns.
static inline bool point_gt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column > b.column;
}

// True when `a` is after or equal to `b`, comparing rows first and then
// columns.
static inline bool point_gte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column >= b.column;
}

// True when both the row and the column of `a` and `b` match.
static inline bool point_eq(TSPoint a, TSPoint b) {
  if (a.row != b.row) return false;
  return a.column == b.column;
}

#endif



================================================
FILE: lib/src/reduce_action.h
================================================
#ifndef TREE_SITTER_REDUCE_ACTION_H_
#define TREE_SITTER_REDUCE_ACTION_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./array.h"
#include "tree_sitter/api.h"

// Description of a single reduce action. Two actions are treated as
// duplicates by `ts_reduce_action_set_add` when their `symbol` and `count`
// both match; the remaining fields are not compared there.
typedef struct {
  uint32_t count;
  TSSymbol symbol;
  int dynamic_precedence;
  unsigned short production_id;
} ReduceAction;

// Growable array of reduce actions.
typedef Array(ReduceAction) ReduceActionSet;

// Append `new_action` to the set unless an entry with the same symbol and the
// same count is already present.
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
                                            ReduceAction new_action) {
  uint32_t index = 0;
  while (index < self->size) {
    const ReduceAction *existing = &self->contents[index];
    bool same_symbol = existing->symbol == new_action.symbol;
    if (same_symbol && existing->count == new_action.count) {
      return;
    }
    index++;
  }
  array_push(self, new_action);
}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_REDUCE_ACTION_H_



================================================
FILE: lib/src/reusable_node.h
================================================
#include "./subtree.h"

// One level of the path from the tree passed to `reusable_node_reset` down to
// the current node.
typedef struct {
  // The subtree at this level.
  Subtree tree;
  // Index of `tree` among the children of the entry below it on the stack.
  uint32_t child_index;
  // Byte offset recorded for `tree`: 0 at reset, then advanced by the total
  // bytes of each subtree that `reusable_node_advance` moves past.
  uint32_t byte_offset;
} StackEntry;

// Cursor over a previous syntax tree.
typedef struct {
  // Path of entries; the last element is the current node.
  Array(StackEntry) stack;
  // Updated by `reusable_node_advance` whenever the node being left has
  // external tokens.
  Subtree last_external_token;
} ReusableNode;

static inline ReusableNode reusable_node_new(void) {
  return (ReusableNode) {array_new(), NULL_SUBTREE};
}

// Reset the cursor to the empty state, keeping the stack's allocation.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL_SUBTREE;
  array_clear(&self->stack);
}

// Return the subtree at the top of the cursor's stack, or NULL_SUBTREE when
// the stack is empty.
static inline Subtree reusable_node_tree(ReusableNode *self) {
  if (self->stack.size == 0) {
    return NULL_SUBTREE;
  }
  return self->stack.contents[self->stack.size - 1].tree;
}

// Return the byte offset of the entry at the top of the cursor's stack, or
// UINT32_MAX when the stack is empty.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size == 0) {
    return UINT32_MAX;
  }
  return self->stack.contents[self->stack.size - 1].byte_offset;
}

// Free the storage owned by the cursor's stack. `last_external_token` is left
// untouched.
static inline void reusable_node_delete(ReusableNode *self) {
  array_delete(&self->stack);
}

// Move the cursor to the node that follows the current one: its next sibling
// or, when there is none, the next sibling of the nearest ancestor that has
// one. If no such node exists the stack ends up empty.
//
// The stack must not be empty when this is called.
static inline void reusable_node_advance(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);

  // The next node starts where the current one ends.
  uint32_t next_byte_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);

  if (ts_subtree_has_external_tokens(current.tree)) {
    self->last_external_token = ts_subtree_last_external_token(current.tree);
  }

  // Pop entries until the new top of the stack has a child after the one
  // that was just popped.
  Subtree parent;
  uint32_t sibling_index;
  for (;;) {
    StackEntry popped = array_pop(&self->stack);
    sibling_index = popped.child_index + 1;
    if (self->stack.size == 0) return;
    parent = array_back(&self->stack)->tree;
    if (sibling_index < ts_subtree_child_count(parent)) break;
  }

  StackEntry next_entry = {
    .tree = ts_subtree_children(parent)[sibling_index],
    .child_index = sibling_index,
    .byte_offset = next_byte_offset,
  };
  array_push(&self->stack, next_entry);
}

// Move the cursor to the first child of the current node. Returns false, and
// leaves the cursor unchanged, when the current node has no children.
//
// The stack must not be empty when this is called.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  if (ts_subtree_child_count(current.tree) == 0) {
    return false;
  }

  // The first child starts at the same byte offset as its parent.
  StackEntry first_child = {
    .tree = ts_subtree_children(current.tree)[0],
    .child_index = 0,
    .byte_offset = current.byte_offset,
  };
  array_push(&self->stack, first_child);
  return true;
}

// Descend to the leftmost leaf under the current node, then advance past it.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool descended;
  do {
    descended = reusable_node_descend(self);
  } while (descended);
  reusable_node_advance(self);
}

// Point the cursor at the first child of `tree`, discarding any previous
// position. If `tree` has no children the cursor is left empty.
static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
  reusable_node_clear(self);

  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  array_push(&self->stack, root_entry);

  // The root itself is never offered for reuse: accepting it applies
  // transformations (an added EOF child and any extra children) that give it
  // a non-standard internal structure. So step straight into its children.
  bool has_children = reusable_node_descend(self);
  if (!has_children) {
    reusable_node_clear(self);
  }
}



================================================
FILE: lib/src/stack.c
================================================
#include "./alloc.h"
#include "./language.h"
#include "./subtree.h"
#include "./array.h"
#include "./stack.h"
#include "./length.h"
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

// Capacity of each node's `links` array; `stack_node_add_link` silently drops
// a link once a node already holds this many.
#define MAX_LINK_COUNT 8
// Maximum number of released nodes kept in the node pool for reuse; further
// released nodes are freed.
#define MAX_NODE_POOL_SIZE 50
// Cap on simultaneously live iterators in `stack__iter`; additional branches
// are skipped once it is reached.
#define MAX_ITERATOR_COUNT 64

// `forceinline` requests inlining: `__forceinline` on Windows when not using
// a GNU-compatible compiler, otherwise a static inline function carrying the
// `always_inline` attribute.
#if defined _WIN32 && !defined __GNUC__
#define forceinline __forceinline
#else
#define forceinline static inline __attribute__((always_inline))
#endif

typedef struct StackNode StackNode;

// An edge from a stack node to one of its predecessors.
typedef struct {
  // The predecessor node this link leads to.
  StackNode *node;
  // The subtree pushed along this edge; its `ptr` may be NULL.
  Subtree subtree;
  // The `pending` flag that was passed when the subtree was pushed.
  bool is_pending;
} StackLink;

// A reference-counted node of the parse stack. A node may have several
// predecessors (up to MAX_LINK_COUNT).
struct StackNode {
  // Parse state at this node.
  TSStateId state;
  // Predecessor's position plus the total size of the pushed subtree.
  Length position;
  // Links to predecessors; only the first `link_count` entries are valid.
  StackLink links[MAX_LINK_COUNT];
  short unsigned int link_count;
  // Number of owners; the node is recycled or freed when it drops to zero.
  uint32_t ref_count;
  // Predecessor's error cost plus the error cost of the pushed subtree.
  unsigned error_cost;
  // Predecessor's node count plus the pushed subtree's node count; raised to
  // the maximum over links when links are added later.
  unsigned node_count;
  // Predecessor's dynamic precedence plus the pushed subtree's; raised to the
  // maximum over links when links are added later.
  int dynamic_precedence;
};

// State of one traversal path while walking the stack in `stack__iter`.
typedef struct {
  // The node this path has currently reached.
  StackNode *node;
  // Subtrees collected along the path (only when subtrees are requested).
  SubtreeArray subtrees;
  // Number of links traversed that count toward the goal: links with a
  // non-extra subtree and links with no subtree at all.
  uint32_t subtree_count;
  // Starts true; cleared once a counted link that is not pending is traversed.
  bool is_pending;
} StackIterator;

typedef Array(StackNode *) StackNodeArray;

// Lifecycle status of a stack version.
typedef enum {
  StackStatusActive,
  StackStatusPaused,
  StackStatusHalted,
} StackStatus;

// The head of one stack version.
typedef struct {
  // Top node of this version; the head holds one reference to it.
  StackNode *node;
  // Optional summary recorded by `ts_stack_record_summary`; owned by the head.
  StackSummary *summary;
  // Value of the top node's `node_count` captured at the last error.
  unsigned node_count_at_last_error;
  // Last external token for this version; retained by the head when set.
  Subtree last_external_token;
  // Lookahead stored by `ts_stack_pause` and handed back by `ts_stack_resume`.
  Subtree lookahead_when_paused;
  StackStatus status;
} StackHead;

// The parse stack: a set of version heads sharing a graph of nodes.
struct Stack {
  // One entry per stack version.
  Array(StackHead) heads;
  // Scratch result buffer reused by every `stack__iter` call.
  StackSliceArray slices;
  // Scratch iterator buffer reused by traversals.
  Array(StackIterator) iterators;
  // Released nodes kept for reuse (at most MAX_NODE_POOL_SIZE).
  StackNodeArray node_pool;
  // The bottom node that a cleared stack's single head points at.
  StackNode *base_node;
  // Pool used when releasing subtrees.
  SubtreePool *subtree_pool;
};

// Bit flags returned by a StackCallback; Stop and Pop may be OR-ed together.
typedef unsigned StackAction;
enum {
  // Keep walking this path and emit nothing.
  StackActionNone,
  // Stop walking this path.
  StackActionStop = 1,
  // Emit the subtrees collected so far on this path as a slice.
  StackActionPop = 2,
};

// Callback invoked by `stack__iter` for each iterator on each step, with the
// caller-supplied payload as its first argument.
typedef StackAction (*StackCallback)(void *, const StackIterator *);

// Take an additional reference to `self`. A NULL node is ignored. Asserts
// that the node is currently alive and that the counter does not wrap.
static void stack_node_retain(StackNode *self) {
  if (self) {
    ts_assert(self->ref_count > 0);
    self->ref_count++;
    ts_assert(self->ref_count != 0);
  }
}

// Drop one reference to `self`. When the count reaches zero, the node's
// links are released (subtrees and predecessor nodes) and the node is put
// into `pool`, or freed if the pool already holds MAX_NODE_POOL_SIZE nodes.
//
// Predecessors reached through links 1..n are released recursively; the
// first predecessor is handled by looping instead, so releasing a long chain
// of singly-linked nodes does not grow the call stack.
static void stack_node_release(
  StackNode *self,
  StackNodeArray *pool,
  SubtreePool *subtree_pool
) {
  for (;;) {
    ts_assert(self->ref_count != 0);
    self->ref_count--;
    if (self->ref_count > 0) return;

    StackNode *next_to_release = NULL;
    if (self->link_count > 0) {
      for (unsigned i = self->link_count - 1; i > 0; i--) {
        StackLink extra_link = self->links[i];
        if (extra_link.subtree.ptr) ts_subtree_release(subtree_pool, extra_link.subtree);
        stack_node_release(extra_link.node, pool, subtree_pool);
      }
      StackLink first_link = self->links[0];
      if (first_link.subtree.ptr) ts_subtree_release(subtree_pool, first_link.subtree);
      next_to_release = self->links[0].node;
    }

    // Recycle the node if the pool has room, otherwise free it.
    if (pool->size < MAX_NODE_POOL_SIZE) {
      array_push(pool, self);
    } else {
      ts_free(self);
    }

    if (!next_to_release) return;
    self = next_to_release;
  }
}

/// Count the nodes contributed by `subtree`, as a measure of how much
/// progress a given version of the stack has made.
static uint32_t stack__subtree_node_count(Subtree subtree) {
  uint32_t node_count = ts_subtree_visible_descendant_count(subtree);

  if (ts_subtree_visible(subtree)) {
    node_count += 1;
  }

  // Intermediate error nodes are not visible, but they are counted anyway:
  // a version's node count is what tells us whether it has made any progress
  // since it last encountered an error.
  if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) {
    node_count += 1;
  }

  return node_count;
}

// Create a node in `state` with a reference count of one, reusing a node
// from `pool` when one is available.
//
// With a `previous_node`, the new node gets a single link to it carrying
// `subtree` and `is_pending`, and inherits the predecessor's position, error
// cost, node count and dynamic precedence, each increased by the subtree's
// contribution when the subtree is non-null. The predecessor is not retained
// here. Without a `previous_node`, the node starts at position zero with no
// links.
static StackNode *stack_node_new(
  StackNode *previous_node,
  Subtree subtree,
  bool is_pending,
  TSStateId state,
  StackNodeArray *pool
) {
  StackNode *node;
  if (pool->size > 0) {
    node = array_pop(pool);
  } else {
    node = ts_malloc(sizeof(StackNode));
  }

  // Every field not listed here is zero-initialized.
  *node = (StackNode) {
    .state = state,
    .link_count = 0,
    .ref_count = 1,
  };

  if (!previous_node) {
    node->position = length_zero();
    node->error_cost = 0;
    return node;
  }

  node->links[0] = (StackLink) {
    .node = previous_node,
    .subtree = subtree,
    .is_pending = is_pending,
  };
  node->link_count = 1;

  node->position = previous_node->position;
  node->error_cost = previous_node->error_cost;
  node->node_count = previous_node->node_count;
  node->dynamic_precedence = previous_node->dynamic_precedence;

  if (subtree.ptr) {
    node->position = length_add(node->position, ts_subtree_total_size(subtree));
    node->error_cost += ts_subtree_error_cost(subtree);
    node->node_count += stack__subtree_node_count(subtree);
    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
  }

  return node;
}

// Decide whether two subtrees are interchangeable for the purpose of merging
// stack links.
static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
  // Identical pointers (including two null subtrees) are trivially equal; a
  // null subtree never matches a non-null one.
  if (left.ptr == right.ptr) return true;
  if (!left.ptr || !right.ptr) return false;

  // Different symbols can never be equivalent.
  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;

  // Two erroneous subtrees with the same symbol: not worth keeping both.
  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true;

  // Otherwise every one of the following properties has to agree.
  if (ts_subtree_padding(left).bytes != ts_subtree_padding(right).bytes) return false;
  if (ts_subtree_size(left).bytes != ts_subtree_size(right).bytes) return false;
  if (ts_subtree_child_count(left) != ts_subtree_child_count(right)) return false;
  if (ts_subtree_extra(left) != ts_subtree_extra(right)) return false;
  return ts_subtree_external_scanner_state_eq(left, right);
}

// Add `link` as an additional predecessor link of `self`, merging it into an
// existing link when possible.
//
// - A link from a node to itself is ignored.
// - If an equivalent subtree already links `self` to the same predecessor,
//   only the subtree with the higher dynamic precedence is kept.
// - If an equivalent subtree links `self` to a different predecessor with the
//   same state, byte position and error cost, the new predecessor's links are
//   merged recursively into the existing predecessor.
// - Otherwise the link is appended, retaining its node and subtree, unless
//   `self` already has MAX_LINK_COUNT links, in which case it is dropped.
//
// `self->node_count` and `self->dynamic_precedence` are raised where the new
// path yields a larger value.
static void stack_node_add_link(
  StackNode *self,
  StackLink link,
  SubtreePool *subtree_pool
) {
  if (link.node == self) return;

  for (int i = 0; i < self->link_count; i++) {
    StackLink *existing_link = &self->links[i];
    if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
      // In general, we preserve ambiguities until they are removed from the stack
      // during a pop operation where multiple paths lead to the same node. But in
      // the special case where two links directly connect the same pair of nodes,
      // we can safely remove the ambiguity ahead of time without changing behavior.
      if (existing_link->node == link.node) {
        if (
          ts_subtree_dynamic_precedence(link.subtree) >
          ts_subtree_dynamic_precedence(existing_link->subtree)
        ) {
          // Retain the incoming subtree before releasing the one it replaces.
          ts_subtree_retain(link.subtree);
          ts_subtree_release(subtree_pool, existing_link->subtree);
          existing_link->subtree = link.subtree;
          self->dynamic_precedence =
            link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
        }
        return;
      }

      // If the previous nodes are mergeable, merge them recursively.
      if (
        existing_link->node->state == link.node->state &&
        existing_link->node->position.bytes == link.node->position.bytes &&
        existing_link->node->error_cost == link.node->error_cost
      ) {
        for (int j = 0; j < link.node->link_count; j++) {
          stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
        }
        // Keep the best dynamic precedence reachable through the merged path.
        int32_t dynamic_precedence = link.node->dynamic_precedence;
        if (link.subtree.ptr) {
          dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
        }
        if (dynamic_precedence > self->dynamic_precedence) {
          self->dynamic_precedence = dynamic_precedence;
        }
        return;
      }
    }
  }

  // No room for another link: the new path is silently discarded.
  if (self->link_count == MAX_LINK_COUNT) return;

  stack_node_retain(link.node);
  unsigned node_count = link.node->node_count;
  int dynamic_precedence = link.node->dynamic_precedence;
  self->links[self->link_count++] = link;

  if (link.subtree.ptr) {
    ts_subtree_retain(link.subtree);
    node_count += stack__subtree_node_count(link.subtree);
    dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
  }

  if (node_count > self->node_count) self->node_count = node_count;
  if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
}

// Release everything owned by a stack head: its external token, its paused
// lookahead, its summary, and its reference to the top node. A head whose
// `node` is NULL is left untouched.
static void stack_head_delete(
  StackHead *self,
  StackNodeArray *pool,
  SubtreePool *subtree_pool
) {
  if (!self->node) return;

  if (self->last_external_token.ptr) {
    ts_subtree_release(subtree_pool, self->last_external_token);
  }

  if (self->lookahead_when_paused.ptr) {
    ts_subtree_release(subtree_pool, self->lookahead_when_paused);
  }

  StackSummary *summary = self->summary;
  if (summary) {
    array_delete(summary);
    ts_free(summary);
  }

  stack_node_release(self->node, pool, subtree_pool);
}

// Append a new active version whose top is `node`, and return its index. The
// new head inherits `node_count_at_last_error` and `last_external_token` from
// `original_version`; it retains both `node` and the token. It has no summary
// and no paused lookahead.
static StackVersion ts_stack__add_version(
  Stack *self,
  StackVersion original_version,
  StackNode *node
) {
  // Read the inherited fields before pushing, since the push may move `heads`.
  const StackHead *original = array_get(&self->heads, original_version);
  StackHead new_head = {
    .node = node,
    .status = StackStatusActive,
    .node_count_at_last_error = original->node_count_at_last_error,
    .last_external_token = original->last_external_token,
    .lookahead_when_paused = NULL_SUBTREE,
  };

  array_push(&self->heads, new_head);
  stack_node_retain(node);
  if (new_head.last_external_token.ptr) {
    ts_subtree_retain(new_head.last_external_token);
  }

  return (StackVersion)(self->heads.size - 1);
}

// Record `subtrees` as a slice ending at `node`. If an earlier slice's
// version already has `node` as its top, the new slice reuses that version
// and is inserted right after that slice; otherwise a new version is created
// for `node` and the slice is appended.
static void ts_stack__add_slice(
  Stack *self,
  StackVersion original_version,
  StackNode *node,
  SubtreeArray *subtrees
) {
  // Search from the most recent slice backwards.
  for (uint32_t i = self->slices.size; i > 0; i--) {
    StackVersion existing_version = array_get(&self->slices, i - 1)->version;
    if (array_get(&self->heads, existing_version)->node != node) continue;

    StackSlice slice = {*subtrees, existing_version};
    array_insert(&self->slices, i, slice);
    return;
  }

  StackVersion new_version = ts_stack__add_version(self, original_version, node);
  StackSlice slice = {*subtrees, new_version};
  array_push(&self->slices, slice);
}

// Walk backwards from the top of `version` along every path of links,
// invoking `callback` for each path at each step.
//
// The callback's return value decides what happens to a path: StackActionPop
// emits the path's collected subtrees as a slice (see `ts_stack__add_slice`),
// StackActionStop ends the path. A path also ends when it reaches a node with
// no links.
//
// When `goal_subtree_count` is non-negative, the subtrees along each path are
// collected (and retained) and storage for them is reserved up front; when it
// is negative, no subtrees are collected.
//
// Returns `self->slices`, which is reused by the next call.
static StackSliceArray stack__iter(
  Stack *self,
  StackVersion version,
  StackCallback callback,
  void *payload,
  int goal_subtree_count
) {
  array_clear(&self->slices);
  array_clear(&self->iterators);

  StackHead *head = array_get(&self->heads, version);
  StackIterator new_iterator = {
    .node = head->node,
    .subtrees = array_new(),
    .subtree_count = 0,
    .is_pending = true,
  };

  bool include_subtrees = false;
  if (goal_subtree_count >= 0) {
    include_subtrees = true;
    array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
  }

  array_push(&self->iterators, new_iterator);

  while (self->iterators.size > 0) {
    // `size` is captured up front so iterators pushed during this pass are
    // only visited on the next pass.
    for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
      StackIterator *iterator = array_get(&self->iterators, i);
      StackNode *node = iterator->node;

      StackAction action = callback(payload, iterator);
      bool should_pop = action & StackActionPop;
      bool should_stop = action & StackActionStop || node->link_count == 0;

      if (should_pop) {
        SubtreeArray subtrees = iterator->subtrees;
        // If the path keeps going, the slice needs its own copy; otherwise
        // ownership of the iterator's array moves to the slice.
        if (!should_stop) {
          ts_subtree_array_copy(subtrees, &subtrees);
        }
        // Subtrees were collected top-down; slices store them in the
        // opposite order.
        ts_subtree_array_reverse(&subtrees);
        ts_stack__add_slice(
          self,
          version,
          node,
          &subtrees
        );
      }

      if (should_stop) {
        if (!should_pop) {
          ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
        }
        array_erase(&self->iterators, i);
        i--, size--;
        continue;
      }

      // Links 1..n-1 each fork a copy of the current iterator; link 0 is
      // handled last (when j == link_count) and advances the iterator in
      // place, so the forks are copied from its pre-advance state.
      for (uint32_t j = 1; j <= node->link_count; j++) {
        StackIterator *next_iterator;
        StackLink link;
        if (j == node->link_count) {
          link = node->links[0];
          next_iterator = array_get(&self->iterators, i);
        } else {
          // Too many live paths: skip this branch entirely.
          if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
          link = node->links[j];
          StackIterator current_iterator = *array_get(&self->iterators, i);
          array_push(&self->iterators, current_iterator);
          next_iterator = array_back(&self->iterators);
          ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
        }

        next_iterator->node = link.node;
        if (link.subtree.ptr) {
          if (include_subtrees) {
            array_push(&next_iterator->subtrees, link.subtree);
            ts_subtree_retain(link.subtree);
          }

          // Extra subtrees are collected but do not count toward the goal.
          if (!ts_subtree_extra(link.subtree)) {
            next_iterator->subtree_count++;
            if (!link.is_pending) {
              next_iterator->is_pending = false;
            }
          }
        } else {
          // A link without a subtree still counts as one step.
          next_iterator->subtree_count++;
          next_iterator->is_pending = false;
        }
      }
    }
  }

  return self->slices;
}

// Allocate a stack that releases subtrees through `subtree_pool`. The
// returned stack holds a single active version positioned on the base node,
// which is created with state 1.
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *stack = ts_calloc(1, sizeof(Stack));

  array_init(&stack->heads);
  array_init(&stack->slices);
  array_init(&stack->iterators);
  array_init(&stack->node_pool);

  // Pre-size the working arrays; the node pool gets its full capacity.
  const uint32_t initial_capacity = 4;
  array_reserve(&stack->heads, initial_capacity);
  array_reserve(&stack->slices, initial_capacity);
  array_reserve(&stack->iterators, initial_capacity);
  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);

  stack->subtree_pool = subtree_pool;
  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);
  ts_stack_clear(stack);

  return stack;
}

// Free the stack and everything it owns: the scratch arrays, the base node
// reference, every version head, all pooled nodes, and the stack itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents)
    array_delete(&self->slices);
  if (self->iterators.contents)
    array_delete(&self->iterators);
  // Drop the stack's own reference to the base node, then the heads'
  // references; released nodes land in the node pool (or are freed).
  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    stack_head_delete(array_get(&self->heads, i), &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);
  // The pool must be drained only after all releases above, since those
  // releases are what fill it.
  if (self->node_pool.contents) {
    for (uint32_t i = 0; i < self->node_pool.size; i++)
      ts_free(*array_get(&self->node_pool, i));
    array_delete(&self->node_pool);
  }
  array_delete(&self->heads);
  ts_free(self);
}

// Number of versions (heads) currently in the stack, whatever their status.
uint32_t ts_stack_version_count(const Stack *self) {
  uint32_t version_count = self->heads.size;
  return version_count;
}

// Number of versions whose status is halted.
uint32_t ts_stack_halted_version_count(Stack *self) {
  uint32_t halted = 0;
  uint32_t index = 0;
  while (index < self->heads.size) {
    if (array_get(&self->heads, index)->status == StackStatusHalted) {
      halted += 1;
    }
    index += 1;
  }
  return halted;
}

// Parse state stored in the top node of `version`.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->state;
}

// Position stored in the top node of `version`.
Length ts_stack_position(const Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->position;
}

// Last external token recorded for `version`. The returned subtree is not
// retained on behalf of the caller.
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}

// Replace the last external token of `version`. The new token is retained
// and the previous one released; either may be null. The retain happens
// first so that passing the token already stored is safe.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
  StackHead *head = array_get(&self->heads, version);
  Subtree previous_token = head->last_external_token;

  if (token.ptr) {
    ts_subtree_retain(token);
  }
  if (previous_token.ptr) {
    ts_subtree_release(self->subtree_pool, previous_token);
  }
  head->last_external_token = token;
}

// Error cost of `version`: the top node's accumulated cost, plus
// ERROR_COST_PER_RECOVERY when the version is paused or when its top node is
// in the error state and its first link carries no subtree.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  const StackNode *top = head->node;
  unsigned cost = top->error_cost;

  if (head->status == StackStatusPaused) {
    return cost + ERROR_COST_PER_RECOVERY;
  }
  if (top->state == ERROR_STATE && !top->links[0].subtree.ptr) {
    return cost + ERROR_COST_PER_RECOVERY;
  }
  return cost;
}

// Number of nodes added to `version` since its last error. If the top node's
// count has fallen below the recorded baseline, the baseline is lowered to
// match first, so the result is never negative.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned current_count = head->node->node_count;

  if (current_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = current_count;
  }

  return current_count - head->node_count_at_last_error;
}

// Push `subtree` onto `version` and move the version to `state`. The head's
// reference to its old top node becomes the new node's link, and the subtree
// is stored without an extra retain. Pushing a null subtree also resets the
// version's node-count baseline to the new node's count.
void ts_stack_push(
  Stack *self,
  StackVersion version,
  Subtree subtree,
  bool pending,
  TSStateId state
) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
  head->node = pushed;
  if (!subtree.ptr) {
    head->node_count_at_last_error = pushed->node_count;
  }
}

// Iteration callback for `ts_stack_pop_count`: pop and stop as soon as a path
// has collected exactly the requested number of subtrees. `payload` points to
// that requested count.
forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
  const unsigned *goal_subtree_count = payload;
  bool reached_goal = iterator->subtree_count == *goal_subtree_count;
  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
}

// Pop `count` counted subtrees from `version` along every path, returning one
// slice per path.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  int goal_subtree_count = (int)count;
  return stack__iter(self, version, pop_count_callback, &count, goal_subtree_count);
}


// Iteration callback for `ts_stack_pop_pending`: once a path has traversed at
// least one counted link, stop; pop as well only if the path is still marked
// pending.
forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->subtree_count < 1) {
    return StackActionNone;
  }
  if (!iterator->is_pending) {
    return StackActionStop;
  }
  return StackActionPop | StackActionStop;
}

// Pop the top subtree of `version` if it is pending. When something was
// popped, the version created for the first slice is renumbered to take the
// place of `version`, and the slice is updated to refer to it.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (pop.size == 0) {
    return pop;
  }

  ts_stack_renumber_version(self, array_get(&pop, 0)->version, version);
  StackSlice *first_slice = array_get(&pop, 0);
  first_slice->version = version;
  return pop;
}

// Iteration callback for `ts_stack_pop_error`: every path stops after its
// first collected subtree, and the first path whose subtree is an error is
// popped. `payload` points to a flag recording whether an error has already
// been found.
forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtrees.size == 0) {
    return StackActionNone;
  }

  bool *found_error = payload;
  if (*found_error) {
    return StackActionStop;
  }
  if (!ts_subtree_is_error(*array_get(&iterator->subtrees, 0))) {
    return StackActionStop;
  }

  *found_error = true;
  return StackActionPop | StackActionStop;
}

// If the top of `version` is linked through an error subtree, pop it and
// return the popped subtrees, renumbering the resulting version into
// `version`'s slot. Returns an array of size zero otherwise.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *top = array_get(&self->heads, version)->node;

  for (unsigned i = 0; i < top->link_count; i++) {
    Subtree linked = top->links[i].subtree;
    if (!linked.ptr || !ts_subtree_is_error(linked)) continue;

    // Only one traversal is attempted, for the first error link found.
    bool found_error = false;
    StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (pop.size == 0) break;

    ts_assert(pop.size == 1);
    ts_stack_renumber_version(self, array_get(&pop, 0)->version, version);
    return array_get(&pop, 0)->subtrees;
  }

  return (SubtreeArray) {.size = 0};
}

// Iteration callback for `ts_stack_pop_all`: pop only once a path has reached
// a node without links.
forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->node->link_count == 0) {
    return StackActionPop;
  }
  return StackActionNone;
}

// Pop every subtree of `version`, producing one slice per path that reaches a
// node without links.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
  return slices;
}

// Payload passed to `summarize_stack_callback`.
typedef struct {
  // Summary being filled in.
  StackSummary *summary;
  // Paths deeper than this are not followed further.
  unsigned max_depth;
} SummarizeStackSession;

// Iteration callback for `ts_stack_record_summary`: record the state, depth
// and position of each node visited, skipping (depth, state) pairs that were
// already recorded, and stop a path once it exceeds the session's max depth.
forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
  SummarizeStackSession *session = payload;
  StackSummary *summary = session->summary;
  unsigned depth = iterator->subtree_count;
  TSStateId state = iterator->node->state;

  if (depth > session->max_depth) return StackActionStop;

  // Scan existing entries from newest to oldest; entries at a smaller depth
  // end the search.
  for (unsigned i = summary->size; i > 0; i--) {
    StackSummaryEntry existing = *array_get(summary, i - 1);
    if (existing.depth < depth) break;
    if (existing.depth == depth && existing.state == state) return StackActionNone;
  }

  StackSummaryEntry new_entry = {
    .position = iterator->node->position,
    .depth = depth,
    .state = state,
  };
  array_push(summary, new_entry);
  return StackActionNone;
}

// Build a summary of the states reachable from the top of `version`, looking
// at most `max_depth` counted links deep, and store it on the version's head.
// Any summary previously stored there is freed.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  StackSummary *fresh_summary = ts_malloc(sizeof(StackSummary));
  SummarizeStackSession session = {
    .summary = fresh_summary,
    .max_depth = max_depth
  };
  array_init(session.summary);

  // A negative goal count tells the traversal not to collect subtrees.
  stack__iter(self, version, summarize_stack_callback, &session, -1);

  StackHead *head = array_get(&self->heads, version);
  StackSummary *stale_summary = head->summary;
  if (stale_summary) {
    array_delete(stale_summary);
    ts_free(stale_summary);
  }
  head->summary = session.summary;
}

// Summary stored on `version`'s head, or NULL if none has been recorded. The
// head keeps ownership of it.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}

// Dynamic precedence stored in the top node of `version`.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  const StackNode *top = array_get(&self->heads, version)->node;
  return top->dynamic_precedence;
}

// Report whether `version` has consumed any input since its last error.
//
// A version with no error cost has trivially advanced. Otherwise the first
// link of each node is followed from the top: a subtree with non-zero total
// bytes means the version advanced; an empty, error-free subtree on a node
// whose count exceeds the head's baseline is stepped over; anything else ends
// the search with a negative answer.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *node = head->node;
  if (node->error_cost == 0) return true;

  while (node) {
    if (node->link_count == 0) break;

    Subtree subtree = node->links[0].subtree;
    if (!subtree.ptr) break;
    if (ts_subtree_total_bytes(subtree) > 0) return true;

    if (node->node_count <= head->node_count_at_last_error) break;
    if (ts_subtree_error_cost(subtree) != 0) break;

    node = node->links[0].node;
  }

  return false;
}

// Release everything owned by `version`'s head and remove the head, shifting
// the indices of all later versions down by one.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *doomed_head = array_get(&self->heads, version);
  stack_head_delete(doomed_head, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}

// Move version `v1` into slot `v2` (which must be the lower index),
// discarding what `v2` held. If only `v2` had a summary, it is carried over
// to the surviving head. Does nothing when the two versions are the same.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  if (v1 == v2) return;
  ts_assert(v2 < v1);
  ts_assert((uint32_t)v1 < self->heads.size);

  StackHead *source_head = array_get(&self->heads, v1);
  StackHead *target_head = array_get(&self->heads, v2);

  // Hand the target's summary to the source before the target is deleted.
  bool inherit_summary = target_head->summary && !source_head->summary;
  if (inherit_summary) {
    source_head->summary = target_head->summary;
    target_head->summary = NULL;
  }

  stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
  *target_head = *source_head;
  array_erase(&self->heads, v1);
}

// Exchange the heads stored at indices `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead *first = array_get(&self->heads, v1);
  StackHead *second = array_get(&self->heads, v2);
  StackHead saved_first = *first;
  *first = *second;
  *second = saved_first;
}

// Append a copy of `version` and return the index of the copy.
//
// The copy shares the original's top node and subtrees, taking its own
// reference to each, so that both heads can later be deleted independently.
// The summary is not shared: the copy starts without one.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  ts_assert(version < self->heads.size);
  // Copy by value first: pushing may reallocate `heads`.
  StackHead version_head = *array_get(&self->heads, version);
  array_push(&self->heads, version_head);
  StackHead *head = array_back(&self->heads);
  stack_node_retain(head->node);
  if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token);
  // `stack_head_delete` releases `lookahead_when_paused` for every head that
  // has one, so a copied head needs its own reference; without it, copying a
  // paused version would lead to the lookahead being released twice.
  if (head->lookahead_when_paused.ptr) ts_subtree_retain(head->lookahead_when_paused);
  head->summary = NULL;
  return self->heads.size - 1;
}

// Merge `version2` into `version1` if the two are mergeable (see
// `ts_stack_can_merge`). On success every link of `version2`'s top node is
// added to `version1`'s top node, `version2` is removed, and true is
// returned; otherwise nothing changes and false is returned.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) return false;

  StackHead *surviving_head = array_get(&self->heads, version1);
  StackNode *surviving = surviving_head->node;
  StackNode *absorbed = array_get(&self->heads, version2)->node;

  for (uint32_t i = 0; i < absorbed->link_count; i++) {
    stack_node_add_link(surviving, absorbed->links[i], self->subtree_pool);
  }

  // In the error state, the merged node count becomes the new baseline.
  if (surviving->state == ERROR_STATE) {
    surviving_head->node_count_at_last_error = surviving->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}

// Two versions can be merged when both are active and their top nodes agree
// on state, byte position and error cost, and their last external tokens have
// equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *head1 = array_get(&self->heads, version1);
  StackHead *head2 = array_get(&self->heads, version2);

  if (head1->status != StackStatusActive) return false;
  if (head2->status != StackStatusActive) return false;

  const StackNode *top1 = head1->node;
  const StackNode *top2 = head2->node;
  if (top1->state != top2->state) return false;
  if (top1->position.bytes != top2->position.bytes) return false;
  if (top1->error_cost != top2->error_cost) return false;

  return ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
}

// Mark `version` as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}

// Mark `version` as paused, remembering `lookahead` so that it can be handed
// back by `ts_stack_resume`. The lookahead is stored without being retained.
// The version's node-count baseline is reset to the top node's current count.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->lookahead_when_paused = lookahead;
  head->node_count_at_last_error = head->node->node_count;
  head->status = StackStatusPaused;
}

// True when `version` is in the active status.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  StackStatus status = array_get(&self->heads, version)->status;
  return status == StackStatusActive;
}

// True when `version` is in the halted status.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  StackStatus status = array_get(&self->heads, version)->status;
  return status == StackStatusHalted;
}

// True when `version` is in the paused status.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  StackStatus status = array_get(&self->heads, version)->status;
  return status == StackStatusPaused;
}

// Reactivate a paused `version` and return the lookahead that was stored when
// it was paused. The head gives up its stored lookahead, so the caller
// becomes responsible for it. Asserts that the version is paused.
Subtree ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  ts_assert(head->status == StackStatusPaused);

  Subtree stored_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = NULL_SUBTREE;
  head->status = StackStatusActive;
  return stored_lookahead;
}

// Discard every version and leave the stack with a single active version
// positioned on the base node.
void ts_stack_clear(Stack *self) {
  // Take the new head's reference to the base node up front, before the
  // existing heads drop theirs.
  stack_node_retain(self->base_node);

  uint32_t head_count = self->heads.size;
  for (uint32_t i = 0; i < head_count; i++) {
    StackHead *head = array_get(&self->heads, i);
    stack_head_delete(head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  StackHead initial_head = {
    .node = self->base_node,
    .status = StackStatusActive,
    .last_external_token = NULL_SUBTREE,
    .lookahead_when_paused = NULL_SUBTREE,
  };
  array_push(&self->heads, initial_head);
}

// Write a Graphviz DOT description of the stack to `f` (stderr when `f` is
// NULL). Halted versions are omitted. Each remaining head is drawn as an
// invisible node pointing at its top stack node, and every stack node
// reachable from those heads is emitted once together with its links.
//
// `language` is used to print the symbol name of each link's subtree.
// Always returns true.
//
// This reuses `self->iterators` as scratch space, so it must not be called
// while a stack traversal is in progress.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
  array_reserve(&self->iterators, 32);
  if (!f) f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  // Emit one pseudo-node per live head and seed an iterator at its top node.
  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    StackHead *head = array_get(&self->heads, i);
    if (head->status == StackStatusHalted) continue;

    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);

    if (head->status == StackStatusPaused) {
      fprintf(f, "color=red ");
    }
    fprintf(f,
      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
      i,
      ts_stack_node_count_since_error(self, i),
      ts_stack_error_cost(self, i)
    );

    if (head->summary) {
      fprintf(f, "\nsummary:");
      for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", array_get(head->summary, j)->state);
    }

    if (head->last_external_token.ptr) {
      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
      const char *data = ts_external_scanner_state_data(state);
      fprintf(f, "\nexternal_scanner_state:");
      // Print each byte as an unsigned value: a plain `char` may be signed,
      // in which case bytes >= 0x80 would be sign-extended and show up as
      // eight hex digits instead of two.
      for (uint32_t j = 0; j < state->length; j++) {
        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
      }
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((StackIterator) {
      .node = head->node
    }));
  }

  // Repeatedly advance every iterator one node until no iterator reaches a
  // node that has not been printed yet.
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      StackIterator iterator = *array_get(&self->iterators, i);
      StackNode *node = iterator.node;

      // Skip nodes that were already emitted.
      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (*array_get(&visited_nodes, j) == node) {
          node = NULL;
          break;
        }
      }

      if (!node) continue;
      all_iterators_done = false;

      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE) {
        fprintf(f, "label=\"?\"");
      } else if (
        node->link_count == 1 &&
        node->links[0].subtree.ptr &&
        ts_subtree_extra(node->links[0].subtree)
      ) {
        // Nodes reached only through an extra subtree are drawn as a dot.
        fprintf(f, "shape=point margin=0 label=\"\"");
      } else {
        fprintf(f, "label=\"%d\"", node->state);
      }

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
        node->position.extent.row + 1,
        node->position.extent.column,
        node->node_count,
        node->error_cost,
        node->dynamic_precedence
      );

      for (int j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending) fprintf(f, "style=dashed ");
        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");

        if (!link.subtree.ptr) {
          fprintf(f, "color=red");
        } else {
          fprintf(f, "label=\"");
          // Visible but unnamed symbols are shown in single quotes.
          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
          if (quoted) fprintf(f, "'");
          ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree));
          if (quoted) fprintf(f, "'");
          fprintf(f, "\"");
          fprintf(
            f,
            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"",
            ts_subtree_error_cost(link.subtree),
            ts_subtree_dynamic_precedence(link.subtree)
          );
        }

        fprintf(f, "];\n");

        // The first link advances this iterator in place; every further
        // link gets a new iterator appended to the list.
        StackIterator *next_iterator;
        if (j == 0) {
          next_iterator = array_get(&self->iterators, i);
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  return true;
}

#undef forceinline



================================================
FILE: lib/src/stack.h
================================================
#ifndef TREE_SITTER_PARSE_STACK_H_
#define TREE_SITTER_PARSE_STACK_H_

// Public interface of the parse stack: a stack that can hold several
// "versions" at once, each identified by a StackVersion index.

#ifdef __cplusplus
extern "C" {
#endif

#include "./array.h"
#include "./subtree.h"
#include <stdio.h>

// The stack type is only forward-declared here; its layout is not exposed by
// this header.
typedef struct Stack Stack;

// Index identifying one version of the stack.
typedef unsigned StackVersion;
// Sentinel: the largest value representable by StackVersion.
#define STACK_VERSION_NONE ((StackVersion)-1)

// A group of subtrees paired with the stack version they belong to (see
// ts_stack_pop_count).
typedef struct {
  SubtreeArray subtrees;
  StackVersion version;
} StackSlice;
typedef Array(StackSlice) StackSliceArray;

// One entry of a stack summary: a parse state together with a position and a
// depth.
// NOTE(review): `depth` presumably counts entries from the top of the stack --
// confirm against ts_stack_record_summary.
typedef struct {
  Length position;
  unsigned depth;
  TSStateId state;
} StackSummaryEntry;
typedef Array(StackSummaryEntry) StackSummary;

// Create a stack.
Stack *ts_stack_new(SubtreePool *subtree_pool);

// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *self);

// Get the stack's current number of versions.
uint32_t ts_stack_version_count(const Stack *self);

// Get the stack's current number of halted versions.
uint32_t ts_stack_halted_version_count(Stack *self);

// Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the initial state, 0.
TSStateId ts_stack_state(const Stack *self, StackVersion version);

// Get the last external token associated with a given version of the stack.
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version);

// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token);

// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);

// Push a tree and state onto the given version of the stack.
//
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state);

// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count);

// Remove an error at the top of the given version of the stack.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version);

// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version);

// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version);

// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version);

// NOTE(review): presumably the dynamic precedence accumulated on the given
// version of the stack -- confirm against the implementation.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version);

// NOTE(review): presumably reports whether the given version has consumed any
// input since its most recent error -- confirm against the implementation.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version);

// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth);

// Retrieve a summary of all the parse states near the top of the
// given version of the stack.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version);

// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version);

// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2);

// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2);

// Version status control. The declarations below let a version be paused with
// a lookahead subtree, resumed (yielding a subtree), or halted, and let callers
// query which of those statuses a version is in.
// NOTE(review): exact status semantics are inferred from the names only --
// confirm against the implementation.
Subtree ts_stack_resume(Stack *self, StackVersion version);

void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead);

void ts_stack_halt(Stack *self, StackVersion version);

bool ts_stack_is_active(const Stack *self, StackVersion version);

bool ts_stack_is_paused(const Stack *self, StackVersion version);

bool ts_stack_is_halted(const Stack *self, StackVersion version);

// Version bookkeeping.
// NOTE(review): presumably renumber moves v1 into v2's slot, swap exchanges
// two versions, and copy duplicates a version and returns the new index --
// confirm against the implementation.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2);

void ts_stack_swap_versions(Stack *, StackVersion v1, StackVersion v2);

StackVersion ts_stack_copy_version(Stack *self, StackVersion version);

// Remove the given version from the stack.
void ts_stack_remove_version(Stack *self, StackVersion version);

// NOTE(review): presumably resets the stack to an empty state -- confirm
// against the implementation.
void ts_stack_clear(Stack *self);

// Print the stack as a DOT graph to `f`, using `language` to render the
// symbol names that label the links.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_PARSE_STACK_H_



================================================
FILE: lib/src/subtree.c
================================================
#include <ctype.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "./alloc.h"
#include "./array.h"
#include "./atomic.h"
#include "./subtree.h"
#include "./length.h"
#include "./language.h"
#include "./error_costs.h"
#include "./ts_assert.h"
#include <stddef.h>

// An edit expressed as three positions, as consumed by ts_subtree_edit: where
// the edit begins, where the replaced range ended before the edit, and where
// the replacement ends after the edit.
typedef struct {
  Length start;
  Length old_end;
  Length new_end;
} Edit;

// Exclusive upper bound applied by ts_subtree_can_inline to the byte/column
// lengths of a subtree that is stored inline.
#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
// Maximum number of freed leaf allocations kept in a SubtreePool for reuse
// (see ts_subtree_pool_free).
#define TS_MAX_TREE_POOL_SIZE 32

// ExternalScannerState

// Initialize `self` with a copy of the first `length` bytes of `data`.
//
// A state that fits in the embedded `short_data` buffer is stored there;
// anything larger is copied into a freshly allocated `long_data` buffer.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
  self->length = length;

  // Small state: keep it inline, no allocation needed.
  if (length <= sizeof(self->short_data)) {
    memcpy(self->short_data, data, length);
    return;
  }

  // Large state: store it out of line.
  self->long_data = ts_malloc(length);
  memcpy(self->long_data, data, length);
}

// Return a copy of `self`.
//
// Inline states are duplicated by the struct copy alone. States that live in
// a separate `long_data` buffer get their own newly allocated buffer so the
// copy does not share storage with the original.
ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
  ExternalScannerState copy = *self;
  bool stored_out_of_line = self->length > sizeof(self->short_data);
  if (!stored_out_of_line) return copy;

  copy.long_data = ts_malloc(self->length);
  memcpy(copy.long_data, self->long_data, self->length);
  return copy;
}

// Release the storage owned by `self`. Only states too large for the embedded
// `short_data` buffer own a separate allocation.
void ts_external_scanner_state_delete(ExternalScannerState *self) {
  bool stored_out_of_line = self->length > sizeof(self->short_data);
  if (stored_out_of_line) ts_free(self->long_data);
}

// Return a pointer to the state's bytes, wherever they are stored: the
// out-of-line `long_data` buffer for large states, `short_data` otherwise.
const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
  return self->length > sizeof(self->short_data)
    ? self->long_data
    : self->short_data;
}

// Report whether the state holds exactly the `length` bytes found in `buffer`.
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) {
  // Differing lengths can never match; skip the byte comparison.
  if (self->length != length) return false;
  const char *stored = ts_external_scanner_state_data(self);
  return memcmp(stored, buffer, length) == 0;
}

// SubtreeArray

// Copy `self` into `dest`, retaining every copied subtree.
//
// When `self` has no capacity, `dest` simply mirrors its fields. Otherwise
// `dest` receives its own zero-filled buffer of the same capacity, and each of
// the `size` copied entries has its reference count incremented.
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
  dest->contents = self.contents;
  dest->size = self.size;
  dest->capacity = self.capacity;
  if (self.capacity == 0) return;

  dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
  memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
  for (uint32_t index = 0; index < self.size; index++) {
    ts_subtree_retain(*array_get(dest, index));
  }
}

// Release every subtree held by the array, then empty the array.
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) {
  uint32_t index = 0;
  while (index < self->size) {
    ts_subtree_release(pool, *array_get(self, index));
    index++;
  }
  array_clear(self);
}

// Release every subtree held by the array and then delete the array itself.
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
  ts_subtree_array_clear(pool, self);
  array_delete(self);
}

// Move the run of 'extra' subtrees at the end of `self` into `destination`.
//
// `destination` is cleared first. Extras are popped off the back of `self`
// one at a time, so `destination` is reversed at the end to restore their
// original relative order.
void ts_subtree_array_remove_trailing_extras(
  SubtreeArray *self,
  SubtreeArray *destination
) {
  array_clear(destination);
  for (;;) {
    if (self->size == 0) break;
    Subtree candidate = *array_get(self, self->size - 1);
    if (!ts_subtree_extra(candidate)) break;
    self->size--;
    array_push(destination, candidate);
  }
  ts_subtree_array_reverse(destination);
}

// Reverse the order of the array's elements in place.
void ts_subtree_array_reverse(SubtreeArray *self) {
  uint32_t half = self->size / 2;
  for (uint32_t front = 0; front < half; front++) {
    // Mirror position of `front`, counted from the end of the array.
    size_t back = self->size - 1 - front;
    Subtree held = *array_get(self, back);
    *array_get(self, back) = *array_get(self, front);
    *array_get(self, front) = held;
  }
}

// SubtreePool

SubtreePool ts_subtree_pool_new(uint32_t capacity) {
  SubtreePool self = {array_new(), array_new()};
  array_reserve(&self.free_trees, capacity);
  return self;
}

// Free everything owned by the pool: each cached allocation on the free list,
// the free list itself, and the scratch tree stack. Arrays that were never
// allocated are left untouched.
void ts_subtree_pool_delete(SubtreePool *self) {
  if (self->free_trees.contents != NULL) {
    for (unsigned index = 0; index < self->free_trees.size; index++) {
      MutableSubtree *cached = array_get(&self->free_trees, index);
      ts_free(cached->ptr);
    }
    array_delete(&self->free_trees);
  }

  if (self->tree_stack.contents != NULL) {
    array_delete(&self->tree_stack);
  }
}

// Obtain storage for one SubtreeHeapData: reuse a cached allocation from the
// pool's free list when one is available, otherwise allocate a new one. The
// returned memory is not initialized by this function.
static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
  if (self->free_trees.size == 0) {
    return ts_malloc(sizeof(SubtreeHeapData));
  }
  return array_pop(&self->free_trees).ptr;
}

// Give a SubtreeHeapData allocation back to the pool.
//
// The allocation is cached on the free list only if the list was given a
// capacity and caching it keeps the list within TS_MAX_TREE_POOL_SIZE entries;
// otherwise the memory is freed immediately.
static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
  bool pooling_enabled = self->free_trees.capacity > 0;
  bool has_room = self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE;
  if (!(pooling_enabled && has_room)) {
    ts_free(tree);
    return;
  }
  array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
}

// Subtree

// Decide whether a leaf with the given padding, size and lookahead is small
// enough for the inline subtree representation. Every limit is exclusive, and
// the size must not span any rows.
static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
  if (padding.bytes >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (padding.extent.row >= 16) return false;
  if (padding.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (size.bytes >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (size.extent.row != 0) return false;
  if (size.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (lookahead_bytes >= 16) return false;
  return true;
}

// Create a leaf subtree for `symbol`.
//
// The leaf is stored inline (no allocation) when its symbol fits in 8 bits, it
// carries no external tokens, and its dimensions pass ts_subtree_can_inline.
// Otherwise a SubtreeHeapData is taken from `pool` and fully initialized with
// a reference count of one. The `visible` and `named` flags come from the
// language's metadata for the symbol; the leaf is marked 'extra' only when the
// symbol is the builtin end symbol. Note that `depends_on_column` is recorded
// only in the heap representation.
Subtree ts_subtree_new_leaf(
  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
  uint32_t lookahead_bytes, TSStateId parse_state,
  bool has_external_tokens, bool depends_on_column,
  bool is_keyword, const TSLanguage *language
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  bool extra = symbol == ts_builtin_sym_end;

  bool fits_inline =
    symbol <= UINT8_MAX &&
    !has_external_tokens &&
    ts_subtree_can_inline(padding, size, lookahead_bytes);

  if (!fits_inline) {
    SubtreeHeapData *heap_data = ts_subtree_pool_allocate(pool);
    *heap_data = (SubtreeHeapData) {
      .ref_count = 1,
      .padding = padding,
      .size = size,
      .lookahead_bytes = lookahead_bytes,
      .error_cost = 0,
      .child_count = 0,
      .symbol = symbol,
      .parse_state = parse_state,
      .visible = metadata.visible,
      .named = metadata.named,
      .extra = extra,
      .fragile_left = false,
      .fragile_right = false,
      .has_changes = false,
      .has_external_tokens = has_external_tokens,
      .has_external_scanner_state_change = false,
      .depends_on_column = depends_on_column,
      .is_missing = false,
      .is_keyword = is_keyword,
      {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
    };
    return (Subtree) {.ptr = heap_data};
  }

  return (Subtree) {{
    .parse_state = parse_state,
    .symbol = symbol,
    .padding_bytes = padding.bytes,
    .padding_rows = padding.extent.row,
    .padding_columns = padding.extent.column,
    .size_bytes = size.bytes,
    .lookahead_bytes = lookahead_bytes,
    .visible = metadata.visible,
    .named = metadata.named,
    .extra = extra,
    .has_changes = false,
    .is_missing = false,
    .is_keyword = is_keyword,
    .is_inline = true,
  }};
}

// Change the symbol of a subtree in place and refresh its `named` and
// `visible` flags from the language's metadata for the new symbol.
//
// For an inline subtree the new symbol must satisfy the same bound that
// ts_subtree_new_leaf applies when choosing the inline representation
// (`symbol <= UINT8_MAX`); the assertion below enforces exactly that bound.
void ts_subtree_set_symbol(
  MutableSubtree *self,
  TSSymbol symbol,
  const TSLanguage *language
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  if (self->data.is_inline) {
    // UINT8_MAX itself is a legal inline symbol, so the comparison is inclusive.
    ts_assert(symbol <= UINT8_MAX);
    self->data.symbol = symbol;
    self->data.named = metadata.named;
    self->data.visible = metadata.visible;
  } else {
    self->ptr->symbol = symbol;
    self->ptr->named = metadata.named;
    self->ptr->visible = metadata.visible;
  }
}

// Create an error leaf that records the character at which the error occurred.
//
// The leaf is built with the builtin error symbol and `bytes_scanned` as its
// lookahead byte count, then marked fragile on both sides. The result of
// ts_subtree_new_leaf is accessed through its heap pointer here.
Subtree ts_subtree_new_error(
  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
) {
  Subtree error_leaf = ts_subtree_new_leaf(
    pool, ts_builtin_sym_error, padding, size, bytes_scanned,
    parse_state, false, false, false, language
  );

  SubtreeHeapData *heap_data = (SubtreeHeapData *)error_leaf.ptr;
  heap_data->lookahead_char = lookahead_char;
  heap_data->fragile_left = true;
  heap_data->fragile_right = true;
  return error_leaf;
}

// Make a shallow copy of a heap-allocated subtree.
//
// The children array and the node data that follows it are duplicated in one
// allocation. Children are shared with the original (each is retained); a
// childless node with external tokens gets its own copy of the external
// scanner state. The copy starts with a reference count of one.
MutableSubtree ts_subtree_clone(Subtree self) {
  uint32_t child_count = self.ptr->child_count;
  size_t byte_size = ts_subtree_alloc_size(child_count);

  Subtree *copied_children = ts_malloc(byte_size);
  memcpy(copied_children, ts_subtree_children(self), byte_size);

  // The node's own data sits immediately after its children.
  SubtreeHeapData *copy = (SubtreeHeapData *)&copied_children[child_count];

  if (child_count == 0) {
    if (self.ptr->has_external_tokens) {
      copy->external_scanner_state = ts_external_scanner_state_copy(
        &self.ptr->external_scanner_state
      );
    }
  } else {
    for (uint32_t index = 0; index < child_count; index++) {
      ts_subtree_retain(copied_children[index]);
    }
  }

  copy->ref_count = 1;
  return (MutableSubtree) {.ptr = copy};
}

// Obtain a mutable version of a subtree, consuming the caller's reference.
//
// Inline subtrees are values and are returned as-is. A heap subtree with a
// single owner is converted directly. A shared heap subtree is cloned, and the
// caller's reference to the original is released.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
  if (self.data.is_inline) {
    return (MutableSubtree) {self.data};
  }

  if (self.ptr->ref_count != 1) {
    MutableSubtree copy = ts_subtree_clone(self);
    ts_subtree_release(pool, self);
    return copy;
  }

  return ts_subtree_to_mut_unsafe(self);
}

// Restructure a chain of same-symbol nodes hanging off the first child of
// `self`, performing at most `count` rotations.
//
// Each rotation looks at a node, its first child and that child's first child
// (the grandchild). It only proceeds while all three are uniquely owned heap
// nodes with at least two children, and the child and grandchild share the
// symbol of `self`. `stack` is used as scratch space and is restored to its
// initial size before returning.
void ts_subtree_compress(
  MutableSubtree self,
  unsigned count,
  const TSLanguage *language,
  MutableSubtreeArray *stack
) {
  unsigned initial_stack_size = stack->size;

  MutableSubtree tree = self;
  TSSymbol symbol = tree.ptr->symbol;
  for (unsigned i = 0; i < count; i++) {
    if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;

    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
    if (
      child.data.is_inline ||
      child.ptr->child_count < 2 ||
      child.ptr->ref_count > 1 ||
      child.ptr->symbol != symbol
    ) break;

    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]);
    if (
      grandchild.data.is_inline ||
      grandchild.ptr->child_count < 2 ||
      grandchild.ptr->ref_count > 1 ||
      grandchild.ptr->symbol != symbol
    ) break;

    // Rotate: the grandchild becomes the first child of `tree`, the
    // grandchild's last child becomes the first child of `child`, and `child`
    // becomes the grandchild's last child.
    ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild);
    ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1];
    ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
    // Remember the rotated node so its summary can be recomputed below, then
    // continue one level down.
    array_push(stack, tree);
    tree = grandchild;
  }

  // Recompute child-derived properties for every rotated trio, deepest first,
  // so each node is summarized after the nodes beneath it.
  while (stack->size > initial_stack_size) {
    tree = array_pop(stack);
    MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]);
    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]);
    ts_subtree_summarize_children(grandchild, language);
    ts_subtree_summarize_children(child, language);
    ts_subtree_summarize_children(tree, language);
  }
}

// Assign all of the node's properties that depend on its children.
//
// `self` must be a heap-allocated (non-inline) node. The function resets the
// aggregate fields, then walks the children once to recompute padding, size,
// lookahead, error cost, visible/named child counts, dynamic precedence,
// external-token flags and fragility, and finally derives the first-leaf
// information and repeat depth from the first and last children.
void ts_subtree_summarize_children(
  MutableSubtree self,
  const TSLanguage *language
) {
  ts_assert(!self.data.is_inline);

  // Reset every aggregate before accumulating over the children.
  self.ptr->named_child_count = 0;
  self.ptr->visible_child_count = 0;
  self.ptr->error_cost = 0;
  self.ptr->repeat_depth = 0;
  self.ptr->visible_descendant_count = 0;
  self.ptr->has_external_tokens = false;
  self.ptr->depends_on_column = false;
  self.ptr->has_external_scanner_state_change = false;
  self.ptr->dynamic_precedence = 0;

  // Index into the alias sequence; it advances only for non-extra children.
  uint32_t structural_index = 0;
  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
  // Furthest byte (relative to the start of this node's padding) examined by
  // any child, including that child's lookahead.
  uint32_t lookahead_end_byte = 0;

  const Subtree *children = ts_subtree_children(self);
  for (uint32_t i = 0; i < self.ptr->child_count; i++) {
    Subtree child = children[i];

    // Note: this check reads the size accumulated so far, i.e. before the
    // current child's size has been added below.
    if (
      self.ptr->size.extent.row == 0 &&
      ts_subtree_depends_on_column(child)
    ) {
      self.ptr->depends_on_column = true;
    }

    if (ts_subtree_has_external_scanner_state_change(child)) {
      self.ptr->has_external_scanner_state_change = true;
    }

    // The first child supplies this node's padding and initial size; each
    // later child contributes its total size (padding included).
    if (i == 0) {
      self.ptr->padding = ts_subtree_padding(child);
      self.ptr->size = ts_subtree_size(child);
    } else {
      self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
    }

    uint32_t child_lookahead_end_byte =
      self.ptr->padding.bytes +
      self.ptr->size.bytes +
      ts_subtree_lookahead_bytes(child);
    if (child_lookahead_end_byte > lookahead_end_byte) {
      lookahead_end_byte = child_lookahead_end_byte;
    }

    // Children with the error-repeat symbol do not contribute their own error
    // cost here.
    if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
      self.ptr->error_cost += ts_subtree_error_cost(child);
    }

    // Inside an error node, charge a per-tree cost for each skipped child:
    // once for a visible child, or once per visible child of an invisible one.
    // Extras and childless error children are not charged.
    uint32_t grandchild_count = ts_subtree_child_count(child);
    if (
      self.ptr->symbol == ts_builtin_sym_error ||
      self.ptr->symbol == ts_builtin_sym_error_repeat
    ) {
      if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
        if (ts_subtree_visible(child)) {
          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
        } else if (grandchild_count > 0) {
          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
        }
      }
    }

    self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
    self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child);

    // Count the child as visible if it is aliased in this production or is
    // visible itself; an invisible child with children passes its own
    // visible/named child counts up to this node instead.
    if (
      !ts_subtree_extra(child) &&
      ts_subtree_symbol(child) != 0 &&
      alias_sequence &&
      alias_sequence[structural_index] != 0
    ) {
      self.ptr->visible_descendant_count++;
      self.ptr->visible_child_count++;
      if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) {
        self.ptr->named_child_count++;
      }
    } else if (ts_subtree_visible(child)) {
      self.ptr->visible_descendant_count++;
      self.ptr->visible_child_count++;
      if (ts_subtree_named(child)) self.ptr->named_child_count++;
    } else if (grandchild_count > 0) {
      self.ptr->visible_child_count += child.ptr->visible_child_count;
      self.ptr->named_child_count += child.ptr->named_child_count;
    }

    if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;

    // Any error child makes this node fragile on both sides and clears its
    // parse state.
    if (ts_subtree_is_error(child)) {
      self.ptr->fragile_left = self.ptr->fragile_right = true;
      self.ptr->parse_state = TS_TREE_STATE_NONE;
    }

    if (!ts_subtree_extra(child)) structural_index++;
  }

  // Convert the furthest examined byte into a lookahead measured from the end
  // of this node.
  self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;

  // Error nodes additionally pay a fixed recovery cost plus costs proportional
  // to the bytes and rows they span.
  if (
    self.ptr->symbol == ts_builtin_sym_error ||
    self.ptr->symbol == ts_builtin_sym_error_repeat
  ) {
    self.ptr->error_cost +=
      ERROR_COST_PER_RECOVERY +
      ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
      ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
  }

  if (self.ptr->child_count > 0) {
    Subtree first_child = children[0];
    Subtree last_child = children[self.ptr->child_count - 1];

    self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
    self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);

    // Fragility propagates inward from the outermost children.
    if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
    if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;

    // For an invisible, unnamed node whose first child has the same symbol,
    // the repeat depth is one more than the deeper of the first and last
    // children.
    if (
      self.ptr->child_count >= 2 &&
      !self.ptr->visible &&
      !self.ptr->named &&
      ts_subtree_symbol(first_child) == self.ptr->symbol
    ) {
      if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
        self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
      } else {
        self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
      }
    }
  }
}

// Build a parent node for `symbol` out of an array of children.
//
// Ownership of the children array passes to the new node: the node's own data
// is written directly after the last child inside the same buffer, which is
// grown first if it is too small. Error and error-repeat nodes start out
// fragile on both sides. All child-derived properties are filled in by
// ts_subtree_summarize_children before the node is returned.
MutableSubtree ts_subtree_new_node(
  TSSymbol symbol,
  SubtreeArray *children,
  unsigned production_id,
  const TSLanguage *language
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  bool is_error_symbol =
    symbol == ts_builtin_sym_error ||
    symbol == ts_builtin_sym_error_repeat;

  // Make sure the buffer can hold the children followed by the node data.
  size_t required_bytes = ts_subtree_alloc_size(children->size);
  size_t available_bytes = children->capacity * sizeof(Subtree);
  if (available_bytes < required_bytes) {
    children->contents = ts_realloc(children->contents, required_bytes);
    children->capacity = (uint32_t)(required_bytes / sizeof(Subtree));
  }

  SubtreeHeapData *node_data = (SubtreeHeapData *)&children->contents[children->size];
  *node_data = (SubtreeHeapData) {
    .ref_count = 1,
    .symbol = symbol,
    .child_count = children->size,
    .visible = metadata.visible,
    .named = metadata.named,
    .has_changes = false,
    .has_external_scanner_state_change = false,
    .fragile_left = is_error_symbol,
    .fragile_right = is_error_symbol,
    .is_keyword = false,
    {{
      .visible_descendant_count = 0,
      .production_id = production_id,
      .first_leaf = {.symbol = 0, .parse_state = 0},
    }}
  };

  MutableSubtree node = {.ptr = node_data};
  ts_subtree_summarize_children(node, language);
  return node;
}

// Create a new error node containing the given children.
//
// The node uses the builtin error symbol with production id 0, and its
// 'extra' flag is set from the `extra` argument. Ownership of the children
// array passes to the node, as with ts_subtree_new_node.
Subtree ts_subtree_new_error_node(
  SubtreeArray *children,
  bool extra,
  const TSLanguage *language
) {
  MutableSubtree error_node = ts_subtree_new_node(
    ts_builtin_sym_error, children, 0, language
  );
  error_node.ptr->extra = extra;
  return ts_subtree_from_mut(error_node);
}

// Create a new 'missing leaf' node.
//
// The leaf has the given symbol and padding, zero size, and parse state 0,
// and is flagged `is_missing` in whichever representation (inline or heap)
// ts_subtree_new_leaf chose for it.
Subtree ts_subtree_new_missing_leaf(
  SubtreePool *pool,
  TSSymbol symbol,
  Length padding,
  uint32_t lookahead_bytes,
  const TSLanguage *language
) {
  Subtree leaf = ts_subtree_new_leaf(
    pool, symbol, padding, length_zero(), lookahead_bytes,
    0, false, false, false, language
  );

  if (!leaf.data.is_inline) {
    ((SubtreeHeapData *)leaf.ptr)->is_missing = true;
  } else {
    leaf.data.is_missing = true;
  }
  return leaf;
}

// Add one reference to a subtree. Inline subtrees carry no reference count, so
// this is a no-op for them. The assertions check that the count was positive
// beforehand and did not wrap around to zero.
void ts_subtree_retain(Subtree self) {
  if (!self.data.is_inline) {
    ts_assert(self.ptr->ref_count > 0);
    atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
    ts_assert(self.ptr->ref_count != 0);
  }
}

// Drop one reference to a subtree, freeing it and any descendants whose
// reference counts reach zero.
//
// Inline subtrees are ignored. Freeing is iterative rather than recursive:
// nodes whose count hits zero are pushed onto `pool->tree_stack` and processed
// in a loop.
void ts_subtree_release(SubtreePool *pool, Subtree self) {
  if (self.data.is_inline) return;
  // Start from an empty work list.
  array_clear(&pool->tree_stack);

  ts_assert(self.ptr->ref_count > 0);
  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
  }

  while (pool->tree_stack.size > 0) {
    MutableSubtree tree = array_pop(&pool->tree_stack);
    if (tree.ptr->child_count > 0) {
      // Parent node: drop its reference to each heap child, queueing any
      // child that becomes unreferenced.
      Subtree *children = ts_subtree_children(tree);
      for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
        Subtree child = children[i];
        if (child.data.is_inline) continue;
        ts_assert(child.ptr->ref_count > 0);
        if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
          array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
        }
      }
      // The node's data lives at the end of its children array (see
      // ts_subtree_new_node), so freeing the array frees the node as well.
      ts_free(children);
    } else {
      // Leaf node: release any external scanner state, then hand the
      // allocation back to the pool.
      if (tree.ptr->has_external_tokens) {
        ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
      }
      ts_subtree_pool_free(pool, tree.ptr);
    }
  }
}

// Compare two subtrees structurally, returning -1, 0 or 1.
//
// Nodes are compared pairwise in pre-order: first by symbol, then by child
// count. The first difference decides the result; 0 means every visited pair
// matched. `pool->tree_stack` serves as the work list and is emptied before
// returning.
int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) {
  array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left));
  array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right));

  while (pool->tree_stack.size > 0) {
    // Pairs are pushed left-then-right, so they pop right-then-left.
    right = ts_subtree_from_mut(array_pop(&pool->tree_stack));
    left = ts_subtree_from_mut(array_pop(&pool->tree_stack));

    TSSymbol left_symbol = ts_subtree_symbol(left);
    TSSymbol right_symbol = ts_subtree_symbol(right);
    uint32_t left_count = ts_subtree_child_count(left);
    uint32_t right_count = ts_subtree_child_count(right);

    int ordering = 0;
    if (left_symbol != right_symbol) {
      ordering = left_symbol < right_symbol ? -1 : 1;
    } else if (left_count != right_count) {
      ordering = left_count < right_count ? -1 : 1;
    }

    if (ordering != 0) {
      array_clear(&pool->tree_stack);
      return ordering;
    }

    // Queue the child pairs back to front so the first pair is popped first.
    for (uint32_t remaining = left_count; remaining > 0; remaining--) {
      Subtree left_child = ts_subtree_children(left)[remaining - 1];
      Subtree right_child = ts_subtree_children(right)[remaining - 1];
      array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child));
      array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child));
    }
  }

  return 0;
}

// Set the `has_changes` flag on a subtree in whichever representation it uses.
static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
  if (!self->data.is_inline) {
    self->ptr->has_changes = true;
    return;
  }
  self->data.has_changes = true;
}

// Apply a text edit to a subtree and return the edited subtree.
//
// The edit is pushed down through the tree with an explicit work stack. Every
// node the edit touches is made mutable (cloned if it is shared), has its
// padding and size adjusted, and is flagged `has_changes`. The edit is then
// translated into each affected child's coordinate space and queued for that
// child.
//
// This takes ownership of `self`; the returned subtree replaces it.
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) {
  typedef struct {
    Subtree *tree;
    Edit edit;
  } EditEntry;

  Array(EditEntry) stack = array_new();
  array_push(&stack, ((EditEntry) {
    .tree = &self,
    .edit = (Edit) {
      .start = {input_edit->start_byte, input_edit->start_point},
      .old_end = {input_edit->old_end_byte, input_edit->old_end_point},
      .new_end = {input_edit->new_end_byte, input_edit->new_end_point},
    },
  }));

  while (stack.size) {
    EditEntry entry = array_pop(&stack);
    Edit edit = entry.edit;
    bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
    bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
    bool parent_depends_on_column = ts_subtree_depends_on_column(*entry.tree);
    bool column_shifted = edit.new_end.extent.column != edit.old_end.extent.column;

    Length size = ts_subtree_size(*entry.tree);
    Length padding = ts_subtree_padding(*entry.tree);
    Length total_size = length_add(padding, size);
    uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
    uint32_t end_byte = total_size.bytes + lookahead_bytes;
    // Skip subtrees that end (lookahead included) before the edit begins.
    if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;

    // If the edit is entirely within the space before this subtree, then shift this
    // subtree over according to the edit without changing its size.
    if (edit.old_end.bytes <= padding.bytes) {
      padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
    }

    // If the edit starts in the space before this subtree and extends into this subtree,
    // shrink the subtree's content to compensate for the change in the space before it.
    else if (edit.start.bytes < padding.bytes) {
      size = length_saturating_sub(size, length_sub(edit.old_end, padding));
      padding = edit.new_end;
    }

    // If the edit is within this subtree, resize the subtree to reflect the edit.
    else if (
      edit.start.bytes < total_size.bytes ||
      (edit.start.bytes == total_size.bytes && is_pure_insertion)
    ) {
      size = length_add(
        length_sub(edit.new_end, padding),
        length_saturating_sub(total_size, edit.old_end)
      );
    }

    MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);

    if (result.data.is_inline) {
      if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
        result.data.padding_bytes = padding.bytes;
        result.data.padding_rows = padding.extent.row;
        result.data.padding_columns = padding.extent.column;
        result.data.size_bytes = size.bytes;
      } else {
        // The edited leaf no longer fits inline, so move it to the heap. The
        // pool hands back uninitialized (possibly recycled) memory, so assign
        // a complete value, exactly as ts_subtree_new_leaf does. This leaves
        // no field -- notably `has_external_scanner_state_change` and the
        // trailing union -- holding stale data.
        SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
        *data = (SubtreeHeapData) {
          .ref_count = 1,
          .padding = padding,
          .size = size,
          .lookahead_bytes = lookahead_bytes,
          .error_cost = 0,
          .child_count = 0,
          .symbol = result.data.symbol,
          .parse_state = result.data.parse_state,
          .visible = result.data.visible,
          .named = result.data.named,
          .extra = result.data.extra,
          .fragile_left = false,
          .fragile_right = false,
          .has_changes = false,
          .has_external_tokens = false,
          .has_external_scanner_state_change = false,
          .depends_on_column = false,
          .is_missing = result.data.is_missing,
          .is_keyword = result.data.is_keyword,
          {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
        };
        result.ptr = data;
      }
    } else {
      result.ptr->padding = padding;
      result.ptr->size = size;
    }

    ts_subtree_set_has_changes(&result);
    *entry.tree = ts_subtree_from_mut(result);

    // Walk the children, tracking each child's start (`child_left`) and end
    // (`child_right`) relative to the start of this subtree.
    Length child_left, child_right = length_zero();
    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
      Subtree *child = &ts_subtree_children(*entry.tree)[i];
      Length child_size = ts_subtree_total_size(*child);
      child_left = child_right;
      child_right = length_add(child_left, child_size);

      // If this child ends before the edit, it is not affected.
      if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;

      // Keep editing child nodes until a node is reached that starts after the edit.
      // Also, if this node's validity depends on its column position, then continue
      // invalidating child nodes until reaching a line break.
      if ((
        (child_left.bytes > edit.old_end.bytes) ||
        (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)
      ) && (
        !parent_depends_on_column ||
        child_left.extent.row > padding.extent.row
      ) && (
        !ts_subtree_depends_on_column(*child) ||
        !column_shifted ||
        child_left.extent.row > edit.old_end.extent.row
      )) {
        break;
      }

      // Transform edit into the child's coordinate space.
      Edit child_edit = {
        .start = length_saturating_sub(edit.start, child_left),
        .old_end = length_saturating_sub(edit.old_end, child_left),
        .new_end = length_saturating_sub(edit.new_end, child_left),
      };

      // Interpret all inserted text as applying to the *first* child that touches the edit.
      // Subsequent children never have any text inserted into them; they are only
      // shrunk to compensate for the edit.
      if (
        child_right.bytes > edit.start.bytes ||
        (child_right.bytes == edit.start.bytes && is_pure_insertion)
      ) {
        edit.new_end = edit.start;
      }

      // Children that occur before the edit are not reshaped by the edit.
      else {
        child_edit.old_end = child_edit.start;
        child_edit.new_end = child_edit.start;
      }

      // Queue processing of this child's subtree.
      array_push(&stack, ((EditEntry) {
        .tree = child,
        .edit = child_edit,
      }));
    }
  }

  array_delete(&stack);
  return self;
}

// Find the last leaf within `tree` that has external tokens.
//
// Returns NULL_SUBTREE when `tree` has no external tokens at all. Otherwise
// the search repeatedly descends into the rightmost child that has external
// tokens until it reaches a node without children.
Subtree ts_subtree_last_external_token(Subtree tree) {
  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;

  while (tree.ptr->child_count > 0) {
    // Scan the children from last to first.
    uint32_t remaining = tree.ptr->child_count;
    while (remaining > 0) {
      remaining--;
      Subtree child = ts_subtree_children(tree)[remaining];
      if (ts_subtree_has_external_tokens(child)) {
        tree = child;
        break;
      }
    }
  }

  return tree;
}

// Write a readable rendering of a character code into `str` (at most `n`
// bytes, as with snprintf) and return snprintf's result.
//
// -1 is rendered as INVALID; NUL, newline, tab and carriage return as quoted
// escape sequences; printable ASCII as the quoted character; anything else as
// its decimal value.
static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) {
  switch (chr) {
    case -1:
      return snprintf(str, n, "INVALID");
    case '\0':
      return snprintf(str, n, "'\\0'");
    case '\n':
      return snprintf(str, n, "'\\n'");
    case '\t':
      return snprintf(str, n, "'\\t'");
    case '\r':
      return snprintf(str, n, "'\\r'");
    default:
      break;
  }

  bool is_printable_ascii = 0 < chr && chr < 128 && isprint(chr);
  if (is_printable_ascii) {
    return snprintf(str, n, "'%c'", chr);
  }
  return snprintf(str, n, "%d", chr);
}

// Sentinel field name. `ts_subtree__write_to_string` compares its
// `field_name` argument against this pointer (by identity, not by content)
// to recognise the outermost call.
static const char *const ROOT_FIELD = "__ROOT__";

// Write an S-expression rendering of `self` into `string` and return the
// number of characters the rendering needs (the sum of the `snprintf`
// results, so the terminating NUL is not counted).
//
// The function has two modes, chosen by `limit`:
// - `limit > 1`: every `snprintf` writes at `cursor`, which advances through
//   the buffer as text is produced.
// - `limit <= 1`: every `snprintf` targets the start of `string`, so nothing
//   useful is stored, but `cursor` still advances by the would-be length.
//   `ts_subtree_string` uses this mode to measure the output first.
//
// NOTE(review): `limit` is passed unchanged to every `snprintf` rather than
// the remaining space; this relies on the caller having sized the buffer from
// a prior measuring pass.
//
// Parameters:
// - `include_all`: treat every node as visible.
// - `alias_symbol` / `alias_is_named`: alias applied to `self` by its parent
//   (0 when there is none).
// - `field_name`: field label for `self`, NULL for none, or `ROOT_FIELD` for
//   the outermost call.
static size_t ts_subtree__write_to_string(
  Subtree self, char *string, size_t limit,
  const TSLanguage *language, bool include_all,
  TSSymbol alias_symbol, bool alias_is_named, const char *field_name
) {
  if (!self.ptr) return snprintf(string, limit, "(NULL)");

  char *cursor = string;
  // In measuring mode `writer` keeps pointing at the buffer start.
  char **writer = (limit > 1) ? &cursor : &string;
  // Pointer comparison on purpose: only the sentinel itself marks the root.
  bool is_root = field_name == ROOT_FIELD;
  // A node is printed when everything is requested, when it is a MISSING
  // node, or when it is named (through its alias if it has one, otherwise
  // through its own visible+named flags).
  bool is_visible =
    include_all ||
    ts_subtree_missing(self) ||
    (
      alias_symbol
        ? alias_is_named
        : ts_subtree_visible(self) && ts_subtree_named(self)
    );

  if (is_visible) {
    // Non-root nodes are separated from the preceding text by a space and
    // are optionally prefixed with "field: ".
    if (!is_root) {
      cursor += snprintf(*writer, limit, " ");
      if (field_name) {
        cursor += snprintf(*writer, limit, "%s: ", field_name);
      }
    }

    // Childless, non-empty error nodes print the character that was
    // unexpected instead of a symbol name.
    if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
      cursor += snprintf(*writer, limit, "(UNEXPECTED ");
      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
    } else {
      TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
      const char *symbol_name = ts_language_symbol_name(language, symbol);
      if (ts_subtree_missing(self)) {
        cursor += snprintf(*writer, limit, "(MISSING ");
        // Anonymous missing tokens are quoted; named ones are not.
        if (alias_is_named || ts_subtree_named(self)) {
          cursor += snprintf(*writer, limit, "%s", symbol_name);
        } else {
          cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
        }
      } else {
        cursor += snprintf(*writer, limit, "(%s", symbol_name);
      }
    }
  } else if (is_root) {
    // A root that would otherwise be hidden is still printed. With children
    // the closing parenthesis is not emitted here (nor at the end of the
    // function, since `is_visible` is false); without children the complete
    // "(name)" form is written immediately.
    TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
    const char *symbol_name = ts_language_symbol_name(language, symbol);
    if (ts_subtree_child_count(self) > 0) {
      cursor += snprintf(*writer, limit, "(%s", symbol_name);
    } else if (ts_subtree_named(self)) {
      cursor += snprintf(*writer, limit, "(%s)", symbol_name);
    } else {
      cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
    }
  }

  if (ts_subtree_child_count(self)) {
    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
    const TSFieldMapEntry *field_map, *field_map_end;
    ts_language_field_map(
      language,
      self.ptr->production_id,
      &field_map,
      &field_map_end
    );

    // Aliases and field-map entries are indexed by structural child index,
    // which skips `extra` children.
    uint32_t structural_child_index = 0;
    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
      Subtree child = ts_subtree_children(self)[i];
      if (ts_subtree_extra(child)) {
        // Extras never receive an alias or a field name.
        cursor += ts_subtree__write_to_string(
          child, *writer, limit,
          language, include_all,
          0, false, NULL
        );
      } else {
        TSSymbol subtree_alias_symbol = alias_sequence
          ? alias_sequence[structural_child_index]
          : 0;
        bool subtree_alias_is_named = subtree_alias_symbol
          ? ts_language_symbol_metadata(language, subtree_alias_symbol).named
          : false;

        // A hidden node hands its own field name down to its children; a
        // non-inherited field-map entry for this child overrides it.
        const char *child_field_name = is_visible ? NULL : field_name;
        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
          if (!map->inherited && map->child_index == structural_child_index) {
            child_field_name = language->field_names[map->field_id];
            break;
          }
        }

        cursor += ts_subtree__write_to_string(
          child, *writer, limit,
          language, include_all,
          subtree_alias_symbol, subtree_alias_is_named, child_field_name
        );
        structural_child_index++;
      }
    }
  }

  if (is_visible) cursor += snprintf(*writer, limit, ")");

  return cursor - string;
}

// Render `self` as an S-expression in a buffer obtained from `ts_malloc`.
// The text is produced twice: once into a one-byte scratch buffer to learn
// the required length, and once into the real buffer.
char *ts_subtree_string(
  Subtree self,
  TSSymbol alias_symbol,
  bool alias_is_named,
  const TSLanguage *language,
  bool include_all
) {
  // Measuring pass.
  char probe[1];
  size_t text_length = ts_subtree__write_to_string(
    self, probe, 1,
    language, include_all,
    alias_symbol, alias_is_named, ROOT_FIELD
  );

  // One extra byte for the terminating NUL.
  size_t capacity = text_length + 1;
  char *buffer = ts_malloc(capacity * sizeof(char));

  // Writing pass.
  ts_subtree__write_to_string(
    self, buffer, capacity,
    language, include_all,
    alias_symbol, alias_is_named, ROOT_FIELD
  );
  return buffer;
}

// Emit the DOT node statement for `*self` to `f`, then recurse into each
// child and emit the edge from this node to it.
//
// - `start_offset`: byte offset at which this subtree (padding included)
//   begins; used for the "range" tooltip.
// - `alias_symbol`: alias to label the node with, or 0 to use the subtree's
//   own symbol.
//
// Nodes are named after the address of their `Subtree` slot (`tree_%p`),
// which is why the function takes a pointer rather than a value.
void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
                                 const TSLanguage *language, TSSymbol alias_symbol,
                                 FILE *f) {
  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
  fprintf(f, "tree_%p [label=\"", (void *)self);
  ts_language_write_symbol_as_dot_string(language, f, symbol);
  fprintf(f, "\"");

  // Visual hints: leaves are plain text, extras are gray, edited nodes green.
  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
  if (ts_subtree_has_changes(*self)) fprintf(f, ", color=green, penwidth=2");

  fprintf(f, ", tooltip=\""
    "range: %u - %u\n"
    "state: %d\n"
    "error-cost: %u\n"
    "has-changes: %u\n"
    "depends-on-column: %u\n"
    "descendant-count: %u\n"
    "repeat-depth: %u\n"
    "lookahead-bytes: %u",
    start_offset, end_offset,
    ts_subtree_parse_state(*self),
    ts_subtree_error_cost(*self),
    ts_subtree_has_changes(*self),
    ts_subtree_depends_on_column(*self),
    ts_subtree_visible_descendant_count(*self),
    ts_subtree_repeat_depth(*self),
    ts_subtree_lookahead_bytes(*self)
  );

  // NOTE(review): `lookahead_char` is an int32_t printed with `%c`, so only
  // its low byte is shown and it is not escaped for DOT.
  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) {
    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
  }

  fprintf(f, "\"]\n");

  uint32_t child_start_offset = start_offset;
  // Index of this production's first entry in `language->alias_sequences`.
  // A value of zero doubles as "no alias lookup" in the loop below.
  uint32_t child_info_offset =
    language->max_alias_sequence_length *
    ts_subtree_production_id(*self);
  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
    const Subtree *child = &ts_subtree_children(*self)[i];
    TSSymbol subtree_alias_symbol = 0;
    // Only non-extra children consume an alias slot.
    if (!ts_subtree_extra(*child) && child_info_offset) {
      subtree_alias_symbol = language->alias_sequences[child_info_offset];
      child_info_offset++;
    }
    ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f);
    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
    child_start_offset += ts_subtree_total_bytes(*child);
  }
}

// Write a complete DOT digraph describing `self` and its descendants to `f`.
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
  // Header.
  fputs("digraph tree {\n", f);
  fputs("edge [arrowhead=none]\n", f);

  // Body: the root starts at byte offset 0 and has no alias.
  ts_subtree__print_dot_graph(&self, 0, language, 0, f);

  // Footer.
  fputs("}\n", f);
}

// Return the external scanner state stored on `self`, or a pointer to a
// shared empty state when `self` is null, inline, has children, or has no
// external tokens.
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) {
  static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};

  bool is_external_leaf =
    self.ptr &&
    !self.data.is_inline &&
    self.ptr->has_external_tokens &&
    self.ptr->child_count == 0;

  return is_external_leaf ? &self.ptr->external_scanner_state : &empty_state;
}

// Compare the external scanner states of two subtrees for equality.
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
  const ExternalScannerState *lhs = ts_subtree_external_scanner_state(self);
  const ExternalScannerState *rhs = ts_subtree_external_scanner_state(other);
  const char *rhs_bytes = ts_external_scanner_state_data(rhs);
  return ts_external_scanner_state_eq(lhs, rhs_bytes, rhs->length);
}



================================================
FILE: lib/src/subtree.h
================================================
#ifndef TREE_SITTER_SUBTREE_H_
#define TREE_SITTER_SUBTREE_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include "./length.h"
#include "./array.h"
#include "./error_costs.h"
#include "./host.h"
#include "tree_sitter/api.h"
#include "./parser.h"

// Sentinel parse state: the largest `unsigned short` value.
// NOTE(review): presumably denotes "no parse state" — confirm against users.
#define TS_TREE_STATE_NONE USHRT_MAX
// A `Subtree` whose pointer member is NULL.
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})

// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct {
  union {
    // Heap storage for long states.
    char *long_data;
    // Inline storage for short states.
    char short_data[24];
  };
  // Number of serialized bytes.
  uint32_t length;
} ExternalScannerState;

// A compact representation of a subtree.
//
// This representation is used for small leaf nodes that are not
// errors, and were not created by an external scanner.
//
// The idea behind the layout of this struct is that the `is_inline`
// bit will fall exactly into the same location as the least significant
// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
// Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct.
typedef struct SubtreeInlineData SubtreeInlineData;

// The flag bits shared by all three layouts below.
#define SUBTREE_BITS    \
  bool visible : 1;     \
  bool named : 1;       \
  bool extra : 1;       \
  bool has_changes : 1; \
  bool is_missing : 1;  \
  bool is_keyword : 1;

// The padding/size fields shared by all three layouts below. Note the narrow
// widths: rows and lookahead bytes get 4 bits each, the rest 8 bits.
#define SUBTREE_SIZE           \
  uint8_t padding_columns;     \
  uint8_t padding_rows : 4;    \
  uint8_t lookahead_bytes : 4; \
  uint8_t padding_bytes;       \
  uint8_t size_bytes;

#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32

// Layout for big-endian targets with 32-bit pointers.
struct SubtreeInlineData {
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
  SUBTREE_SIZE
};

#else

// Layout for big-endian targets with any other pointer size.
struct SubtreeInlineData {
  SUBTREE_SIZE
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
};

#endif
#else

// Layout for little-endian targets: `is_inline` is the very first bit.
struct SubtreeInlineData {
  bool is_inline : 1;
  SUBTREE_BITS
  uint8_t symbol;
  uint16_t parse_state;
  SUBTREE_SIZE
};

#endif

#undef SUBTREE_BITS
#undef SUBTREE_SIZE

// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
//
// The trailing anonymous union holds kind-specific data. Its three arms
// share storage, so a field may only be read after checking which kind of
// subtree this is (see the condition noted on each arm).
typedef struct {
  volatile uint32_t ref_count;
  Length padding;
  Length size;
  uint32_t lookahead_bytes;
  uint32_t error_cost;
  uint32_t child_count;
  TSSymbol symbol;
  TSStateId parse_state;

  bool visible : 1;
  bool named : 1;
  bool extra : 1;
  bool fragile_left : 1;
  bool fragile_right : 1;
  bool has_changes : 1;
  bool has_external_tokens : 1;
  bool has_external_scanner_state_change : 1;
  bool depends_on_column: 1;
  bool is_missing : 1;
  bool is_keyword : 1;

  union {
    // Non-terminal subtrees (`child_count > 0`)
    struct {
      uint32_t visible_child_count;
      uint32_t named_child_count;
      uint32_t visible_descendant_count;
      int32_t dynamic_precedence;
      uint16_t repeat_depth;
      uint16_t production_id;
      struct {
        TSSymbol symbol;
        TSStateId parse_state;
      } first_leaf;
    };

    // External terminal subtrees (`child_count == 0 && has_external_tokens`)
    ExternalScannerState external_scanner_state;

    // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
    int32_t lookahead_char;
  };
} SubtreeHeapData;

// The fundamental building block of a syntax tree.
//
// Either inline data or a pointer to immutable heap data; `data.is_inline`
// tells which member is active.
typedef union {
  SubtreeInlineData data;
  const SubtreeHeapData *ptr;
} Subtree;

// Like Subtree, but mutable: the heap data pointer is not const-qualified.
typedef union {
  SubtreeInlineData data;
  SubtreeHeapData *ptr;
} MutableSubtree;

// Growable arrays of subtrees, built with the `Array` macro from "./array.h".
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;

// Scratch storage passed to the subtree functions that take a `SubtreePool *`.
// NOTE(review): presumably `free_trees` recycles released heap nodes and
// `tree_stack` is a work list for non-recursive traversals — confirm in
// subtree.c.
typedef struct {
  MutableSubtreeArray free_trees;
  MutableSubtreeArray tree_stack;
} SubtreePool;

// ExternalScannerState operations.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length);
const char *ts_external_scanner_state_data(const ExternalScannerState *self);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length);
void ts_external_scanner_state_delete(ExternalScannerState *self);

// SubtreeArray operations.
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest);
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination);
void ts_subtree_array_reverse(SubtreeArray *self);

// SubtreePool lifecycle.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *self);

// Subtree constructors.
Subtree ts_subtree_new_leaf(
  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
  uint32_t lookahead_bytes, TSStateId parse_state,
  bool has_external_tokens, bool depends_on_column,
  bool is_keyword, const TSLanguage *language
);
Subtree ts_subtree_new_error(
  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
);
// Create a parent node with the given symbol and production from `children`.
// (Parameter name corrected from the misspelled `chiildren`; parameter names
// in a prototype do not affect callers.)
MutableSubtree ts_subtree_new_node(
  TSSymbol symbol,
  SubtreeArray *children,
  unsigned production_id,
  const TSLanguage *language
);
// Further subtree constructors.
Subtree ts_subtree_new_error_node(
  SubtreeArray *children,
  bool extra,
  const TSLanguage * language
);
Subtree ts_subtree_new_missing_leaf(
  SubtreePool *pool,
  TSSymbol symbol,
  Length padding,
  uint32_t lookahead_bytes,
  const TSLanguage *language
);

// Ownership, comparison and mutation.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self);
void ts_subtree_retain(Subtree self);
void ts_subtree_release(SubtreePool *pool, Subtree self);
int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool);
void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language);
void ts_subtree_compress(MutableSubtree self, unsigned count, const TSLanguage *language, MutableSubtreeArray *stack);
void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language);
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool);

// Debug output. `ts_subtree_string` returns a buffer obtained from `ts_malloc`.
char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all);
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f);

// External scanner state access.
Subtree ts_subtree_last_external_token(Subtree tree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other);

// Read the field `name` from whichever representation `self` uses. Only
// valid for fields that exist under the same name in both
// `SubtreeInlineData` and `SubtreeHeapData`.
#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)

// Accessors for the fields common to inline and heap subtrees.
static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }

// The helper macro is private to the accessors above.
#undef SUBTREE_GET

// Compute how many bytes a heap-allocated subtree with `child_count`
// children occupies: the child array plus the heap data itself.
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
  size_t children_bytes = child_count * sizeof(Subtree);
  return children_bytes + sizeof(SubtreeHeapData);
}

// Get a subtree's children, which are allocated immediately before the
// tree's own heap data. Evaluates to NULL for inline subtrees.
//
// This is a macro, so `self` is evaluated more than once; do not pass an
// expression with side effects.
#define ts_subtree_children(self) \
  ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)

// Set the `extra` flag on whichever representation `self` uses.
static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
  if (!self->data.is_inline) {
    self->ptr->extra = is_extra;
    return;
  }
  self->data.extra = is_extra;
}

// Symbol of the subtree's first leaf: the subtree's own symbol for leaves,
// or the cached `first_leaf.symbol` for parent nodes.
static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
  if (self.data.is_inline) {
    return self.data.symbol;
  }
  return self.ptr->child_count > 0
    ? self.ptr->first_leaf.symbol
    : self.ptr->symbol;
}

// Parse state of the subtree's first leaf: the subtree's own state for
// leaves, or the cached `first_leaf.parse_state` for parent nodes.
static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
  if (self.data.is_inline) {
    return self.data.parse_state;
  }
  return self.ptr->child_count > 0
    ? self.ptr->first_leaf.parse_state
    : self.ptr->parse_state;
}

// Padding that precedes the subtree's content, as a `Length`.
static inline Length ts_subtree_padding(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->padding;
  }
  // Inline subtrees store padding in narrow fields; widen them here.
  Length inline_padding = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
  return inline_padding;
}

// Size of the subtree's content (excluding padding), as a `Length`.
static inline Length ts_subtree_size(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->size;
  }
  // Inline subtrees store only a byte count; the extent is reported as
  // zero rows and `size_bytes` columns.
  Length inline_size = {self.data.size_bytes, {0, self.data.size_bytes}};
  return inline_size;
}

// Padding plus content size.
static inline Length ts_subtree_total_size(Subtree self) {
  Length padding = ts_subtree_padding(self);
  Length size = ts_subtree_size(self);
  return length_add(padding, size);
}

// Byte count of padding plus content.
static inline uint32_t ts_subtree_total_bytes(Subtree self) {
  Length total = ts_subtree_total_size(self);
  return total.bytes;
}

// Number of children; inline subtrees never have any.
static inline uint32_t ts_subtree_child_count(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->child_count;
}

// Repeat depth of a parent node; 0 for inline subtrees and for heap leaves.
//
// `repeat_depth` lives in the non-terminal arm of the `SubtreeHeapData`
// union, which shares storage with `external_scanner_state` and
// `lookahead_char`. Reading it from a childless heap subtree would therefore
// return unrelated bytes, so leaves are answered with 0 — the same guard
// used by `ts_subtree_visible_descendant_count` and
// `ts_subtree_dynamic_precedence`.
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
  return (self.data.is_inline || self.ptr->child_count == 0)
    ? 0
    : self.ptr->repeat_depth;
}

// Non-zero when `self` is a heap subtree that is neither named nor visible
// and has at least one child.
static inline uint32_t ts_subtree_is_repetition(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return !(self.ptr->named || self.ptr->visible) && self.ptr->child_count != 0;
}

// Visible descendant count of a parent node; 0 for any kind of leaf.
static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
  if (self.data.is_inline) return 0;
  if (self.ptr->child_count == 0) return 0;
  return self.ptr->visible_descendant_count;
}

// Visible child count of a parent node; 0 for any kind of leaf.
static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
  return ts_subtree_child_count(self) > 0
    ? self.ptr->visible_child_count
    : 0;
}

// Error cost of the subtree. Missing nodes have a fixed cost; otherwise
// inline subtrees cost 0 and heap subtrees report their stored cost.
static inline uint32_t ts_subtree_error_cost(Subtree self) {
  if (ts_subtree_missing(self)) {
    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
  }
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->error_cost;
}

// Dynamic precedence of a parent node; 0 for any kind of leaf.
static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
  if (self.data.is_inline || self.ptr->child_count == 0) {
    return 0;
  }
  return self.ptr->dynamic_precedence;
}

// Production id of a parent node; 0 for any kind of leaf.
static inline uint16_t ts_subtree_production_id(Subtree self) {
  return ts_subtree_child_count(self) > 0
    ? self.ptr->production_id
    : 0;
}

// `fragile_left` flag; always false for inline subtrees.
static inline bool ts_subtree_fragile_left(Subtree self) {
  return !self.data.is_inline && self.ptr->fragile_left;
}

// `fragile_right` flag; always false for inline subtrees.
static inline bool ts_subtree_fragile_right(Subtree self) {
  return !self.data.is_inline && self.ptr->fragile_right;
}

// `has_external_tokens` flag; always false for inline subtrees.
static inline bool ts_subtree_has_external_tokens(Subtree self) {
  return !self.data.is_inline && self.ptr->has_external_tokens;
}

// `has_external_scanner_state_change` flag; always false for inline subtrees.
static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
  return !self.data.is_inline && self.ptr->has_external_scanner_state_change;
}

// `depends_on_column` flag; always false for inline subtrees.
static inline bool ts_subtree_depends_on_column(Subtree self) {
  return !self.data.is_inline && self.ptr->depends_on_column;
}

// True when a heap subtree is fragile on either side; inline subtrees are
// never fragile.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_left || self.ptr->fragile_right;
}

// True when the subtree's symbol is the builtin error symbol.
static inline bool ts_subtree_is_error(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_error;
}

// True when the subtree's symbol is the builtin end-of-input symbol.
static inline bool ts_subtree_is_eof(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_end;
}

// Reinterpret a mutable subtree as an immutable one by copying its inline
// data member across.
static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
  Subtree immutable;
  immutable.data = self.data;
  return immutable;
}

// Reinterpret an immutable subtree as a mutable one without copying the heap
// data or checking anything; the caller is responsible for it being safe to
// mutate.
static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
  MutableSubtree mutable_view;
  mutable_view.data = self.data;
  return mutable_view;
}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_SUBTREE_H_



================================================
FILE: lib/src/tree.c
================================================
#include "tree_sitter/api.h"
#include "./array.h"
#include "./get_changed_ranges.h"
#include "./length.h"
#include "./subtree.h"
#include "./tree_cursor.h"
#include "./tree.h"

// Allocate a new tree that takes over `root`, holds its own copy of
// `language`, and stores a private copy of the `included_range_count` ranges
// in `included_ranges`.
TSTree *ts_tree_new(
  Subtree root, const TSLanguage *language,
  const TSRange *included_ranges, unsigned included_range_count
) {
  TSTree *result = ts_malloc(sizeof(TSTree));
  result->root = root;
  result->language = ts_language_copy(language);
  result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
  // Passing a null pointer to `memcpy` is undefined even for a zero length,
  // and with no ranges either pointer may be null, so skip the copy then.
  if (included_range_count > 0) {
    memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
  }
  result->included_range_count = included_range_count;
  return result;
}

TSTree *ts_tree_copy(const TSTree *self) {
  ts_subtree_retain(self->root);
  return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
}

// Release everything owned by `self`. A NULL tree is ignored.
void ts_tree_delete(TSTree *self) {
  if (self == NULL) {
    return;
  }

  // Releasing the root needs a pool; a temporary one is enough.
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  ts_subtree_release(&scratch_pool, self->root);
  ts_subtree_pool_delete(&scratch_pool);

  ts_language_delete(self->language);
  ts_free(self->included_ranges);
  ts_free(self);
}

TSNode ts_tree_root_node(const TSTree *self) {
  return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
}

// Like `ts_tree_root_node`, but with the root's position shifted forward by
// the given byte and point offset.
TSNode ts_tree_root_node_with_offset(
  const TSTree *self,
  uint32_t offset_bytes,
  TSPoint offset_extent
) {
  Length base = {offset_bytes, offset_extent};
  Length start = length_add(base, ts_subtree_padding(self->root));
  return ts_node_new(self, &self->root, start, 0);
}

// Return the language stored on the tree.
const TSLanguage *ts_tree_language(const TSTree *self) {
  const TSLanguage *language = self->language;
  return language;
}

// Apply `edit` to the tree: first shift or clamp every included range so it
// matches the edited text, then edit the root subtree itself.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  for (unsigned i = 0; i < self->included_range_count; i++) {
    TSRange *range = &self->included_ranges[i];

    // --- Range end ---
    if (range->end_byte >= edit->old_end_byte) {
      // The range ends at or after the edited region: shift its end by the
      // edit's size change. An end of UINT32_MAX is left untouched.
      if (range->end_byte != UINT32_MAX) {
        range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
        range->end_point = point_add(
          edit->new_end_point,
          point_sub(range->end_point, edit->old_end_point)
        );
        // The sum above adds a non-negative distance to `new_end_byte`, so a
        // smaller result means the addition wrapped around; saturate.
        if (range->end_byte < edit->new_end_byte) {
          range->end_byte = UINT32_MAX;
          range->end_point = POINT_MAX;
        }
      }
    } else if (range->end_byte > edit->start_byte) {
      // The range ends inside the edited region: pull its end back to the
      // start of the edit.
      range->end_byte = edit->start_byte;
      range->end_point = edit->start_point;
    }

    // --- Range start (same rules, without the UINT32_MAX exemption) ---
    if (range->start_byte >= edit->old_end_byte) {
      range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
      range->start_point = point_add(
        edit->new_end_point,
        point_sub(range->start_point, edit->old_end_point)
      );
      // Saturate on wrap-around, as above.
      if (range->start_byte < edit->new_end_byte) {
        range->start_byte = UINT32_MAX;
        range->start_point = POINT_MAX;
      }
    } else if (range->start_byte > edit->start_byte) {
      range->start_byte = edit->start_byte;
      range->start_point = edit->start_point;
    }
  }

  // Editing the subtree needs a pool; a temporary one is enough.
  SubtreePool pool = ts_subtree_pool_new(0);
  self->root = ts_subtree_edit(self->root, edit, &pool);
  ts_subtree_pool_delete(&pool);
}

// Return a newly allocated copy of the tree's included ranges and store the
// number of ranges in `*length`. The buffer comes from `ts_calloc`.
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
  *length = self->included_range_count;
  TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
  // Passing a null pointer to `memcpy` is undefined even for a zero length,
  // and with no ranges either pointer may be null, so skip the copy then.
  if (self->included_range_count > 0) {
    memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
  }
  return ranges;
}

// Compute the ranges that differ between `old_tree` and `new_tree`. The
// number of ranges is stored in `*length` and the range array is returned.
TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
  // One cursor per tree, each positioned on its root.
  TreeCursor old_cursor = {NULL, array_new(), 0};
  TreeCursor new_cursor = {NULL, array_new(), 0};
  ts_tree_cursor_init(&old_cursor, ts_tree_root_node(old_tree));
  ts_tree_cursor_init(&new_cursor, ts_tree_root_node(new_tree));

  // Differences between the two trees' included ranges feed into the
  // subtree comparison.
  TSRangeArray range_diffs = array_new();
  ts_range_array_get_changed_ranges(
    old_tree->included_ranges, old_tree->included_range_count,
    new_tree->included_ranges, new_tree->included_range_count,
    &range_diffs
  );

  TSRange *changed_ranges;
  *length = ts_subtree_get_changed_ranges(
    &old_tree->root, &new_tree->root, &old_cursor, &new_cursor,
    old_tree->language, &range_diffs, &changed_ranges
  );

  // Free the temporaries; the result array is handed to the caller.
  array_delete(&range_diffs);
  array_delete(&old_cursor.stack);
  array_delete(&new_cursor.stack);
  return changed_ranges;
}

#ifdef _WIN32

#include <io.h>
#include <windows.h>

// Duplicate a Windows HANDLE and wrap the duplicate in a C runtime file
// descriptor. Returns the descriptor, or -1 on failure.
int _ts_dup(HANDLE handle) {
  HANDLE dup_handle;
  if (!DuplicateHandle(
    GetCurrentProcess(), handle,
    GetCurrentProcess(), &dup_handle,
    0, FALSE, DUPLICATE_SAME_ACCESS
  )) return -1;

  int file_descriptor = _open_osfhandle((intptr_t)dup_handle, 0);
  if (file_descriptor == -1) {
    // No descriptor took ownership of the duplicate, so close it here
    // instead of leaking it.
    CloseHandle(dup_handle);
  }
  return file_descriptor;
}

// Write a DOT graph of the tree to the file behind descriptor `fd`. The
// descriptor is duplicated first, so the caller's `fd` stays open. If the
// descriptor cannot be duplicated or opened as a stream, nothing is written.
void ts_tree_print_dot_graph(const TSTree *self, int fd) {
  int duplicate = _ts_dup((HANDLE)_get_osfhandle(fd));
  if (duplicate < 0) return;

  FILE *file = _fdopen(duplicate, "a");
  if (!file) {
    // The stream never took ownership of the descriptor; close it directly.
    _close(duplicate);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}

#elif !defined(__wasi__) // WASI doesn't support dup

#include <unistd.h>

// Duplicate a file descriptor with `dup`; returns the new descriptor, or -1
// on failure.
int _ts_dup(int file_descriptor) {
  int duplicate = dup(file_descriptor);
  return duplicate;
}

// Write a DOT graph of the tree to `file_descriptor`. The descriptor is
// duplicated first, so the caller's descriptor stays open. If it cannot be
// duplicated or opened as a stream, nothing is written.
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
  int duplicate = _ts_dup(file_descriptor);
  if (duplicate < 0) return;

  FILE *file = fdopen(duplicate, "a");
  if (!file) {
    // The stream never took ownership of the descriptor; close it directly.
    close(duplicate);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}

#else

// WASI build: descriptors cannot be duplicated, so printing the graph is a
// no-op. The casts only silence unused-parameter warnings.
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
  (void)self;
  (void)file_descriptor;
}

#endif



================================================
FILE: lib/src/tree.h
================================================
#ifndef TREE_SITTER_TREE_H_
#define TREE_SITTER_TREE_H_

#include "./subtree.h"

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): presumably one entry of a child-to-parent lookup cache,
// recording a child subtree, its parent, the child's position and alias —
// confirm against the code that uses it.
typedef struct {
  const Subtree *child;
  const Subtree *parent;
  Length position;
  TSSymbol alias_symbol;
} ParentCacheEntry;

// A syntax tree: the root subtree, the language it was parsed with, and the
// tree's own array of included ranges.
struct TSTree {
  Subtree root;
  const TSLanguage *language;
  // Array of `included_range_count` ranges, allocated in `ts_tree_new`.
  TSRange *included_ranges;
  unsigned included_range_count;
};

// Create a tree from a root subtree, a language and a set of included ranges.
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count);
// Create a node handle for `subtree` within `tree` at `position`, with an
// optional alias symbol (0 for none).
TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_TREE_H_



================================================
FILE: lib/src/tree_cursor.c
================================================
#include "tree_sitter/api.h"
#include "./tree_cursor.h"
#include "./language.h"
#include "./tree.h"

// Iteration state for walking the children of one parent subtree.
typedef struct {
  // Subtree whose children are visited; NULL_SUBTREE when there are none.
  Subtree parent;
  const TSTree *tree;
  // Position of the child at `child_index`.
  Length position;
  // Index of the next child to yield, counting every child.
  uint32_t child_index;
  // Index of the next child to yield, counting only non-extra children.
  uint32_t structural_child_index;
  // Running descendant index assigned to the next child yielded.
  uint32_t descendant_index;
  // Aliases of the parent's production, indexed by structural child index;
  // may be NULL.
  const TSSymbol *alias_sequence;
} CursorChildIterator;

// CursorChildIterator

// Decide whether the stack entry at `index` counts as a visible node: the
// bottom entry always does, as does any visible subtree; a hidden non-extra
// subtree does if its parent's production gives it an alias.
static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
  TreeCursorEntry *entry = array_get(&self->stack, index);

  if (index == 0 || ts_subtree_visible(*entry->subtree)) {
    return true;
  }
  if (ts_subtree_extra(*entry->subtree)) {
    return false;
  }

  TreeCursorEntry *parent_entry = array_get(&self->stack, index - 1);
  return ts_language_alias_at(
    self->tree->language,
    parent_entry->subtree->ptr->production_id,
    entry->structural_child_index
  );
}

static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
  TreeCursorEntry *last_entry = array_back(&self->stack);
  if (ts_subtree_child_count(*last_entry->subtree) == 0) {
    return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
  }
  const TSSymbol *alias_sequence = ts_language_alias_sequence(
    self->tree->language,
    last_entry->subtree->ptr->production_id
  );

  uint32_t descendant_index = last_entry->descendant_index;
  if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
    descendant_index += 1;
  }

  return (CursorChildIterator) {
    .tree = self->tree,
    .parent = *last_entry->subtree,
    .position = last_entry->position,
    .child_index = 0,
    .structural_child_index = 0,
    .descendant_index = descendant_index,
    .alias_sequence = alias_sequence,
  };
}

// Yield the child at the iterator's current index and advance to the next.
//
// On success, `*result` describes the yielded child (subtree, position and
// indices as they were before advancing), `*visible` says whether it is
// visible either by itself or through an alias, and true is returned.
// Returns false once all children have been yielded or when there is no
// parent.
static inline bool ts_tree_cursor_child_iterator_next(
  CursorChildIterator *self,
  TreeCursorEntry *result,
  bool *visible
) {
  if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
  const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
  *result = (TreeCursorEntry) {
    .subtree = child,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
    .descendant_index = self->descendant_index,
  };
  *visible = ts_subtree_visible(*child);
  bool extra = ts_subtree_extra(*child);
  // Only non-extra children have an alias slot and a structural index.
  if (!extra) {
    if (self->alias_sequence) {
      *visible |= self->alias_sequence[self->structural_child_index];
    }
    self->structural_child_index++;
  }

  // Skip past the child's visible descendants, plus the child itself when
  // it is visible.
  self->descendant_index += ts_subtree_visible_descendant_count(*child);
  if (*visible) {
    self->descendant_index += 1;
  }

  // Move the position past this child's content...
  self->position = length_add(self->position, ts_subtree_size(*child));
  self->child_index++;

  // ...and, if another child follows, past that child's padding, so that
  // `position` is where the next child's content starts.
  if (self->child_index < self->parent.ptr->child_count) {
    Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
    self->position = length_add(self->position, ts_subtree_padding(next_child));
  }

  return true;
}

// Compute the position `p` such that `p + b == a`. This is only possible
// when `b` spans no rows (and `a` is defined); otherwise the result is
// `LENGTH_UNDEFINED` and the caller has to recompute the position some
// other way.
static inline Length length_backtrack(Length a, Length b) {
  bool can_subtract = !length_is_undefined(a) && b.extent.row == 0;
  if (!can_subtract) {
    return LENGTH_UNDEFINED;
  }

  // Same row, so bytes and columns can simply be subtracted.
  Length earlier = a;
  earlier.bytes -= b.bytes;
  earlier.extent.column -= b.extent.column;
  return earlier;
}

// Yield the child at the iterator's current index and step back to the
// previous one.
//
// On success, `*result` describes the yielded child, `*visible` says whether
// it is visible either by itself or through an alias, and true is returned.
// Returns false when there is no parent or when the iteration has already
// moved before the first child.
static inline bool ts_tree_cursor_child_iterator_previous(
  CursorChildIterator *self,
  TreeCursorEntry *result,
  bool *visible
) {
  // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into
  // account unsigned underflow
  //
  // Stepping back from child 0 wraps `child_index` around to a huge value,
  // so any index at or beyond `child_count` means the iteration is over.
  // (Testing `(int8_t)child_index == -1` instead would also stop at indices
  // 255, 511, ... whose low byte is 0xFF, cutting the walk short in nodes
  // with 256 or more children.)
  if (!self->parent.ptr || self->child_index >= self->parent.ptr->child_count) return false;
  const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
  *result = (TreeCursorEntry) {
    .subtree = child,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
  };
  *visible = ts_subtree_visible(*child);
  bool extra = ts_subtree_extra(*child);

  // Move the position back over this child's padding. This becomes
  // LENGTH_UNDEFINED when the padding spans rows.
  self->position = length_backtrack(self->position, ts_subtree_padding(*child));
  self->child_index--;

  if (!extra && self->alias_sequence) {
    *visible |= self->alias_sequence[self->structural_child_index];
    if (self->structural_child_index > 0) {
      self->structural_child_index--;
    }
  }

  // unsigned can underflow so compare it to child_count
  if (self->child_index < self->parent.ptr->child_count) {
    // Move the position back over the previous child's content, so that it
    // is where that child's content starts.
    Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
    Length size = ts_subtree_size(previous_child);
    self->position = length_backtrack(self->position, size);
  }

  return true;
}

// TSTreeCursor - lifecycle

// Create a cursor positioned on `node`.
TSTreeCursor ts_tree_cursor_new(TSNode node) {
  TSTreeCursor cursor = {NULL, NULL, {0, 0, 0}};
  TreeCursor *internal = (TreeCursor *)&cursor;
  ts_tree_cursor_init(internal, node);
  return cursor;
}

// Re-position an existing cursor on `node`.
void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
  TreeCursor *self = (TreeCursor *)_self;
  ts_tree_cursor_init(self, node);
}

// Point the cursor at `node`: adopt the node's tree and alias, empty the
// stack, and push a single entry describing the node.
void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
  self->tree = node.tree;
  self->root_alias_symbol = node.context[3];
  array_clear(&self->stack);

  TreeCursorEntry root_entry = {
    .subtree = (const Subtree *)node.id,
    .position = {
      ts_node_start_byte(node),
      ts_node_start_point(node)
    },
    .child_index = 0,
    .structural_child_index = 0,
    .descendant_index = 0,
  };
  array_push(&self->stack, root_entry);
}

// Releases the cursor's entry stack.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;
  array_delete(&cursor->stack);
}

// TSTreeCursor - walking the tree

// Performs one step towards the first child of the current entry.
//
// Pushes the first child that is either visible (`TreeCursorStepVisible`) or
// hidden but containing visible children (`TreeCursorStepHidden`). Returns
// `TreeCursorStepNone`, leaving the stack untouched, if no child qualifies.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator children = ts_tree_cursor_iterate_children(self);
  TreeCursorEntry child;
  bool is_visible;

  while (ts_tree_cursor_child_iterator_next(&children, &child, &is_visible)) {
    TreeCursorStep step;
    if (is_visible) {
      step = TreeCursorStepVisible;
    } else if (ts_subtree_visible_child_count(*child.subtree) > 0) {
      step = TreeCursorStepHidden;
    } else {
      // Hidden with nothing visible inside: not a candidate.
      continue;
    }
    array_push(&self->stack, child);
    return step;
  }

  return TreeCursorStepNone;
}

// Moves the cursor to the first visible child of the current node, descending
// through hidden nodes as needed. Returns true if the last step taken landed on
// a visible node.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    // A hidden step means a wrapper was entered; keep descending through it.
    step = ts_tree_cursor_goto_first_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}

// Performs one step towards the last child of the current entry.
//
// All children are scanned front to back, remembering the most recent one that
// is visible or that is hidden but contains visible children; that one is then
// pushed. Returns `TreeCursorStepNone` if there is no such child.
TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator children = ts_tree_cursor_iterate_children(self);
  if (!children.parent.ptr) return TreeCursorStepNone;
  if (children.parent.ptr->child_count == 0) return TreeCursorStepNone;

  TreeCursorEntry candidate = {0};
  TreeCursorStep candidate_step = TreeCursorStepNone;
  TreeCursorEntry child;
  bool is_visible;

  while (ts_tree_cursor_child_iterator_next(&children, &child, &is_visible)) {
    if (is_visible) {
      candidate_step = TreeCursorStepVisible;
      candidate = child;
    } else if (ts_subtree_visible_child_count(*child.subtree) > 0) {
      candidate_step = TreeCursorStepHidden;
      candidate = child;
    }
  }

  // A NULL subtree means no child was ever recorded.
  if (!candidate.subtree) return TreeCursorStepNone;

  array_push(&self->stack, candidate);
  return candidate_step;
}

// Moves the cursor to the last visible child of the current node, descending
// through hidden nodes as needed. Returns true if the last step taken landed on
// a visible node.
bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    // A hidden step means a wrapper was entered; keep descending through it.
    step = ts_tree_cursor_goto_last_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}

// Moves the cursor to the first visible child whose end lies beyond both
// `goal_byte` and `goal_point`, descending through hidden children on the way.
//
// Returns the number of visible children that were skipped before the chosen
// one (i.e. its visible-child index), or -1 if no child qualifies, in which
// case the stack is restored to its size on entry.
static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
  TSTreeCursor *_self,
  uint32_t goal_byte,
  TSPoint goal_point
) {
  TreeCursor *self = (TreeCursor *)_self;
  const uint32_t saved_size = self->stack.size;
  uint32_t skipped_visible = 0;

  for (;;) {
    bool descended = false;
    bool is_visible;
    TreeCursorEntry child;
    CursorChildIterator children = ts_tree_cursor_iterate_children(self);

    while (ts_tree_cursor_child_iterator_next(&children, &child, &is_visible)) {
      Length child_end = length_add(child.position, ts_subtree_size(*child.subtree));
      uint32_t nested_visible = ts_subtree_visible_child_count(*child.subtree);
      bool reaches_goal =
        child_end.bytes > goal_byte &&
        point_gt(child_end.extent, goal_point);

      if (!reaches_goal) {
        // Entirely before the goal: account for the visible nodes it
        // contributes and keep scanning.
        skipped_visible += is_visible ? 1 : nested_visible;
        continue;
      }

      if (is_visible) {
        array_push(&self->stack, child);
        return skipped_visible;
      }

      if (nested_visible > 0) {
        // Hidden wrapper with visible content: restart the scan inside it.
        array_push(&self->stack, child);
        descended = true;
        break;
      }
    }

    if (!descended) break;
  }

  self->stack.size = saved_size;
  return -1;
}

// Byte-only variant of the child search: the point goal is fixed at
// `POINT_ZERO`.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
  int64_t visible_child_index = ts_tree_cursor_goto_first_child_for_byte_and_point(
    self,
    goal_byte,
    POINT_ZERO
  );
  return visible_child_index;
}

// Point-only variant of the child search: the byte goal is fixed at 0.
int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
  int64_t visible_child_index = ts_tree_cursor_goto_first_child_for_byte_and_point(
    self,
    0,
    goal_point
  );
  return visible_child_index;
}

// Shared implementation for moving the cursor to an adjacent sibling in either
// direction. `advance` is the child-iterator step function that decides the
// direction (`ts_tree_cursor_child_iterator_next` or
// `ts_tree_cursor_child_iterator_previous`).
//
// Returns `TreeCursorStepVisible` if a visible sibling was pushed onto the
// stack, `TreeCursorStepHidden` if a hidden sibling containing visible children
// was pushed (the caller still has to descend into it), and
// `TreeCursorStepNone` if no sibling was found, in which case the stack size is
// restored to its value on entry.
TreeCursorStep ts_tree_cursor_goto_sibling_internal(
  TSTreeCursor *_self,
  bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)
) {
  TreeCursor *self = (TreeCursor *)_self;
  uint32_t initial_size = self->stack.size;

  // Pop one level at a time, looking for a sibling at each level.
  while (self->stack.size > 1) {
    TreeCursorEntry entry = array_pop(&self->stack);

    // Build an iterator over the new top entry's children and position it on
    // the entry that was just popped.
    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
    iterator.child_index = entry.child_index;
    iterator.structural_child_index = entry.structural_child_index;
    iterator.position = entry.position;
    iterator.descendant_index = entry.descendant_index;

    // The first step consumes the popped entry itself and reports whether it
    // is visible. If it is visible and is not the entry the cursor started on
    // (`stack.size + 1 < initial_size`), it is a visible ancestor: stop
    // climbing, because siblings of a visible ancestor are not siblings of the
    // starting node.
    bool visible = false;
    advance(&iterator, &entry, &visible);
    if (visible && self->stack.size + 1 < initial_size) break;

    // Scan the remaining children in the chosen direction.
    while (advance(&iterator, &entry, &visible)) {
      if (visible) {
        array_push(&self->stack, entry);
        return TreeCursorStepVisible;
      }

      if (ts_subtree_visible_child_count(*entry.subtree)) {
        array_push(&self->stack, entry);
        return TreeCursorStepHidden;
      }
    }
  }

  // Nothing found: undo the pops by restoring the size.
  // NOTE(review): this presumably relies on `array_pop` leaving the popped
  // entries in the array's storage — confirm against the array implementation.
  self->stack.size = initial_size;
  return TreeCursorStepNone;
}

// Single step to the next sibling: runs the shared sibling walker with the
// forward child iterator.
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_next
  );
  return step;
}

// Moves the cursor to the next sibling. Returns false if the internal step
// found no sibling, true otherwise.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_next_sibling_internal(self);
  if (step == TreeCursorStepVisible) return true;
  if (step != TreeCursorStepHidden) return false;

  // The step landed on a hidden node that has visible children: descend to its
  // first visible child.
  ts_tree_cursor_goto_first_child(self);
  return true;
}

// Single step to the previous sibling.
//
// Walking backwards subtracts lengths, and subtracting across a row loses
// column information. If the position of the entry that was reached is
// undefined after the step, it is recomputed by walking forward from the
// parent entry's position.
TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;

  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_previous
  );
  if (step == TreeCursorStepNone) return step;

  // If the position is already valid, there is nothing to recompute.
  TreeCursorEntry *current = array_back(&self->stack);
  if (!length_is_undefined(current->position)) return step;

  // Rebuild the position starting from the parent entry.
  const TreeCursorEntry *parent = array_get(&self->stack, self->stack.size - 2);
  const Subtree *siblings = ts_subtree_children((*(parent->subtree)));
  uint32_t target_index = current->child_index;
  Length restored = parent->position;

  if (target_index > 0) {
    // Skip the first child's padding, since its position should match the
    // position of the parent; add full sizes for the children in between, and
    // finally the target's own padding.
    restored = length_add(restored, ts_subtree_size(siblings[0]));
    uint32_t i = 1;
    while (i < target_index) {
      restored = length_add(restored, ts_subtree_total_size(siblings[i]));
      i++;
    }
    restored = length_add(restored, ts_subtree_padding(siblings[target_index]));
  }

  current->position = restored;
  return step;
}

// Moves the cursor to the previous sibling. Returns false if the internal step
// found no sibling, true otherwise.
bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_previous_sibling_internal(self);
  if (step == TreeCursorStepVisible) return true;
  if (step != TreeCursorStepHidden) return false;

  // The step landed on a hidden node that has visible children: descend to its
  // last visible child.
  ts_tree_cursor_goto_last_child(self);
  return true;
}

// Moves the cursor to the nearest visible ancestor by truncating the stack.
// Returns false, leaving the cursor unchanged, if there is none.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;

  // `new_size` is the stack size that would make entry `new_size - 1` the
  // cursor's current entry; try every ancestor from the nearest up to the root.
  unsigned new_size = self->stack.size - 1;
  while (new_size > 0) {
    if (ts_tree_cursor_is_entry_visible(self, new_size - 1)) {
      self->stack.size = new_size;
      return true;
    }
    new_size--;
  }
  return false;
}

// Moves the cursor to the node whose descendant index is
// `goal_descendant_index`: first climbs to the lowest stack entry whose index
// range contains the goal, then walks down towards it.
void ts_tree_cursor_goto_descendant(
  TSTreeCursor *_self,
  uint32_t goal_descendant_index
) {
  TreeCursor *self = (TreeCursor *)_self;

  // Ascend to the lowest ancestor that contains the goal node.
  for (;;) {
    uint32_t top = self->stack.size - 1;
    TreeCursorEntry *candidate = array_get(&self->stack, top);
    // A visible entry occupies one descendant index itself.
    uint32_t own_slot = ts_tree_cursor_is_entry_visible(self, top) ? 1 : 0;
    uint32_t end_index =
      candidate->descendant_index +
      own_slot +
      ts_subtree_visible_descendant_count(*candidate->subtree);
    bool contains_goal =
      candidate->descendant_index <= goal_descendant_index &&
      end_index > goal_descendant_index;

    if (contains_goal) break;
    if (self->stack.size <= 1) return;
    self->stack.size--;
  }

  // Descend to the goal node.
  for (;;) {
    bool is_visible;
    TreeCursorEntry child;
    CursorChildIterator children = ts_tree_cursor_iterate_children(self);
    if (children.descendant_index > goal_descendant_index) return;

    bool descended = false;
    while (ts_tree_cursor_child_iterator_next(&children, &child, &is_visible)) {
      // Keep stepping until the iterator has moved past the goal; the child it
      // just yielded is then the one to enter.
      if (children.descendant_index <= goal_descendant_index) continue;

      array_push(&self->stack, child);
      if (is_visible && child.descendant_index == goal_descendant_index) return;
      descended = true;
      break;
    }

    if (!descended) return;
  }
}

// Returns the descendant index stored in the cursor's current (top) entry.
uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  return array_back(&self->stack)->descendant_index;
}

// Builds a `TSNode` for the cursor's current entry, resolving the alias symbol
// that applies to it.
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  TreeCursorEntry *current = array_back(&self->stack);

  // Extras never carry an alias. Otherwise the alias comes from the parent's
  // production when there is a parent entry, and from the cursor's root alias
  // when the current entry is the root.
  TSSymbol alias_symbol = 0;
  if (!ts_subtree_extra(*current->subtree)) {
    if (self->stack.size > 1) {
      TreeCursorEntry *parent = array_get(&self->stack, self->stack.size - 2);
      alias_symbol = ts_language_alias_at(
        self->tree->language,
        parent->subtree->ptr->production_id,
        current->structural_child_index
      );
    } else {
      alias_symbol = self->root_alias_symbol;
    }
  }

  return ts_node_new(
    self->tree,
    current->subtree,
    current->position,
    alias_symbol
  );
}

// Private - Get various facts about the current node that are needed
// when executing tree queries.
//
// Outputs:
//   - `field_id`: the field id found for the current node (possibly through
//     hidden ancestors), or 0 if none was found.
//   - `has_later_siblings` / `has_later_named_siblings`: whether a later
//     sibling that is visible, or that is hidden but contains visible (resp.
//     named) children, exists.
//   - `can_have_later_siblings_with_this_field`: whether the field map contains
//     an entry for the same field at a later structural child index.
//   - `supertypes` / `supertype_count`: on entry `*supertype_count` is the
//     capacity of `supertypes`; on return it is the number of supertype symbols
//     written.
void ts_tree_cursor_current_status(
  const TSTreeCursor *_self,
  TSFieldId *field_id,
  bool *has_later_siblings,
  bool *has_later_named_siblings,
  bool *can_have_later_siblings_with_this_field,
  TSSymbol *supertypes,
  unsigned *supertype_count
) {
  const TreeCursor *self = (const TreeCursor *)_self;
  // Remember the caller-provided capacity before the count is reset.
  unsigned max_supertypes = *supertype_count;
  *field_id = 0;
  *supertype_count = 0;
  *has_later_siblings = false;
  *has_later_named_siblings = false;
  *can_have_later_siblings_with_this_field = false;

  // Walk up the tree, visiting the current node and its invisible ancestors,
  // because fields can refer to nodes through invisible *wrapper* nodes.
  for (unsigned i = self->stack.size - 1; i > 0; i--) {
    TreeCursorEntry *entry = array_get(&self->stack, i);
    TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1);

    const TSSymbol *alias_sequence = ts_language_alias_sequence(
      self->tree->language,
      parent_entry->subtree->ptr->production_id
    );

    // Effective symbol of a child of `parent_entry`: its alias if it is not an
    // extra and the alias sequence has a non-zero entry at its structural
    // index, otherwise the subtree's own symbol.
    #define subtree_symbol(subtree, structural_child_index) \
      ((                                                    \
        !ts_subtree_extra(subtree) &&                       \
        alias_sequence &&                                   \
        alias_sequence[structural_child_index]              \
      ) ?                                                   \
        alias_sequence[structural_child_index] :            \
        ts_subtree_symbol(subtree))

    // Stop walking up when a visible ancestor is found.
    TSSymbol entry_symbol = subtree_symbol(
      *entry->subtree,
      entry->structural_child_index
    );
    TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
      self->tree->language,
      entry_symbol
    );
    if (i != self->stack.size - 1 && entry_metadata.visible) break;

    // Record any supertypes
    if (entry_metadata.supertype && *supertype_count < max_supertypes) {
      supertypes[*supertype_count] = entry_symbol;
      (*supertype_count)++;
    }

    // Determine if the current node has later siblings.
    if (!*has_later_siblings) {
      unsigned sibling_count = parent_entry->subtree->ptr->child_count;
      // Structural index of the first sibling after `entry`; extras do not
      // consume a structural index.
      unsigned structural_child_index = entry->structural_child_index;
      if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
      for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
        Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
        TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
          self->tree->language,
          subtree_symbol(sibling, structural_child_index)
        );
        if (sibling_metadata.visible) {
          *has_later_siblings = true;
          if (*has_later_named_siblings) break;
          if (sibling_metadata.named) {
            *has_later_named_siblings = true;
            break;
          }
        } else if (ts_subtree_visible_child_count(sibling) > 0) {
          // A hidden sibling counts through the visible children it contains.
          *has_later_siblings = true;
          if (*has_later_named_siblings) break;
          if (sibling.ptr->named_child_count > 0) {
            *has_later_named_siblings = true;
            break;
          }
        }
        if (!ts_subtree_extra(sibling)) structural_child_index++;
      }
    }

    #undef subtree_symbol

    // Extras are skipped for the field lookups below.
    if (!ts_subtree_extra(*entry->subtree)) {
      const TSFieldMapEntry *field_map, *field_map_end;
      ts_language_field_map(
        self->tree->language,
        parent_entry->subtree->ptr->production_id,
        &field_map, &field_map_end
      );

      // Look for a field name associated with the current node.
      if (!*field_id) {
        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
          if (!map->inherited && map->child_index == entry->structural_child_index) {
            *field_id = map->field_id;
            break;
          }
        }
      }

      // Determine if the current node can have later siblings with the same field name.
      if (*field_id) {
        for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
          if (
            map->field_id == *field_id &&
            map->child_index > entry->structural_child_index
          ) {
            *can_have_later_siblings_with_this_field = true;
            break;
          }
        }
      }
    }
  }
}

// Returns the number of visible entries on the stack, not counting the root
// entry at index 0.
uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  uint32_t visible_entries = 0;
  unsigned index = 1;
  while (index < self->stack.size) {
    if (ts_tree_cursor_is_entry_visible(self, index)) visible_entries++;
    index++;
  }
  return visible_entries;
}

// Returns the nearest visible ancestor of the current node as a `TSNode`
// without moving the cursor. The root entry always counts as visible. If the
// current entry has no ancestor on the stack, a node built from NULL tree and
// subtree pointers is returned.
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  int index = (int)self->stack.size - 2;

  while (index >= 0) {
    TreeCursorEntry *ancestor = array_get(&self->stack, index);
    TSSymbol alias_symbol = 0;
    bool is_visible;

    if (index == 0) {
      is_visible = true;
    } else {
      // A non-root ancestor is visible through its alias or in its own right.
      TreeCursorEntry *ancestor_parent = array_get(&self->stack, index - 1);
      alias_symbol = ts_language_alias_at(
        self->tree->language,
        ancestor_parent->subtree->ptr->production_id,
        ancestor->structural_child_index
      );
      is_visible = (alias_symbol != 0) || ts_subtree_visible(*ancestor->subtree);
    }

    if (is_visible) {
      return ts_node_new(
        self->tree,
        ancestor->subtree,
        ancestor->position,
        alias_symbol
      );
    }
    index--;
  }

  return ts_node_new(NULL, NULL, length_zero(), 0);
}

// Returns the field id associated with the current node, or 0 if there is none.
// The lookup also considers the node's invisible ancestors.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  const unsigned current_index = self->stack.size - 1;

  // Walk up the tree, visiting the current node and its invisible ancestors.
  for (unsigned i = current_index; i > 0; i--) {
    TreeCursorEntry *entry = array_get(&self->stack, i);
    TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1);

    // Stop walking up when another visible node is found.
    bool is_ancestor = i != current_index;
    if (is_ancestor && ts_tree_cursor_is_entry_visible(self, i)) return 0;

    // Reaching an extra ends the search without a field.
    if (ts_subtree_extra(*entry->subtree)) return 0;

    const TSFieldMapEntry *field_map, *field_map_end;
    ts_language_field_map(
      self->tree->language,
      parent_entry->subtree->ptr->production_id,
      &field_map, &field_map_end
    );

    // The first non-inherited mapping for this structural index wins.
    const TSFieldMapEntry *map = field_map;
    while (map < field_map_end) {
      if (!map->inherited && map->child_index == entry->structural_child_index) {
        return map->field_id;
      }
      map++;
    }
  }

  return 0;
}

// Returns the name of the current node's field, looked up in the language's
// `field_names` table, or NULL if the node has no field id.
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
  TSFieldId id = ts_tree_cursor_current_field_id(_self);
  if (!id) return NULL;

  const TreeCursor *self = (const TreeCursor *)_self;
  return self->tree->language->field_names[id];
}

// Returns a new cursor holding a copy of `_cursor`'s tree pointer, root alias
// symbol and entry stack.
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
  const TreeCursor *source = (const TreeCursor *)_cursor;

  // All-zero public cursor, spelled the same way as in `ts_tree_cursor_new`.
  TSTreeCursor duplicate = {NULL, NULL, {0, 0, 0}};
  TreeCursor *target = (TreeCursor *)&duplicate;

  target->tree = source->tree;
  target->root_alias_symbol = source->root_alias_symbol;

  // Give the copy its own stack storage and fill it from the source.
  array_init(&target->stack);
  array_push_all(&target->stack, &source->stack);

  return duplicate;
}

// Makes the existing cursor `_dst` a copy of `_src`: copies the tree pointer and
// root alias symbol, then replaces the contents of `_dst`'s stack with `_src`'s
// entries.
void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
  TreeCursor *target = (TreeCursor *)_dst;
  const TreeCursor *source = (const TreeCursor *)_src;

  target->tree = source->tree;
  target->root_alias_symbol = source->root_alias_symbol;

  array_clear(&target->stack);
  array_push_all(&target->stack, &source->stack);
}



================================================
FILE: lib/src/tree_cursor.h
================================================
#ifndef TREE_SITTER_TREE_CURSOR_H_
#define TREE_SITTER_TREE_CURSOR_H_

#include "./subtree.h"

// One level of a tree cursor's stack: a subtree together with the bookkeeping
// needed to locate it inside its parent.
typedef struct {
  // The subtree this entry refers to.
  const Subtree *subtree;
  // Position recorded for this entry; for the root entry it is the node's start
  // byte and start point.
  Length position;
  // Index of this subtree in its parent's array of children.
  uint32_t child_index;
  // Index used for alias-sequence and field-map lookups in the parent's
  // production; extra children do not advance it.
  uint32_t structural_child_index;
  // Index compared against the goal in `ts_tree_cursor_goto_descendant`.
  uint32_t descendant_index;
} TreeCursorEntry;

// Internal representation of a tree cursor; the public `TSTreeCursor` is cast
// to this type by the cursor functions.
typedef struct {
  // The tree being traversed.
  const TSTree *tree;
  // Path from the node the cursor was initialized with (index 0) down to the
  // current entry (last element), including hidden entries.
  Array(TreeCursorEntry) stack;
  // Alias symbol applied to the root entry when it is returned as a node.
  TSSymbol root_alias_symbol;
} TreeCursor;

// Result of a single internal cursor step.
typedef enum {
  // No entry was pushed; the cursor did not move.
  TreeCursorStepNone,
  // A hidden entry that contains visible children was pushed.
  TreeCursorStepHidden,
  // A visible entry was pushed.
  TreeCursorStepVisible,
} TreeCursorStep;

// Points `self` at `node`, discarding any entries already on its stack.
void ts_tree_cursor_init(TreeCursor *self, TSNode node);

// Reports facts about the current node that are needed when executing tree
// queries. On entry `*supertype_count` is the capacity of `supertypes`; on
// return it is the number of supertype symbols written.
void ts_tree_cursor_current_status(
  const TSTreeCursor *_self,
  TSFieldId *field_id,
  bool *has_later_siblings,
  bool *has_later_named_siblings,
  bool *can_have_later_siblings_with_this_field,
  TSSymbol *supertypes,
  unsigned *supertype_count
);

// Single-step moves that report whether the entry that was pushed is visible or
// hidden, instead of descending through hidden entries themselves.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self);
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self);

// Returns (by value) the subtree referenced by the cursor's current entry.
static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  return *array_back(&self->stack)->subtree;
}

// Returns the nearest visible ancestor of the current node without moving the
// cursor.
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self);

#endif  // TREE_SITTER_TREE_CURSOR_H_



================================================
FILE: lib/src/ts_assert.h
================================================
#ifndef TREE_SITTER_ASSERT_H_
#define TREE_SITTER_ASSERT_H_

// `ts_assert(e)`: assertion macro whose expansion depends on `NDEBUG`.
#ifdef NDEBUG
// With NDEBUG defined, `e` is still evaluated (so any side effects in it are
// kept), but the result is discarded and nothing is checked.
#define ts_assert(e) ((void)(e))
#else
// Without NDEBUG, defer to the standard `assert`.
#include <assert.h>
#define ts_assert(e) assert(e)
#endif

#endif // TREE_SITTER_ASSERT_H_



================================================
FILE: lib/src/unicode.h
================================================
#ifndef TREE_SITTER_UNICODE_H_
#define TREE_SITTER_UNICODE_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <limits.h>
#include <stdint.h>

#define U_EXPORT
#define U_EXPORT2
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "portable/endian.h"

// Little-endian counterpart of ICU's `U16_NEXT`: reads the code unit at `s[i]`
// (converted from little-endian to host order), post-increments `i`, and, if
// the unit is a lead surrogate followed by a trail surrogate before `length`,
// consumes the trail as well and stores the supplementary code point in `c`.
//
// Both code units go through `le16toh`. Testing the trail unit in its raw
// storage order would fail to recognise surrogate pairs on big-endian hosts.
#define U16_NEXT_LE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=le16toh((s)[(i)++]); \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=le16toh((s)[(i)]))) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

// Big-endian counterpart of ICU's `U16_NEXT`: reads the code unit at `s[i]`
// (converted from big-endian to host order), post-increments `i`, and, if the
// unit is a lead surrogate followed by a trail surrogate before `length`,
// consumes the trail as well and stores the supplementary code point in `c`.
//
// Both code units go through `be16toh`. Testing the trail unit in its raw
// storage order would, on little-endian hosts, never recognise a trail
// surrogate, so every supplementary character would decode as a lone lead
// surrogate.
#define U16_NEXT_BE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=be16toh((s)[(i)++]); \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=be16toh((s)[(i)]))) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

static const int32_t TS_DECODE_ERROR = U_SENTINEL;

// Decodes one code point from the start of `string` using ICU's `U8_NEXT`,
// storing it in `*code_point`. Returns the offset the macro advanced to, i.e.
// the number of bytes consumed.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t bytes_consumed = 0;
  U8_NEXT(string, bytes_consumed, length, *code_point);
  return bytes_consumed;
}

// Decodes one code point from little-endian UTF-16 data at `string`, storing it
// in `*code_point`. Returns the number of bytes consumed (two per 16-bit unit).
//
// NOTE(review): `length` is forwarded unchanged to `U16_NEXT_LE`, which compares
// it against a 16-bit-unit index — confirm whether callers pass a byte count or
// a unit count.
static inline uint32_t ts_decode_utf16_le(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t units_consumed = 0;
  U16_NEXT_LE(((uint16_t *)string), units_consumed, length, *code_point);
  return units_consumed * 2;
}

// Decodes one code point from big-endian UTF-16 data at `string`, storing it in
// `*code_point`. Returns the number of bytes consumed (two per 16-bit unit).
//
// NOTE(review): `length` is forwarded unchanged to `U16_NEXT_BE`, which compares
// it against a 16-bit-unit index — confirm whether callers pass a byte count or
// a unit count.
static inline uint32_t ts_decode_utf16_be(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t units_consumed = 0;
  U16_NEXT_BE(((uint16_t *)string), units_consumed, length, *code_point);
  return units_consumed * 2;
}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_UNICODE_H_



================================================
FILE: lib/src/wasm_store.h
================================================
#ifndef TREE_SITTER_WASM_H_
#define TREE_SITTER_WASM_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "tree_sitter/api.h"
#include "./parser.h"

// Declarations only: the functions below are implemented outside this header.
// NOTE(review): the grouping comments are inferred from the function names and
// signatures alone — confirm against the implementation.

// Starting/resetting a store and querying its error state.
bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language);
void ts_wasm_store_reset(TSWasmStore *self);
bool ts_wasm_store_has_error(const TSWasmStore *self);

// Lex calls, each taking a parse state id.
bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state);
bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state);

// External scanner calls. The scanner is identified by a 32-bit address value
// rather than a host pointer.
uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self);
void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address);
bool ts_wasm_store_call_scanner_scan(TSWasmStore *self, uint32_t scanner_address, uint32_t valid_tokens_ix);
uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *self, uint32_t scanner_address, char *buffer);
void ts_wasm_store_call_scanner_deserialize(TSWasmStore *self, uint32_t scanner, const char *buffer, unsigned length);

// Retain/release pair for a language.
void ts_wasm_language_retain(const TSLanguage *self);
void ts_wasm_language_release(const TSLanguage *self);

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_WASM_H_



================================================
FILE: lib/src/portable/endian.h
================================================
// "License": Public Domain
// I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
// In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
// be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
// an example on how to get the endian conversion functions on different platforms.

// updates from https://github.com/mikepb/endian.h/issues/4

#ifndef ENDIAN_H
#define ENDIAN_H

// Normalize the various Windows target macros into a single `__WINDOWS__`
// macro, which the platform dispatch below tests.
#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)

#    define __WINDOWS__

#endif

// Platform dispatch. First case: platforms for which this file simply includes
// the system <endian.h> (or the build defined HAVE_ENDIAN_H).
#if defined(HAVE_ENDIAN_H) || \
    defined(__linux__) || \
    defined(__GNU__) || \
    defined(__illumos__) || \
    defined(__NetBSD__) || \
    defined(__OpenBSD__) || \
    defined(__CYGWIN__) || \
    defined(__MSYS__) || \
    defined(__EMSCRIPTEN__) || \
    defined(__wasi__) || \
    defined(__wasm__)

// NetBSD: define _NETBSD_SOURCE before including the header.
// NOTE(review): presumably required there to expose the conversion functions —
// confirm against the NetBSD headers.
#if defined(__NetBSD__)
#define _NETBSD_SOURCE 1
#endif

# include <endian.h>

// Second case: platforms for which <sys/endian.h> is included instead.
#elif defined(HAVE_SYS_ENDIAN_H) || \
    defined(__FreeBSD__) || \
    defined(__DragonFly__)

# include <sys/endian.h>

// Apple platforms: no system header is relied on for the conversion functions;
// they are defined here.
#elif defined(__APPLE__)
// Map the double-underscore spellings onto the plain byte-order macros.
#    define __BYTE_ORDER    BYTE_ORDER
#    define __BIG_ENDIAN    BIG_ENDIAN
#    define __LITTLE_ENDIAN LITTLE_ENDIAN
#    define __PDP_ENDIAN    PDP_ENDIAN

// Without _POSIX_C_SOURCE, use the OSByteOrder.h swap helpers.
#    if !defined(_POSIX_C_SOURCE)
#        include <libkern/OSByteOrder.h>

#        define htobe16(x) OSSwapHostToBigInt16(x)
#        define htole16(x) OSSwapHostToLittleInt16(x)
#        define be16toh(x) OSSwapBigToHostInt16(x)
#        define le16toh(x) OSSwapLittleToHostInt16(x)

#        define htobe32(x) OSSwapHostToBigInt32(x)
#        define htole32(x) OSSwapHostToLittleInt32(x)
#        define be32toh(x) OSSwapBigToHostInt32(x)
#        define le32toh(x) OSSwapLittleToHostInt32(x)

#        define htobe64(x) OSSwapHostToBigInt64(x)
#        define htole64(x) OSSwapHostToLittleInt64(x)
#        define be64toh(x) OSSwapBigToHostInt64(x)
#        define le64toh(x) OSSwapLittleToHostInt64(x)
#    else
// With _POSIX_C_SOURCE, fall back to compiler byte-swap builtins selected by
// BYTE_ORDER.
// NOTE(review): if BYTE_ORDER and LITTLE_ENDIAN are both undefined at this
// point, the `#if` below compares 0 == 0 and selects the little-endian
// definitions — confirm that this is intended.
#        if BYTE_ORDER == LITTLE_ENDIAN
#            define htobe16(x) __builtin_bswap16(x)
#            define htole16(x) (x)
#            define be16toh(x) __builtin_bswap16(x)
#            define le16toh(x) (x)

#            define htobe32(x) __builtin_bswap32(x)
#            define htole32(x) (x)
#            define be32toh(x) __builtin_bswap32(x)
#            define le32toh(x) (x)

#            define htobe64(x) __builtin_bswap64(x)
#            define htole64(x) (x)
#            define be64toh(x) __builtin_bswap64(x)
#            define le64toh(x) (x)
#        elif BYTE_ORDER == BIG_ENDIAN
#            define htobe16(x) (x)
#            define htole16(x) __builtin_bswap16(x)
#            define be16toh(x) (x)
#            define le16toh(x) __builtin_bswap16(x)

#            define htobe32(x) (x)
#            define htole32(x) __builtin_bswap32(x)
#            define be32toh(x) (x)
#            define le32toh(x) __builtin_bswap32(x)

#            define htobe64(x) (x)
#            define htole64(x) __builtin_bswap64(x)
#            define be64toh(x) (x)
#            define le64toh(x) __builtin_bswap64(x)
#        else
#            error byte order not supported
#        endif
#    endif

// Windows: choose a byte-swap primitive, make sure the byte-order macros exist,
// then define the conversion functions in terms of them.
#elif defined(__WINDOWS__)

// MSVC (but not clang in MSVC mode) uses the _byteswap_* functions from
// <stdlib.h>; every other compiler uses the __builtin_bswap* builtins.
#    if defined(_MSC_VER) && !defined(__clang__)
#        include <stdlib.h>
#        define B_SWAP_16(x) _byteswap_ushort(x)
#        define B_SWAP_32(x) _byteswap_ulong(x)
#        define B_SWAP_64(x) _byteswap_uint64(x)
#    else
#        define B_SWAP_16(x) __builtin_bswap16(x)
#        define B_SWAP_32(x) __builtin_bswap32(x)
#        define B_SWAP_64(x) __builtin_bswap64(x)
#    endif

# if defined(__MINGW32__) || defined(HAVE_SYS_PARAM_H)
#   include <sys/param.h>
# endif

// Provide BIG_ENDIAN, LITTLE_ENDIAN and BYTE_ORDER if nothing has defined them
// yet, preferring existing double-underscore or compiler-provided spellings over
// the literal fallbacks.
#    ifndef BIG_ENDIAN
#        ifdef __BIG_ENDIAN
#            define BIG_ENDIAN __BIG_ENDIAN
#        elif defined(__ORDER_BIG_ENDIAN__)
#            define BIG_ENDIAN __ORDER_BIG_ENDIAN__
#        else
#            define BIG_ENDIAN 4321
#        endif
#    endif

#    ifndef LITTLE_ENDIAN
#        ifdef __LITTLE_ENDIAN
#            define LITTLE_ENDIAN __LITTLE_ENDIAN
#        elif defined(__ORDER_LITTLE_ENDIAN__)
#            define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
#        else
#            define LITTLE_ENDIAN 1234
#        endif
#    endif

#    ifndef BYTE_ORDER
#        ifdef __BYTE_ORDER
#            define BYTE_ORDER __BYTE_ORDER
#        elif defined(__BYTE_ORDER__)
#            define BYTE_ORDER __BYTE_ORDER__
#        else
             /* assume LE on Windows if nothing was defined */
#            define BYTE_ORDER LITTLE_ENDIAN
#        endif
#    endif

// Little-endian host: big-endian conversions swap, little-endian ones are
// identity.
#    if BYTE_ORDER == LITTLE_ENDIAN

#        define htobe16(x) B_SWAP_16(x)
#        define htole16(x) (x)
#        define be16toh(x) B_SWAP_16(x)
#        define le16toh(x) (x)

#        define htobe32(x) B_SWAP_32(x)
#        define htole32(x) (x)
#        define be32toh(x) B_SWAP_32(x)
#        define le32toh(x) (x)

#        define htobe64(x) B_SWAP_64(x)
#        define htole64(x) (x)
#        define be64toh(x) B_SWAP_64(x)
#        define le64toh(x) (x)

// Big-endian host: the roles are reversed.
#    elif BYTE_ORDER == BIG_ENDIAN

#        define htobe16(x) (x)
#        define htole16(x) B_SWAP_16(x)
#        define be16toh(x) (x)
#        define le16toh(x) B_SWAP_16(x)

#        define htobe32(x) (x)
#        define htole32(x) B_SWAP_32(x)
#        define be32toh(x) (x)
#        define le32toh(x) B_SWAP_32(x)

#        define htobe64(x) (x)
#        define htole64(x) B_SWAP_64(x)
#        define be64toh(x) (x)
#        define le64toh(x) B_SWAP_64(x)

#    else

#        error byte order not supported

#    endif

// QNX Neutrino: the conversions are built on the ENDIAN_* macros.
// NOTE(review): presumably those macros come from <gulliver.h> — confirm.
#elif defined(__QNXNTO__)

#    include <gulliver.h>

#    define __LITTLE_ENDIAN 1234
#    define __BIG_ENDIAN    4321
#    define __PDP_ENDIAN    3412

// Host-to-X conversions depend on which endianness macro is defined.
#    if defined(__BIGENDIAN__)

#        define __BYTE_ORDER __BIG_ENDIAN

#        define htobe16(x) (x)
#        define htobe32(x) (x)
#        define htobe64(x) (x)

#        define htole16(x) ENDIAN_SWAP16(x)
#        define htole32(x) ENDIAN_SWAP32(x)
#        define htole64(x) ENDIAN_SWAP64(x)

#    elif defined(__LITTLEENDIAN__)

#        define __BYTE_ORDER __LITTLE_ENDIAN

#        define htole16(x) (x)
#        define htole32(x) (x)
#        define htole64(x) (x)

#        define htobe16(x) ENDIAN_SWAP16(x)
#        define htobe32(x) ENDIAN_SWAP32(x)
#        define htobe64(x) ENDIAN_SWAP64(x)

#    else

#        error byte order not supported

#    endif

// X-to-host conversions are the same for both endianness cases: they map
// straight onto the ENDIAN_BE*/ENDIAN_LE* macros.
#    define be16toh(x) ENDIAN_BE16(x)
#    define be32toh(x) ENDIAN_BE32(x)
#    define be64toh(x) ENDIAN_BE64(x)
#    define le16toh(x) ENDIAN_LE16(x)
#    define le32toh(x) ENDIAN_LE32(x)
#    define le64toh(x) ENDIAN_LE64(x)

// Any other platform is rejected at compile time.
#else

#    error platform not supported

#endif

#endif



================================================
FILE: lib/src/unicode/README.md
================================================
# ICU Parts

This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).

### License

The license for these files is contained in the `LICENSE` file within this directory.

### Contents

* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
  * `utf8.h`
  * `utf16.h`
  * `umachine.h`
* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
  * `ptypes.h`
  * `urename.h`
  * `utf.h`
* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
* `README.md` - This text file.

### Updating ICU

To incorporate changes from the upstream `icu` repository:

* Update `ICU_SHA` with the new Git SHA.
* Update `LICENSE` with the license text from the directory mentioned above.
* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.



================================================
FILE: lib/src/unicode/ICU_SHA
================================================
552b01f61127d30d6589aa4bf99468224979b661



================================================
FILE: lib/src/unicode/LICENSE
================================================
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)

Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

---------------------

Third-Party Software Licenses

This section contains third-party software notices and/or additional
terms for licensed third-party software components included within ICU
libraries.

1. ICU License - ICU 1.8.1 to ICU 57.1

COPYRIGHT AND PERMISSION NOTICE

Copyright (c) 1995-2016 International Business Machines Corporation and others
All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies of
the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale, use
or other dealings in this Software without prior written authorization
of the copyright holder.

All trademarks and registered trademarks mentioned herein are the
property of their respective owners.

2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)

 #     The Google Chrome software developed by Google is licensed under
 # the BSD license. Other software included in this distribution is
 # provided under other licenses, as set forth below.
 #
 #  The BSD License
 #  http://opensource.org/licenses/bsd-license.php
 #  Copyright (C) 2006-2008, Google Inc.
 #
 #  All rights reserved.
 #
 #  Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
 #  Redistributions of source code must retain the above copyright notice,
 # this list of conditions and the following disclaimer.
 #  Redistributions in binary form must reproduce the above
 # copyright notice, this list of conditions and the following
 # disclaimer in the documentation and/or other materials provided with
 # the distribution.
 #  Neither the name of  Google Inc. nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
 #
 #
 #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 #
 #  The word list in cjdict.txt are generated by combining three word lists
 # listed below with further processing for compound word breaking. The
 # frequency is generated with an iterative training against Google web
 # corpora.
 #
 #  * Libtabe (Chinese)
 #    - https://sourceforge.net/project/?group_id=1519
 #    - Its license terms and conditions are shown below.
 #
 #  * IPADIC (Japanese)
 #    - http://chasen.aist-nara.ac.jp/chasen/distribution.html
 #    - Its license terms and conditions are shown below.
 #
 #  ---------COPYING.libtabe ---- BEGIN--------------------
 #
 #  /*
 #   * Copyright (c) 1999 TaBE Project.
 #   * Copyright (c) 1999 Pai-Hsiang Hsiao.
 #   * All rights reserved.
 #   *
 #   * Redistribution and use in source and binary forms, with or without
 #   * modification, are permitted provided that the following conditions
 #   * are met:
 #   *
 #   * . Redistributions of source code must retain the above copyright
 #   *   notice, this list of conditions and the following disclaimer.
 #   * . Redistributions in binary form must reproduce the above copyright
 #   *   notice, this list of conditions and the following disclaimer in
 #   *   the documentation and/or other materials provided with the
 #   *   distribution.
 #   * . Neither the name of the TaBE Project nor the names of its
 #   *   contributors may be used to endorse or promote products derived
 #   *   from this software without specific prior written permission.
 #   *
 #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 #   * OF THE POSSIBILITY OF SUCH DAMAGE.
 #   */
 #
 #  /*
 #   * Copyright (c) 1999 Computer Systems and Communication Lab,
 #   *                    Institute of Information Science, Academia
 #       *                    Sinica. All rights reserved.
 #   *
 #   * Redistribution and use in source and binary forms, with or without
 #   * modification, are permitted provided that the following conditions
 #   * are met:
 #   *
 #   * . Redistributions of source code must retain the above copyright
 #   *   notice, this list of conditions and the following disclaimer.
 #   * . Redistributions in binary form must reproduce the above copyright
 #   *   notice, this list of conditions and the following disclaimer in
 #   *   the documentation and/or other materials provided with the
 #   *   distribution.
 #   * . Neither the name of the Computer Systems and Communication Lab
 #   *   nor the names of its contributors may be used to endorse or
 #   *   promote products derived from this software without specific
 #   *   prior written permission.
 #   *
 #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 #   * OF THE POSSIBILITY OF SUCH DAMAGE.
 #   */
 #
 #  Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
 #      University of Illinois
 #  c-tsai4@uiuc.edu  http://casper.beckman.uiuc.edu/~c-tsai4
 #
 #  ---------------COPYING.libtabe-----END--------------------------------
 #
 #
 #  ---------------COPYING.ipadic-----BEGIN-------------------------------
 #
 #  Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
 #  and Technology.  All Rights Reserved.
 #
 #  Use, reproduction, and distribution of this software is permitted.
 #  Any copy of this software, whether in its original form or modified,
 #  must include both the above copyright notice and the following
 #  paragraphs.
 #
 #  Nara Institute of Science and Technology (NAIST),
 #  the copyright holders, disclaims all warranties with regard to this
 #  software, including all implied warranties of merchantability and
 #  fitness, in no event shall NAIST be liable for
 #  any special, indirect or consequential damages or any damages
 #  whatsoever resulting from loss of use, data or profits, whether in an
 #  action of contract, negligence or other tortuous action, arising out
 #  of or in connection with the use or performance of this software.
 #
 #  A large portion of the dictionary entries
 #  originate from ICOT Free Software.  The following conditions for ICOT
 #  Free Software applies to the current dictionary as well.
 #
 #  Each User may also freely distribute the Program, whether in its
 #  original form or modified, to any third party or parties, PROVIDED
 #  that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
 #  on, or be attached to, the Program, which is distributed substantially
 #  in the same form as set out herein and that such intended
 #  distribution, if actually made, will neither violate or otherwise
 #  contravene any of the laws and regulations of the countries having
 #  jurisdiction over the User or the intended distribution itself.
 #
 #  NO WARRANTY
 #
 #  The program was produced on an experimental basis in the course of the
 #  research and development conducted during the project and is provided
 #  to users as so produced on an experimental basis.  Accordingly, the
 #  program is provided without any warranty whatsoever, whether express,
 #  implied, statutory or otherwise.  The term "warranty" used herein
 #  includes, but is not limited to, any warranty of the quality,
 #  performance, merchantability and fitness for a particular purpose of
 #  the program and the nonexistence of any infringement or violation of
 #  any right of any third party.
 #
 #  Each user of the program will agree and understand, and be deemed to
 #  have agreed and understood, that there is no warranty whatsoever for
 #  the program and, accordingly, the entire risk arising from or
 #  otherwise connected with the program is assumed by the user.
 #
 #  Therefore, neither ICOT, the copyright holder, or any other
 #  organization that participated in or was otherwise related to the
 #  development of the program and their respective officials, directors,
 #  officers and other employees shall be held liable for any and all
 #  damages, including, without limitation, general, special, incidental
 #  and consequential damages, arising out of or otherwise in connection
 #  with the use or inability to use the program or any product, material
 #  or result produced or otherwise obtained by using the program,
 #  regardless of whether they have been advised of, or otherwise had
 #  knowledge of, the possibility of such damages at any time during the
 #  project or thereafter.  Each user will be deemed to have agreed to the
 #  foregoing by his or her commencement of use of the program.  The term
 #  "use" as used herein includes, but is not limited to, the use,
 #  modification, copying and distribution of the program and the
 #  production of secondary products from the program.
 #
 #  In the case where the program, whether in its original form or
 #  modified, was distributed or delivered to or received by a user from
 #  any person, organization or entity other than ICOT, unless it makes or
 #  grants independently of ICOT any specific warranty to the user in
 #  writing, such person, organization or entity, will also be exempted
 #  from and not be held liable to the user for any such damages as noted
 #  above as far as the program is concerned.
 #
 #  ---------------COPYING.ipadic-----END----------------------------------

3. Lao Word Break Dictionary Data (laodict.txt)

 #  Copyright (c) 2013 International Business Machines Corporation
 #  and others. All Rights Reserved.
 #
 # Project: http://code.google.com/p/lao-dictionary/
 # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
 # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
 #              (copied below)
 #
 #  This file is derived from the above dictionary, with slight
 #  modifications.
 #  ----------------------------------------------------------------------
 #  Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
 #  All rights reserved.
 #
 #  Redistribution and use in source and binary forms, with or without
 #  modification,
 #  are permitted provided that the following conditions are met:
 #
 #
 # Redistributions of source code must retain the above copyright notice, this
 #  list of conditions and the following disclaimer. Redistributions in
 #  binary form must reproduce the above copyright notice, this list of
 #  conditions and the following disclaimer in the documentation and/or
 #  other materials provided with the distribution.
 #
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 # OF THE POSSIBILITY OF SUCH DAMAGE.
 #  --------------------------------------------------------------------------

4. Burmese Word Break Dictionary Data (burmesedict.txt)

 #  Copyright (c) 2014 International Business Machines Corporation
 #  and others. All Rights Reserved.
 #
 #  This list is part of a project hosted at:
 #    github.com/kanyawtech/myanmar-karen-word-lists
 #
 #  --------------------------------------------------------------------------
 #  Copyright (c) 2013, LeRoy Benjamin Sharon
 #  All rights reserved.
 #
 #  Redistribution and use in source and binary forms, with or without
 #  modification, are permitted provided that the following conditions
 #  are met: Redistributions of source code must retain the above
 #  copyright notice, this list of conditions and the following
 #  disclaimer.  Redistributions in binary form must reproduce the
 #  above copyright notice, this list of conditions and the following
 #  disclaimer in the documentation and/or other materials provided
 #  with the distribution.
 #
 #    Neither the name Myanmar Karen Word Lists, nor the names of its
 #    contributors may be used to endorse or promote products derived
 #    from this software without specific prior written permission.
 #
 #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 #  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 #  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 #  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 #  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 #  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 #  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 #  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 #  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 #  THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 #  SUCH DAMAGE.
 #  --------------------------------------------------------------------------

5. Time Zone Database

  ICU uses the public domain data and code derived from Time Zone
Database for its time zone support. The ownership of the TZ database
is explained in BCP 175: Procedure for Maintaining the Time Zone
Database section 7.

 # 7.  Database Ownership
 #
 #    The TZ database itself is not an IETF Contribution or an IETF
 #    document.  Rather it is a pre-existing and regularly updated work
 #    that is in the public domain, and is intended to remain in the
 #    public domain.  Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
 #    not apply to the TZ Database or contributions that individuals make
 #    to it.  Should any claims be made and substantiated against the TZ
 #    Database, the organization that is providing the IANA
 #    Considerations defined in this RFC, under the memorandum of
 #    understanding with the IETF, currently ICANN, may act in accordance
 #    with all competent court orders.  No ownership claims will be made
 #    by ICANN or the IETF Trust on the database or the code.  Any person
 #    making a contribution to the database or code waives all rights to
 #    future claims in that contribution or in the TZ Database.

6. Google double-conversion

Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.
    * Neither the name of Google Inc. nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



================================================
FILE: lib/src/unicode/ptypes.h
================================================
// This file must exist in order for `utf8.h` and `utf16.h` to be used.



================================================
FILE: lib/src/unicode/umachine.h
================================================
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  umachine.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep13
*   created by: Markus W. Scherer
*
*   This file defines basic types and constants for ICU to be
*   platform-independent. umachine.h and utf.h are included into
*   utypes.h to provide all the general definitions for ICU.
*   All of these definitions used to be in utypes.h before
*   the UTF-handling macros made this unmaintainable.
*/

#ifndef __UMACHINE_H__
#define __UMACHINE_H__


/**
 * \file
 * \brief Basic types and constants for UTF
 *
 * <h2> Basic types and constants for UTF </h2>
 *   This file defines basic types and constants for utf.h to be
 *   platform-independent. umachine.h and utf.h are included into
 *   utypes.h to provide all the general definitions for ICU.
 *   All of these definitions used to be in utypes.h before
 *   the UTF-handling macros made this unmaintainable.
 *
 */
/*==========================================================================*/
/* Include platform-dependent definitions                                   */
/* which are contained in the platform-specific file platform.h             */
/*==========================================================================*/

#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */

/*
 * ANSI C headers:
 * stddef.h defines wchar_t
 */
#include <stddef.h>

/*==========================================================================*/
/* For C wrappers, we use the symbol U_STABLE.                                */
/* This works properly if the includer is C or C++.                         */
/* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
/*==========================================================================*/

/**
 * \def U_CFUNC
 * Linkage prefix for the declaration of a library private ICU C function:
 * `extern "C"` when compiled as C++, plain `extern` otherwise.
 * @stable ICU 2.4
 */

/**
 * \def U_CDECL_BEGIN
 * Opens a group of library private ICU C API declarations:
 * `extern "C" {` when compiled as C++, empty otherwise.
 * @stable ICU 2.4
 */

/**
 * \def U_CDECL_END
 * Closes a group opened with U_CDECL_BEGIN:
 * `}` when compiled as C++, empty otherwise.
 * @stable ICU 2.4
 */

#if !defined(__cplusplus)
    /* Compiled as C: no linkage specification is needed. */
#   define U_CDECL_BEGIN
#   define U_CDECL_END
#   define U_CFUNC extern
#else
    /* Compiled as C++: request C linkage for the wrapped declarations. */
#   define U_CDECL_BEGIN extern "C" {
#   define U_CDECL_END   }
#   define U_CFUNC extern "C"
#endif

#if !defined(U_ATTRIBUTE_DEPRECATED)
/**
 * \def U_ATTRIBUTE_DEPRECATED
 * Compiler-specific marker for deprecated declarations. A definition made
 * before this header is included is left untouched. Otherwise it expands to:
 *  - `__attribute__ ((deprecated))` when U_GCC_MAJOR_MINOR >= 302 (GCC),
 *  - `__declspec(deprecated)` when _MSC_VER >= 1400 (Visual C++),
 *  - nothing for every other compiler.
 * @internal
 */
#   if defined(U_GCC_MAJOR_MINOR) && (U_GCC_MAJOR_MINOR >= 302)
#       define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
#   elif defined(_MSC_VER) && (_MSC_VER >= 1400)
#       define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#   else
#       define U_ATTRIBUTE_DEPRECATED
#   endif
#endif

/**
 * Prefix for the declaration of a public ICU C API function:
 * U_CFUNC followed by U_EXPORT.
 * @stable ICU 2.0
 */
#define U_CAPI U_CFUNC U_EXPORT

/*
 * API status markers. Each one starts with U_CAPI; U_DEPRECATED is the only
 * one that appends anything (U_ATTRIBUTE_DEPRECATED).
 */
/** Declares a function as a deprecated public ICU C API */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** Declares a function as a stable public ICU C API */
#define U_STABLE     U_CAPI
/** Declares a function as a draft public ICU C API */
#define U_DRAFT      U_CAPI
/** Declares a function as an obsolete public ICU C API */
#define U_OBSOLETE   U_CAPI
/** Declares a function as an internal ICU C API */
#define U_INTERNAL   U_CAPI

/**
 * \def U_OVERRIDE
 * Expands to the C++11 "override" keyword unless U_OVERRIDE was already
 * defined before this header was included.
 * Marks a class or member as an override of something in the base class;
 * applying it to something that is not an override may be a compile error.
 * @internal
 */
#if !defined(U_OVERRIDE)
#   define U_OVERRIDE override
#endif

/**
 * \def U_FINAL
 * Expands to the C++11 "final" keyword. The definition is emitted when
 * U_FINAL has not been defined yet, and always when U_IN_DOXYGEN is defined.
 * Marks a class or member that subclasses may not override; an attempt to
 * override it may be a compile error.
 * @internal
 */
#if defined(U_IN_DOXYGEN) || !defined(U_FINAL)
#   define U_FINAL final
#endif

// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation
// with a trailing ; creating an empty statement after the block or else omit
// this trailing ; using the knowledge that the macro would expand to { }.
//
// But doing so doesn't work well with macros that look like functions and
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
// switches to the standard solution of wrapping such macros in do { } while.
//
// This will however break existing code that depends on being able to invoke
// these macros without a trailing ; so to be able to remain compatible with
// such code the wrapper is itself defined as macros so that it's possible to
// build ICU 65 and later with the old macro behaviour, like this:
//
// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...

/**
 * \def UPRV_BLOCK_MACRO_BEGIN
 * Opening half of the wrapper put around function-like, multi-statement
 * macros. Expands to the "do" keyword unless it was defined beforehand.
 * @internal
 */
#if !defined(UPRV_BLOCK_MACRO_BEGIN)
#   define UPRV_BLOCK_MACRO_BEGIN do
#endif

/**
 * \def UPRV_BLOCK_MACRO_END
 * Closing half of the wrapper put around function-like, multi-statement
 * macros. Expands to "while (FALSE)" unless it was defined beforehand, so
 * FALSE must be defined wherever a wrapped macro is expanded.
 * @internal
 */
#if !defined(UPRV_BLOCK_MACRO_END)
#   define UPRV_BLOCK_MACRO_END while (FALSE)
#endif

/*==========================================================================*/
/* limits for int32_t etc., like in POSIX inttypes.h                        */
/*==========================================================================*/

/*
 * Fallback limits, grouped by width. Each macro is defined only when the
 * includer (for example through <stdint.h>) has not provided it already.
 */

/* ---- 8 bit ---- */
#if !defined(INT8_MIN)
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
#   define INT8_MIN        ((int8_t)(-128))
#endif
#if !defined(INT8_MAX)
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
#   define INT8_MAX        ((int8_t)(127))
#endif
#if !defined(UINT8_MAX)
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT8_MAX       ((uint8_t)(255U))
#endif

/* ---- 16 bit ---- */
#if !defined(INT16_MIN)
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
#   define INT16_MIN       ((int16_t)(-32767-1))
#endif
#if !defined(INT16_MAX)
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
#   define INT16_MAX       ((int16_t)(32767))
#endif
#if !defined(UINT16_MAX)
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT16_MAX      ((uint16_t)(65535U))
#endif

/* ---- 32 bit ---- */
#if !defined(INT32_MIN)
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
#   define INT32_MIN       ((int32_t)(-2147483647-1))
#endif
#if !defined(INT32_MAX)
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
#   define INT32_MAX       ((int32_t)(2147483647))
#endif
#if !defined(UINT32_MAX)
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT32_MAX      ((uint32_t)(4294967295U))
#endif

/*
 * 64-bit constants. A build that declares int64_t unavailable is rejected;
 * otherwise every macro below is defined only if it is still missing.
 */
#if !defined(U_INT64_T_UNAVAILABLE)
# if !defined(INT64_C)
/**
 * Platform independent spelling of a signed 64-bit integer constant.
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
 * @stable ICU 2.8
 */
#   define INT64_C(c) c ## LL
# endif
# if !defined(UINT64_C)
/**
 * Platform independent spelling of an unsigned 64-bit integer constant.
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
 * @stable ICU 2.8
 */
#   define UINT64_C(c) c ## ULL
# endif
# if !defined(U_INT64_MIN)
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
# endif
# if !defined(U_INT64_MAX)
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
# endif
# if !defined(U_UINT64_MAX)
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
# endif
#else
# error int64_t is required for decimal format and rule-based number format.
#endif

/*==========================================================================*/
/* Boolean data type                                                        */
/*==========================================================================*/

/**
 * The ICU boolean type, an alias of int8_t.
 * @stable ICU 2.0
 */
typedef int8_t UBool;

/* TRUE and FALSE are provided only if nothing defined them before. */
#if !defined(FALSE)
/** The FALSE value of a UBool @stable ICU 2.0 */
#   define FALSE 0
#endif
#if !defined(TRUE)
/** The TRUE value of a UBool @stable ICU 2.0 */
#   define TRUE  1
#endif


/*==========================================================================*/
/* Unicode data types                                                       */
/*==========================================================================*/

/* wchar_t-related definitions -------------------------------------------- */

/*
 * \def U_WCHAR_IS_UTF16
 * Defined if wchar_t uses UTF-16.
 *
 * @stable ICU 2.0
 */
/*
 * \def U_WCHAR_IS_UTF32
 * Defined if wchar_t uses UTF-32.
 *
 * @stable ICU 2.0
 */
/*
 * Auto-detection. It runs only when neither macro was predefined, and it
 * defines at most one of them. The branches are tried in this order:
 * __STDC_ISO_10646__, __UCS2__, __UCS4__ (or __UTF32__ when
 * U_PLATFORM == U_PF_OS400), and finally the platform defaults.
 * If no branch matches, neither macro is defined.
 *
 * NOTE(review): U_SIZEOF_WCHAR_T, U_PLATFORM, U_PF_OS390, U_PF_OS400 and the
 * U_PLATFORM_* flags are not defined anywhere in this header - presumably
 * they come from platform.h. An identifier that is undefined evaluates to 0
 * inside #if, so confirm that the includer provides them.
 */
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
#   ifdef __STDC_ISO_10646__
        /* Pick by the size of wchar_t; other sizes define nothing. */
#       if (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       elif (U_SIZEOF_WCHAR_T==4)
#           define  U_WCHAR_IS_UTF32
#       endif
#   elif defined __UCS2__
        /* Only for U_PLATFORM within [U_PF_OS390, U_PF_OS400] and a 2-byte wchar_t. */
#       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       endif
#   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
        /* Only with a 4-byte wchar_t. */
#       if (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
        /* Platform default: Darwin-based, or Linux-based with a 4-byte wchar_t. */
#       define U_WCHAR_IS_UTF32
#   elif U_PLATFORM_HAS_WIN32_API
        /* Platform default when the Win32 API is available. */
#       define U_WCHAR_IS_UTF16
#   endif
#endif

/* UChar and UChar32 definitions -------------------------------------------- */

/**
 * Number of bytes in a UChar, i.e. in one UTF-16 code unit.
 * @stable ICU 2.0
 */
#define U_SIZEOF_UCHAR 2

/**
 * \def U_CHAR16_IS_TYPEDEF
 * If 1, then char16_t is a typedef and not a real type (yet).
 *
 * Set to 1 in two cases:
 *  - on AIX (U_PLATFORM == U_PF_AIX) when compiling C++ with
 *    U_CPLUSPLUS_VERSION < 11, where <uchar.h> is also included;
 *  - with Visual Studio/MSVC older than 2015 (_MSC_VER < 1900).
 * Set to 0 otherwise.
 *
 * The AIX test requires U_PLATFORM and U_PF_AIX to be defined. Undefined
 * identifiers evaluate to 0 inside #if, so without that guard a build that
 * never pulls in the platform definitions would see
 * "U_PLATFORM == U_PF_AIX" as 0 == 0 and take the AIX branch in every C++
 * translation unit.
 * @internal
 */
#if defined(U_PLATFORM) && defined(U_PF_AIX) && (U_PLATFORM == U_PF_AIX) && \
        defined(__cplusplus) && (U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef.  https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif


/**
 * \var UChar
 *
 * The base type for UTF-16 code units and pointers.
 * Unsigned 16-bit integer.
 * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
 *
 * UChar is configurable by defining the macro UCHAR_TYPE
 * on the preprocessor or compiler command line:
 * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
 * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
 * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
 *
 * The default is UChar=char16_t.
 *
 * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
 *
 * In C, char16_t is a simple typedef of uint_least16_t.
 * ICU requires uint_least16_t=uint16_t for data memory mapping.
 * On macOS, char16_t is not available because the uchar.h standard header is missing.
 *
 * @stable ICU 4.4
 */

// Manual test toggle for the UChar configuration. As written (#if 1) this
// block defines nothing. Editing the literal to #if 0 activates the #elif,
// which defines UCHAR_TYPE as uint16_t unless UCHAR_TYPE is already defined.
#if 1
    // #if 1 is normal. UChar defaults to char16_t in C++.
    // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
    // The intltest Makefile #defines UCHAR_TYPE=char16_t,
    // so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
#   define UCHAR_TYPE uint16_t
#endif

#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    // Inside the ICU library code, never configurable.
    typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
    typedef UCHAR_TYPE UChar;
#elif defined(__cplusplus)
    typedef char16_t UChar;
#else
    typedef uint16_t UChar;
#endif

/**
 * \var OldUChar
 * Default ICU 58 definition of UChar.
 * A base type for UTF-16 code units and pointers.
 * Unsigned 16-bit integer.
 *
 * Define OldUChar to be wchar_t if that is 16 bits wide.
 * If wchar_t is not 16 bits wide, then define OldUChar to be the compiler's
 * __CHAR16_TYPE__ if that macro is defined, or else uint16_t.
 *
 * This makes the definition of OldUChar platform-dependent
 * but allows direct string type compatibility with platforms with
 * 16-bit wchar_t types.
 *
 * This is how UChar was defined in ICU 58, for transition convenience.
 * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
 * The current UChar responds to UCHAR_TYPE but OldUChar does not.
 *
 * @stable ICU 59
 */
#if U_SIZEOF_WCHAR_T==2
    typedef wchar_t OldUChar;
#elif defined(__CHAR16_TYPE__)
    typedef __CHAR16_TYPE__ OldUChar;
#else
    typedef uint16_t OldUChar;
#endif

/**
 * Define UChar32 as a type for single Unicode code points.
 * UChar32 is a signed 32-bit integer (same as int32_t).
 * Being signed, it can also hold negative out-of-range markers.
 *
 * The Unicode code point range is 0..0x10ffff.
 * All other values (negative or >=0x110000) are illegal as Unicode code points.
 * They may be used as sentinel values to indicate "done", "error"
 * or similar non-code point conditions.
 *
 * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
 * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
 * or else to be uint32_t.
 * That is, the definition of UChar32 was platform-dependent.
 *
 * @see U_SENTINEL
 * @stable ICU 2.4
 */
typedef int32_t UChar32;

/**
 * This value is intended for sentinel values for APIs that
 * (take or) return single code points (UChar32).
 * It is outside of the Unicode code point range 0..0x10ffff.
 *
 * For example, a "done" or "error" value in a new API
 * could be indicated with U_SENTINEL.
 *
 * ICU APIs designed before ICU 2.4 usually define service-specific "done"
 * values, mostly 0xffff.
 * Those may need to be distinguished from
 * actual U+ffff text contents by calling functions like
 * CharacterIterator::hasNext() or UnicodeString::length().
 *
 * The expansion is parenthesized so that the unary minus cannot fuse with
 * a neighbouring operator when the macro is used inside an expression.
 *
 * @return -1
 * @see UChar32
 * @stable ICU 2.4
 */
#define U_SENTINEL (-1)

#include "unicode/urename.h"

#endif



================================================
FILE: lib/src/unicode/urename.h
================================================
// This file must exist in order for `utf8.h` and `utf16.h` to be used.



================================================
FILE: lib/src/unicode/utf.h
================================================
// This file must exist in order for `utf8.h` and `utf16.h` to be used.



================================================
FILE: lib/src/unicode/utf16.h
================================================
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf16.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep09
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C API: 16-bit Unicode handling macros
 *
 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
 *
 * For more information see utf.h and the ICU User Guide Strings chapter
 * (http://userguide.icu-project.org/strings).
 *
 * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.
 */

#ifndef __UTF16_H__
#define __UTF16_H__

#include "unicode/umachine.h"
#ifndef __UTF_H__
#   include "unicode/utf.h"
#endif

/* single-code point definitions -------------------------------------------- */

/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 *
 * Tests the surrogate bit pattern directly instead of going through
 * U_IS_SURROGATE(), which would have to come from utf.h (an empty stub in
 * this tree). A value is a surrogate when everything above its low 11 bits
 * equals 0xd800; anything else is "single".
 * The expansion is fully parenthesized and c is evaluated exactly once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (((c)&0xfffff800)!=0xd800)

/**
 * Is this code unit a lead surrogate (U+d800..U+dbff)?
 *
 * The mask 0xfffffc00 discards only the low 10 bits, so a wider value with
 * any bit set above bit 15 compares unequal to 0xd800 and is rejected.
 * c is evaluated exactly once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)

/**
 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
 *
 * The mask 0xfffffc00 discards only the low 10 bits, so a wider value with
 * any bit set above bit 15 compares unequal to 0xdc00 and is rejected.
 * c is evaluated exactly once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)

/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 *
 * Tests the bit pattern directly instead of forwarding to U_IS_SURROGATE(),
 * which would have to come from utf.h (an empty stub in this tree); without
 * it, every macro in this file that calls U16_IS_SURROGATE fails to compile.
 * The mask 0xfffff800 discards only the low 11 bits, so the comparison
 * accepts exactly 0xd800..0xdfff and rejects wider values with higher bits
 * set. c is evaluated exactly once.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)

/**
 * For a value already known to be a surrogate (U16_IS_SURROGATE(c)),
 * report whether it is the lead half of a pair.
 * Only bit 10 (0x400) is inspected: it is clear for lead surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (0==((c)&0x400))

/**
 * For a value already known to be a surrogate (U16_IS_SURROGATE(c)),
 * report whether it is the trail half of a pair.
 * Only bit 10 (0x400) is inspected: it is set for trail surrogates.
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))

/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 *
 * Combines, into one value, the lead-surrogate base shifted into position
 * (0xd800<<10), the trail-surrogate base (0xdc00) and the 0x10000 bias of
 * the supplementary range, so that U16_GET_SUPPLEMENTARY can compute
 * (lead<<10)+trail and then remove all three with a single subtraction.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/**
 * Get a supplementary code point value (U+10000..U+10ffff)
 * from its lead and trail surrogates.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 *
 * Both arguments are cast to UChar32 before the arithmetic, and each is
 * evaluated exactly once.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)


/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 *
 * 0xd7c0 is 0xd800-(0x10000>>10): adding it to the shifted code point both
 * removes the 0x10000 bias and adds the lead-surrogate base in one step.
 * The whole cast expression is wrapped in parentheses so the macro behaves
 * as a single primary expression (e.g. after sizeof).
 * supplementary is evaluated exactly once.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 *
 * Keeps the low 10 bits of the code point and ORs in the trail-surrogate
 * base 0xdc00.
 * The whole cast expression is wrapped in parentheses so the macro behaves
 * as a single primary expression (e.g. after sizeof).
 * supplementary is evaluated exactly once.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/**
 * Number of 16-bit code units needed to encode a Unicode code point: 1 or 2.
 * c is converted to uint32_t before the comparison, so a negative value is
 * treated like any other value above 0xffff.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)

/**
 * Upper bound on the number of 16-bit code units per Unicode code point
 * (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * No bounds are checked: once a surrogate is seen, (s)[(i)+1] or (s)[(i)-1]
 * is read unconditionally.
 * s, i and c may each be evaluated more than once, so pass expressions
 * without side effects.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 * The end-of-string test is the inequality (i)+1!=(length); with a negative
 * length it is always true for i>=0, so the following unit is always read.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * __c2 is a scratch variable local to the macro's block.
 * s, i and c may each be evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 * The end-of-string test is the inequality (i)+1!=(length); with a negative
 * length it is always true for i>=0, so the following unit is always read.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * This has the same structure as U16_GET, with an added else branch on each
 * side that stores 0xfffd when the matching surrogate is missing.
 * __c2 is a scratch variable local to the macro's block.
 * s, i and c may each be evaluated more than once.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } else { \
                (c)=0xfffd; \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/* definitions with forward iteration --------------------------------------- */

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * i is post-incremented once for each code unit consumed (one or two), so
 * it must be a modifiable lvalue. After a lead surrogate the next unit is
 * read and consumed without any bounds or validity check.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * The first unit is always consumed. After a lead surrogate, the next unit
 * is examined only if (i)!=(length), and i is advanced a second time only
 * when that unit is a trail surrogate.
 * __c2 is a scratch variable local to the macro's block.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * Unlike U16_NEXT, the outer test is U16_IS_SURROGATE, so a trail surrogate
 * at the offset also reaches the else branch and is replaced by 0xfffd.
 * In every unpaired case only the one unit already consumed is skipped.
 * __c2 is a scratch variable local to the macro's block.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * Values up to 0xffff are stored as one unit; anything larger is split into
 * a lead/trail surrogate pair. c may be evaluated more than once.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to TRUE.
 *
 * Capacity is checked only on the two-unit path ((i)+1<(capacity)); the
 * single-unit path relies on the caller's guarantee that i<capacity.
 * Any value up to 0xffff, including one in the surrogate range
 * 0xd800..0xdfff, takes the single-unit path and is stored as-is.
 * On error nothing is written and i is not advanced.
 * c and i may be evaluated more than once.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The unit at i is always consumed. If it is a lead surrogate, the next
 * unit is skipped as well without being inspected.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The unit at i is always consumed. A second unit is skipped only when the
 * first is a lead surrogate, the end has not been reached ((i)!=(length)),
 * and the next unit is a trail surrogate; the three tests short-circuit in
 * that order.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Skip forward over n code points, starting at a code point boundary.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * n is evaluated once and copied into a local counter; each iteration
 * delegates to U16_FWD_1_UNSAFE. Nothing happens when n<=0.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Skip forward over n code points, starting at a code point boundary.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * n is evaluated once and copied into a local counter. The loop stops early
 * when i reaches length or, for a negative length, when the unit at i is 0.
 * Each iteration delegates to U16_FWD_1.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Only (s)[i] is inspected; the preceding unit is not checked to be a lead
 * surrogate, nor is i checked against the start of the string.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * (s)[i] is read unconditionally, so i must index a readable code unit.
 * (s)[(i)-1] is read only when (s)[i] is a trail surrogate and i>start.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/* definitions with backward iteration -------------------------------------- */

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * i is pre-decremented once for each code unit consumed (one or two), so it
 * must be a modifiable lvalue. After a trail surrogate the preceding unit is
 * read and consumed without any bounds or validity check.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * The first unit is always consumed. After a trail surrogate, (s)[(i)-1] is
 * examined only if i>start, and i is decremented a second time only when
 * that unit is a lead surrogate.
 * __c2 is a scratch variable local to the macro's block.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * Unlike U16_PREV, the outer test is U16_IS_SURROGATE, so a lead surrogate
 * before the offset also reaches the else branch and is replaced by 0xfffd.
 * In every unpaired case only the one unit already consumed is stepped over.
 * __c2 is a scratch variable local to the macro's block.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * i is always decremented once. If the unit now at i is a trail surrogate,
 * i is decremented again without inspecting the unit before it.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i is always decremented once. It is decremented a second time only when
 * the unit now at i is a trail surrogate, i>start, and the unit before it is
 * a lead surrogate; the three tests short-circuit in that order.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Step backward over n code points, starting at a code point boundary.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * n is evaluated once and copied into a local counter; each iteration
 * delegates to U16_BACK_1_UNSAFE. Nothing happens when n<=0.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Step backward over n code points, starting at a code point boundary.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * n is evaluated once and copied into a local counter. The loop stops early
 * as soon as i is no longer greater than start. Each iteration delegates to
 * U16_BACK_1.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * (s)[(i)-1] is read unconditionally, so i must be greater than the index of
 * the first readable unit. The unit at i itself is not inspected.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The conditions short-circuit left to right: (s)[(i)-1] is read only when
 * start<i, and (s)[i] only when additionally i<length or length<0.
 * i is incremented only when both neighbours form a lead/trail pair.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END

#endif



================================================
FILE: lib/src/unicode/utf8.h
================================================
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf8.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep13
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C API: 8-bit Unicode handling macros
 *
 * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
 *
 * For more information see utf.h and the ICU User Guide Strings chapter
 * (http://userguide.icu-project.org/strings).
 *
 * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.
 */

#ifndef __UTF8_H__
#define __UTF8_H__

#include "unicode/umachine.h"
#ifndef __UTF_H__
#   include "unicode/utf.h"
#endif

/* internal definitions ----------------------------------------------------- */

/**
 * Counts the trail bytes for a UTF-8 lead byte.
 * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * When U8_IS_LEAD(leadByte) holds, the result is 1, plus 1 if the byte is
 * >=0xe0, plus 1 more if it is >=0xf0 (i.e. 1, 2 or 3); otherwise it is 0.
 * NOTE(review): U8_IS_LEAD is not defined above this point in the header;
 * presumably it is defined further down - confirm.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (U8_IS_LEAD(leadByte) ? \
        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)

/**
 * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
 * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
 * leadByte might be evaluated multiple times.
 *
 * The result is the sum of three threshold comparisons on the byte:
 * 1 for 0xc2..0xdf, 2 for 0xe0..0xef, and 3 for 0xf0 and above.
 * There is no upper-bound check, which is why 0xf5..0xff is undefined.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 *
 * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
 * @internal
 */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))

/**
 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
 *
 * leadByte is modified in place (the macro uses &=), so it must be a
 * modifiable lvalue. The mask keeps the low 6-countTrailBytes bits:
 * 0x1f for 1 trail byte, 0x0f for 2, and 0x07 for 3.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is called by public macros in this file and thus must remain stable.
 * @internal
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/**
 * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * Lead byte E0..EF bits 3..0 are used as byte index,
 * first trail byte bits 7..5 are used as bit index into that byte.
 *
 * Reading the table: most entries are 0x30 (bits 4 and 5, i.e. first trail
 * byte 0x80..0xBF). Entry 0 (lead E0) is 0x20, allowing only 0xA0..0xBF;
 * entry 0xD (lead ED) is 0x10, allowing only 0x80..0x9F.
 * @see U8_IS_VALID_LEAD3_AND_T1
 * @internal
 */
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"

/**
 * Internal 3-byte UTF-8 validity check.
 * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
 *
 * Indexes U8_LEAD3_T1_BITS with the low 4 bits of lead, then tests the bit
 * selected by the top 3 bits of t1. The result is the masked bit itself,
 * not normalized to 0/1. lead and t1 are each evaluated once.
 * @internal
 */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))

/**
 * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
 * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
 * First trail byte bits 7..4 are used as byte index,
 * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
 *
 * Reading the table: only entries 8..0xB (first trail byte 0x80..0xBF) are
 * non-zero. Entry 8 is 0x1E (bits 1..4: leads F1..F4 with trail 0x80..0x8F);
 * entries 9..0xB are 0x0F (bits 0..3: leads F0..F3 with trail 0x90..0xBF).
 * @see U8_IS_VALID_LEAD4_AND_T1
 * @internal
 */
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"

/**
 * Internal 4-byte UTF-8 validity check.
 * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
 *
 * Indexes U8_LEAD4_T1_BITS with the top 4 bits of t1, then tests the bit
 * selected by the low 3 bits of lead. The result is the masked bit itself,
 * not normalized to 0/1. lead and t1 are each evaluated once.
 * @internal
 */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))

/**
 * Function for handling "next code point" with error-checking.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
 * file and thus must remain stable, and should not be hidden when other internal
 * functions are hidden (otherwise public macros would fail to compile).
 * @internal
 */
U_STABLE UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);

/**
 * Function for handling "append code point" with error-checking.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
 * file and thus must remain stable, and should not be hidden when other internal
 * functions are hidden (otherwise public macros would fail to compile).
 * @internal
 */
U_STABLE int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);

/**
 * Function for handling "previous code point" with error-checking.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
 * file and thus must remain stable, and should not be hidden when other internal
 * functions are hidden (otherwise public macros would fail to compile).
 * @internal
 */
U_STABLE UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);

/**
 * Function for handling "skip backward one code point" with error-checking.
 *
 * This is internal since it is not meant to be called directly by external clients;
 * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
 * file and thus must remain stable, and should not be hidden when other internal
 * functions are hidden (otherwise public macros would fail to compile).
 * @internal
 */
U_STABLE int32_t U_EXPORT2
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);

/* single-code point definitions -------------------------------------------- */

/**
 * Tests whether a code unit (byte) is a complete code point on its own,
 * i.e. whether it is US-ASCII (0..0x7f, top bit clear).
 * @param c 8-bit code unit (byte)
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U8_IS_SINGLE(c) (!((c)&0x80))

/**
 * Tests whether a code unit (byte) is a UTF-8 lead byte (0xC2..0xF4).
 * The byte is shifted down by 0xc2 and truncated to 8 bits, so that a single
 * unsigned comparison covers both ends of the range.
 * @param c 8-bit code unit (byte)
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<0x33)
// 0x33=0xf4-0xc2+1

/**
 * Tests whether a code unit (byte) is a UTF-8 trail byte (0x80..0xBF).
 * Reinterpreted as a signed 8-bit value, exactly these bytes are below -0x40.
 * @param c 8-bit code unit (byte)
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U8_IS_TRAIL(c) ((int8_t)(c)<=-0x41)

/**
 * How many code units (bytes) are used for the UTF-8 encoding
 * of this Unicode code point?
 * @param c 32-bit code point
 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
 * @stable ICU 2.4
 */
#define U8_LENGTH(c) \
    ( /* flat chain: the first matching range decides the byte count */ \
     (uint32_t)(c)<=0x7f ? 1 : \
     (uint32_t)(c)<=0x7ff ? 2 : \
     (uint32_t)(c)<=0xd7ff ? 3 : \
     /* surrogates and values above U+10FFFF have no UTF-8 encoding */ \
     ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff) ? 0 : \
     (uint32_t)(c)<=0xffff ? 3 : 4 \
    )

/**
 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
 * This matches the largest value that U8_LENGTH() can return.
 * @return 4
 * @stable ICU 2.4
 */
#define U8_MAX_LENGTH 4

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 * The result is undefined if the offset points to an illegal UTF-8
 * byte sequence.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_GET
 * @stable ICU 2.4
 */
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* operate on a private copy so that the caller's offset is left untouched */ \
    int32_t _u8_get_unsafe_pos=(int32_t)(i); \
    /* back up to the lead byte, then decode forward from there */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_pos); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_pos, c); \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to a negative value.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Private copy of the offset so that the caller's i is not modified. */ \
    /* The explicit cast matches U8_GET_UNSAFE and avoids an implicit narrowing */ \
    /* conversion when i has a type wider than int32_t. */ \
    int32_t _u8_get_index=(int32_t)(i); \
    /* back up to the start of the code point, then decode it with error checking */ \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * The offset may point to either the lead byte or one of the trail bytes
 * for a code point, in which case the macro will read all of the bytes
 * for the code point.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to an illegal UTF-8 byte sequence, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_GET() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset
 * @param i int32_t string offset, must be start<=i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_GET
 * @stable ICU 51
 */
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Private copy of the offset so that the caller's i is not modified. */ \
    /* The explicit cast matches U8_GET_UNSAFE and avoids an implicit narrowing */ \
    /* conversion when i has a type wider than int32_t. */ \
    int32_t _u8_get_index=(int32_t)(i); \
    /* back up to the start of the code point, then decode it, substituting U+FFFD on error */ \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END

/* definitions with forward iteration --------------------------------------- */

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * The result is undefined if the offset points to a trail byte
 * or an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_NEXT
 * @stable ICU 2.4
 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        /* dispatch on the lead byte, longest form first */ \
        if((c)>=0xf0) { \
            /* 4 bytes: 3 payload bits from the lead byte, 6 from each of the 3 trail bytes */ \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } else if((c)>=0xe0) { \
            /* 3 bytes: no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            /* 2 bytes: 5 payload bits from the lead byte, 6 from the trail byte */ \
            (c)=(((c)&0x1f)<<6)|((s)[i]&0x3f); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to a negative value.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_NEXT_UNSAFE
 * @stable ICU 2.4
 */
/* Forwards to the shared decoder; U_SENTINEL is the value stored in c for ill-formed input. */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead byte of a multi-byte sequence,
 * in which case the macro will read the whole sequence.
 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
 * c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_NEXT() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_NEXT
 * @stable ICU 51
 */
/* Forwards to the shared decoder; 0xfffd is the value stored in c for ill-formed input. */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)

/** @internal */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t = 0; \
        if((i)!=(length) && \
            /* fetch/validate/assemble all but last trail byte */ \
            ((c)>=0xe0 ? \
                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
                    (__t&=0x3f, 1) \
                :  /* U+10000..U+10FFFF */ \
                    ((c)-=0xf0)<=4 && \
                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
                    (__t=(s)[i]-0x80)<=0x3f) && \
                /* valid second-to-last trail byte */ \
                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
            :  /* U+0080..U+07FF */ \
                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
            /* last trail byte */ \
            (__t=(s)[i]-0x80)<=0x3f && \
            ((c)=((c)<<6)|__t, ++(i), 1)) { \
        } else { \
            (c)=(sub);  /* ill-formed*/ \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Writes the UTF-8 encoding of a code point into a buffer, overwriting 1 to 4 bytes.
 * The offset marks the current end of the string contents and is moved
 * behind the bytes written (post-increment).
 * "Unsafe" macro: the code point must be valid and the buffer must have enough room.
 * Otherwise, the result is undefined.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U8_APPEND
 * @stable ICU 2.4
 */
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<=0x7f) { \
        /* 1 byte: ASCII */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff) { \
        /* 2 bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc<=0xffff) { \
        /* 3 bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* 4 bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Writes the UTF-8 encoding of a code point into a buffer, overwriting 1 to 4 bytes.
 * The offset marks the current end of the string contents and is moved
 * behind the bytes written (post-increment).
 * "Safe" macro: rejects surrogates and values above U+10FFFF.
 * For a non-ASCII code point it also checks that all of its bytes fit below capacity;
 * the single byte of an ASCII code point is written without a capacity check.
 * If the code point is not valid or its bytes do not fit,
 * then isError is set to TRUE and nothing is written.
 *
 * @param s uint8_t * string buffer (written to)
 * @param i int32_t string offset, must be i<capacity
 * @param capacity int32_t size of the string buffer
 * @param c UChar32 code point to append
 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
 * @see U8_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); \
    if(__uc<0x80) { \
        /* 1 byte: ASCII */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<0x800 && (i)+1<(capacity)) { \
        /* 2 bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if((__uc<0xd800 || (__uc>=0xe000 && __uc<0x10000)) && (i)+2<(capacity)) { \
        /* 3 bytes, surrogates excluded */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc>=0x10000 && __uc<0x110000 && (i)+3<(capacity)) { \
        /* 4 bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* invalid code point, or not enough room for its encoding */ \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_FWD_1
 * @stable ICU 2.4
 */
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* skip the trail bytes announced by the byte at s[i], plus that byte itself */ \
    (i)+=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i])+1; \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @see U8_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(s)[(i)++]; \
    /* only a lead byte with at least one more byte available can be followed by trail bytes */ \
    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
        uint8_t __t1=(s)[i]; \
        if(__b<0xe0) { \
            /* 2-byte lead: one plain trail byte */ \
            if(U8_IS_TRAIL(__t1)) { \
                ++(i); \
            } \
        } else if(__b<0xf0) { \
            /* 3-byte lead: restricted first trail byte, then one plain trail byte */ \
            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else /* __b>=0xf0 */ { \
            /* 4-byte lead: restricted first trail byte, then two plain trail bytes */ \
            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_FWD_N
 * @stable ICU 2.4
 */
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* step forward one code point per iteration, n times (not at all if n<=0) */ \
    for(__N=(n); __N>0; --__N) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U8_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* stop early at the end of the string: i reaches length, or, */ \
    /* for a negative length, the byte at s[i] is NUL */ \
    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to a UTF-8 trail byte,
 * then the offset is moved backward to the corresponding lead byte.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_START
 * @stable ICU 2.4
 */
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step back for as long as the offset sits on a trail byte */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to a UTF-8 trail byte,
 * then the offset is moved backward to the corresponding lead byte.
 * Otherwise, it is not modified.
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i
 * @see U8_SET_CP_START_UNSAFE
 * @see U8_TRUNCATE_IF_INCOMPLETE
 * @stable ICU 2.4
 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U8_IS_TRAIL((s)[(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * If the string ends with a UTF-8 byte sequence that is valid so far
 * but incomplete, then reduce the length of the string to end before
 * the lead byte of that incomplete sequence.
 * For example, if the string ends with E1 80, the length is reduced by 2.
 *
 * In all other cases (the string ends with a complete sequence, or it is not
 * possible for any further trail byte to extend the trailing sequence)
 * the length remains unchanged.
 *
 * Useful for processing text split across multiple buffers
 * (save the incomplete sequence for later)
 * and for optimizing iteration
 * (check for string length only once per character).
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_SET_CP_START(), this macro never reads s[length].
 *
 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param length int32_t string length (usually start<=length)
 * @see U8_SET_CP_START
 * @stable ICU 61
 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* Note: s is parenthesized at every use so that expression arguments */ \
    /* (for example p+offset) are indexed as a whole, like in the other macros. */ \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lead byte at the very end: drop just that byte */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one valid trail byte: drop both */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                uint8_t __b3=(s)[(length)-3]; \
                /* 4-byte lead followed by two trail bytes: drop all three */ \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/* definitions with backward iteration -------------------------------------- */

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U8_PREV
 * @stable ICU 2.4
 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is the last trail byte: keep its 6 payload bits */ \
        (c)&=0x3f; \
        /* walk backward over further trail bytes, stacking 6 payload bits for each */ \
        for(__b=(s)[--(i)]; __b<0xc0; __b=(s)[--(i)]) { \
            (c)|=(UChar32)(__b&0x3f)<<__shift; \
            ++__count; \
            __shift+=6; \
        } \
        /* __b>=0xc0 ends the walk: mask it as the lead byte and put its bits on top */ \
        U8_MASK_LEAD_BYTE(__b, __count); \
        (c)|=(UChar32)__b<<__shift; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to <0 in case of an error
 * @see U8_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Non-ASCII byte: the out-of-line helper decodes the sequence ending here. */ \
        /* s is parenthesized so that the cast applies to the whole argument expression. */ \
        /* The final argument -1 is the helper's "strict" parameter. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a multi-byte sequence, then the macro will read
 * the whole sequence.
 * If the offset is behind a lead byte, then that itself
 * will be returned as the code point.
 * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
 *
 * This macro does not distinguish between a real U+FFFD in the text
 * and U+FFFD returned for an ill-formed sequence.
 * Use U8_PREV() if that distinction is important.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @param c output UChar32 variable, set to U+FFFD in case of an error
 * @see U8_PREV
 * @stable ICU 51
 */
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Non-ASCII byte: the out-of-line helper decodes the sequence ending here. */ \
        /* s is parenthesized so that the cast applies to the whole argument expression. */ \
        /* The final argument -3 is the helper's "strict" parameter. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_BACK_1
 * @stable ICU 2.4
 */
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* always step back once, then keep going while the offset sits on a trail byte */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<i
 * @see U8_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step back one byte; that is the whole job unless it is a trail byte */ \
    --(i); \
    if(U8_IS_TRAIL((s)[i])) { \
        /* let the out-of-line helper compute the offset, bounded by start */ \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U8_BACK_N
 * @stable ICU 2.4
 */
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* step backward one code point per iteration, n times (not at all if n<=0) */ \
    for(__N=(n); __N>0; --__N) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * @param s const uint8_t * string
 * @param start int32_t index of the start of the string
 * @param i int32_t string offset, must be start<i
 * @param n number of code points to skip
 * @see U8_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    /* stop early once the offset has reached the start of the string */ \
    for(__N=(n); __N>0 && (i)>(start); --__N) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    U8_BACK_1_UNSAFE(s, i); \
    U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i<=length
 * @param length int32_t string length
 * @see U8_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* nothing to adjust at the very start, or at the end of a string with a known length */ \
    if((i)>(start) && ((length)<0 || (i)<(length))) { \
        /* back to the start of the preceding code point, then forward over all of it */ \
        U8_BACK_1(s, start, i); \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END

#endif



================================================
FILE: lib/src/wasm/stdlib-symbols.txt
================================================
"calloc",
"free",
"iswalnum",
"iswalpha",
"iswblank",
"iswdigit",
"iswlower",
"iswspace",
"iswupper",
"iswxdigit",
"malloc",
"memchr",
"memcmp",
"memcpy",
"memmove",
"memset",
"realloc",
"strcmp",
"strlen",
"strncat",
"strncmp",
"strncpy",
"towlower",
"towupper",



================================================
FILE: lib/src/wasm/stdlib.c
================================================
// This file implements a very simple allocator for external scanners running
// in WASM. Allocation is just bumping a static pointer and growing the heap
// as needed, and freeing is mostly a noop. But in the special case of freeing
// the last-allocated pointer, we'll reuse that pointer again.

#ifdef TREE_SITTER_FEATURE_WASM

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

extern void tree_sitter_debug_message(const char *, size_t);

#define PAGESIZE 0x10000
#define MAX_HEAP_SIZE (4 * 1024 * 1024)

// Header stored immediately in front of every allocation's payload.
typedef struct {
  // Payload size in bytes, as passed to malloc.
  size_t size;
  // Zero-length array marking where the payload begins.
  char data[0];
} Region;

// End of usable memory, as last reported by get_heap_end().
static Region *heap_end = NULL;
// Address handed to reset_heap(); malloc measures heap usage from here
// when enforcing MAX_HEAP_SIZE.
static Region *heap_start = NULL;
// Location where the header of the next allocation will be written.
static Region *next = NULL;

// Get the region metadata for the given heap pointer.
static inline Region *region_for_ptr(void *ptr) {
  // The header sits exactly one Region in front of the payload pointer.
  Region *payload = (Region *)ptr;
  return payload - 1;
}

// Get the location of the next region after the given region,
// if the given region had the given size.
static inline Region *region_after(Region *self, size_t len) {
  // First byte past a payload of `len` bytes, rounded up to a multiple of 4.
  uintptr_t payload_end = (uintptr_t)(self->data + len);
  return (Region *)((payload_end + 3) & ~0x3);
}

static void *get_heap_end() {
  // Memory 0's current size is reported in pages; convert it to an end address.
  size_t page_count = __builtin_wasm_memory_size(0);
  return (void *)(page_count * PAGESIZE);
}

// Grow memory 0 by enough whole pages to hold `size` bytes.
// Returns non-zero on success, zero if the memory could not be grown.
static int grow_heap(size_t size) {
  // Round up to whole pages.
  size_t pages_needed = 1 + (size - 1) / PAGESIZE;
  size_t grow_result = __builtin_wasm_memory_grow(0, pages_needed);
  // SIZE_MAX is the failure value.
  return grow_result != SIZE_MAX;
}

// Clear out the heap, and move it to the given address.
void reset_heap(void *new_heap_start) {
  // Re-read the current end of memory, then restart allocation at the new
  // base; everything allocated before is forgotten.
  heap_end = get_heap_end();
  heap_start = next = new_heap_start;
}

// Allocate `size` bytes by bumping `next`, growing the heap when needed.
// Returns NULL if the request is too large, if the heap would exceed
// MAX_HEAP_SIZE, or if memory cannot be grown.
void *malloc(size_t size) {
  // Reject requests so large that header + payload + alignment padding
  // would wrap around the address space.
  if (size > SIZE_MAX - (uintptr_t)next->data - 3) return NULL;

  Region *region_end = region_after(next, size);

  if (region_end > heap_end) {
    if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
      return NULL;
    }

    // Grow by the actual shortfall rather than by `size`: the region also
    // contains its header and alignment padding, so growing by `size` alone
    // can leave the end of the region outside the heap.
    size_t shortfall = (size_t)((char *)region_end - (char *)heap_end);
    if (!grow_heap(shortfall)) return NULL;
    heap_end = get_heap_end();
  }

  // Record the size in the header and hand out the payload behind it.
  void *result = &next->data;
  next->size = size;
  next = region_end;

  return result;
}

// Release an allocation. Only the most recent allocation is actually
// reclaimed; freeing anything else (or NULL) does nothing.
void free(void *ptr) {
  if (ptr != NULL) {
    Region *region = region_for_ptr(ptr);

    // If this region ends exactly where the next allocation would start,
    // it is the last one: rewind so its space is handed out again.
    if (region_after(region, region->size) == next) {
      next = region;
    }
  }
}

// Allocate zero-initialized storage for `count` elements of `size` bytes.
// Returns NULL if `count * size` overflows or if the allocation fails.
void *calloc(size_t count, size_t size) {
  // Guard the multiplication: a wrapped product would allocate too little.
  if (size != 0 && count > SIZE_MAX / size) return NULL;

  size_t total = count * size;
  void *result = malloc(total);

  // Never clear through a NULL pointer when the allocation failed.
  if (result != NULL) {
    memset(result, 0, total);
  }
  return result;
}

// Resize an allocation to `new_size` bytes.
// Returns the (possibly moved) payload pointer, or NULL on failure, in which
// case the original allocation is left intact.
void *realloc(void *ptr, size_t new_size) {
  if (ptr == NULL) {
    return malloc(new_size);
  }

  Region *region = region_for_ptr(ptr);
  Region *region_end = region_after(region, region->size);

  // When reallocating the last allocated region, return
  // the same pointer, and skip copying the data.
  if (region_end == next) {
    next = region;
    void *resized = malloc(new_size);
    // On failure, undo the rewind so the caller's block stays allocated.
    if (resized == NULL) {
      next = region_end;
    }
    return resized;
  }

  // Otherwise allocate a fresh region and copy the old contents over.
  void *result = malloc(new_size);
  if (result == NULL) return NULL;

  // Copy no more than fits in the new region when shrinking.
  size_t copy_size = region->size < new_size ? region->size : new_size;
  memcpy(result, &region->data, copy_size);
  return result;
}

#endif



================================================
FILE: test/fixtures/error_corpus/readme.md
================================================
The Error Corpus
================

This directory contains corpus tests that exercise error recovery in a variety of languages.

These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error.

Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree.



================================================
FILE: test/fixtures/error_corpus/c_errors.txt
================================================
=======================================
Statements with missing semicolons
=======================================

int main() {
  puts("hello")
  puts("world")
}

---

(translation_unit
  (function_definition
    (primitive_type)
    (function_declarator (identifier) (parameter_list))
    (compound_statement
      (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";"))
      (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";")))))

==============================================
Top-level declarations with missing semicolons
==============================================

int x
static int b

---

(translation_unit
  (declaration (primitive_type) (identifier) (MISSING ";"))
  (declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";")))

==========================================
Partial declaration lists inside ifdefs
==========================================

#ifdef __cplusplus
extern "C" {
#endif

// ok
int b;

int c() {
  return 5;
}

#ifdef __cplusplus
}
#endif

---

(translation_unit
  (preproc_ifdef (identifier)
    (linkage_specification (string_literal (string_content)) (declaration_list
      (preproc_call (preproc_directive))
      (comment)
      (declaration (primitive_type) (identifier))
      (function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal))))
      (preproc_ifdef (identifier) (MISSING "#endif"))))))

==========================================
If statements with incomplete expressions
==========================================

int main() {
  if (a.) {
    b();
    c();

    if (*) d();
  }
}

---

(translation_unit
  (function_definition
    (primitive_type)
    (function_declarator (identifier) (parameter_list))
    (compound_statement
      (if_statement
        (parenthesized_expression (field_expression
          (identifier)
          (MISSING field_identifier)))
        (compound_statement
          (expression_statement (call_expression (identifier) (argument_list)))
          (expression_statement (call_expression (identifier) (argument_list)))
          (if_statement
            (parenthesized_expression (pointer_expression (MISSING identifier)))
            (expression_statement (call_expression (identifier) (argument_list)))))))))

====================================
Invalid characters in declarations
====================================

int main() {
  int x;
  int %$#@
}

---

(translation_unit
  (function_definition
    (primitive_type)
    (function_declarator (identifier) (parameter_list))
    (compound_statement
      (declaration (primitive_type) (identifier))
      (ERROR (primitive_type) (ERROR) (identifier) (UNEXPECTED '@')))))

=========================================
Extra values in parenthesized expressions
=========================================

int main() {
  int x = (123 123);
}

---

(translation_unit
  (function_definition
    (primitive_type)
    (function_declarator (identifier) (parameter_list))
    (compound_statement
      (declaration (primitive_type) (init_declarator
        (identifier)
        (parenthesized_expression
          (ERROR (number_literal))
          (number_literal)))))))

========================================
Extra identifiers in declarations
========================================

float x WTF;
int y = 5;

---

(translation_unit
  (declaration (primitive_type) (ERROR (identifier)) (identifier))
  (declaration (primitive_type) (init_declarator (identifier) (number_literal))))

==========================================
Declarations with missing variable names
==========================================

int a() {
  struct x = 1;
  int = 2;
}

---

(translation_unit
  (function_definition
    (primitive_type)
    (function_declarator
      (identifier)
      (parameter_list))
    (compound_statement
      (declaration
        (struct_specifier (type_identifier))
        (init_declarator
          (MISSING identifier)
          (number_literal)))
      (declaration
        (primitive_type)
        (init_declarator
          (MISSING identifier)
          (number_literal))))))



================================================
FILE: test/fixtures/error_corpus/javascript_errors.txt
================================================
===================================================
Missing default values for function parameters
===================================================

class A {
  constructor (a, b = ) {
    this.a = a
  }

  foo() {}
}

---

(program
  (class_declaration (identifier) (class_body
    (method_definition
      (property_identifier)
      (formal_parameters (identifier) (identifier) (ERROR))
      (statement_block (expression_statement (assignment_expression (member_expression (this) (property_identifier)) (identifier)))))
    (method_definition
      (property_identifier)
      (formal_parameters)
      (statement_block)))))

===================================================
Missing object-literal values
===================================================

{
  a: b,
  c:
}

---

(program (expression_statement (object
  (pair (property_identifier) (identifier))
  (pair (property_identifier) (MISSING identifier)))))

===================================================
Extra identifiers in expressions
===================================================

if (a b) {
  c d;
}
e f;

---

(program
  (if_statement
    (parenthesized_expression
      (identifier)
      (ERROR (identifier)))
    (statement_block
      (ERROR (identifier))
      (expression_statement (identifier))))
  (expression_statement
    (identifier)
    (ERROR (identifier))))

===================================================
Extra complex literals in expressions
===================================================

if ({a: 'b'} {c: 'd'}) {
  x = function(a) { b; } function(c) { d; }
}

---

(program
  (if_statement
    (parenthesized_expression
      (ERROR (object (pair (property_identifier) (string (string_fragment)))))
      (object (pair (property_identifier) (string (string_fragment)))))
    (statement_block
      (expression_statement
        (assignment_expression
          (identifier)
          (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))
        (MISSING ";"))
      (expression_statement
        (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))))))

===================================================
Extra tokens at the end of the file
===================================================

// skip the equals sign
a.b =
---

(program
  (comment)
  (ERROR (member_expression (identifier) (property_identifier))))

===================================================
Errors after a sequence of function declarations
===================================================

/*
 * The JS grammar has an ambiguity such that these functions
 * can be parsed either as function declarations or as
 * function expressions. This ambiguity causes a lot of
 * splitting and merging in the parse stack. When iterating
 * the parse stack during an error repair, there would then
 * be a very large number (> 2^16) of paths through the parse
 * stack.
 */
function a() {}
function b() {}
function c() {}
function e() {}
function f() {}
function g() {}
function h() {}
function i() {}

var x = !!!

---

(program
  (comment)
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (function_declaration (identifier) (formal_parameters) (statement_block))
  (ERROR (identifier)))

=========================================================
Errors inside of a template string substitution
=========================================================

const a = `b c ${d += } f g`
const h = `i ${j(k} l`

---

(program
  (lexical_declaration
    (variable_declarator
      (identifier)
      (template_string (string_fragment) (template_substitution
        (augmented_assignment_expression (identifier) (MISSING identifier))) (string_fragment))))
  (lexical_declaration
    (variable_declarator
      (identifier)
      (template_string (string_fragment) (template_substitution (call_expression
        (identifier)
        (arguments (identifier) (MISSING ")")))) (string_fragment)))))

=========================================================
Long sequences of invalid tokens
=========================================================

function main(x) {
  console.log('a');
  what??????????????????????????????????????????????????
  console.log('b');
  return {};
}

---

(program
  (function_declaration
    (identifier)
    (formal_parameters (identifier))
    (statement_block
      (expression_statement
        (call_expression
          (member_expression (identifier) (property_identifier))
          (arguments (string (string_fragment)))))
      (expression_statement
        (binary_expression
          (identifier)
          (ERROR)
          (call_expression
            (member_expression (identifier) (property_identifier))
            (arguments (string (string_fragment))))))
      (return_statement (object)))))



================================================
FILE: test/fixtures/error_corpus/json_errors.txt
================================================
==========================================
top-level errors
==========================================

[}

---

(document
  (ERROR))

==========================================
unexpected tokens
==========================================

barf

---

(document
  (ERROR
    (UNEXPECTED 'b')))

==========================================
errors inside arrays
==========================================

[1, , 2]

---

(document
  (array
    (number)
    (ERROR)
    (number)))

==========================================
errors inside objects
==========================================

{ "key1": 1, oops }

---

(document
  (object
    (pair
      (string
        (string_content))
      (number))
    (ERROR
      (UNEXPECTED 'o'))))

==========================================
errors inside nested objects
==========================================

{ "key1": { "key2": 1, 2 }, "key3": 3 [ }

---

(document
  (object
    (pair
      (string
        (string_content))
      (object
        (pair
          (string
            (string_content))
          (number))
        (ERROR
          (number))))
    (pair
      (string
        (string_content))
      (number))
    (ERROR)))

===============================
incomplete tokens at EOF
========================

nul
---

(document
  (ERROR
    (UNEXPECTED '\0')))



================================================
FILE: test/fixtures/error_corpus/python_errors.txt
================================================
=============================================
incomplete condition in if statement
=============================================

if a is:
  print b
  print c
print d

---

(module
  (if_statement
    condition: (identifier)
    (ERROR)
    consequence: (block
      (print_statement argument: (identifier))
      (print_statement argument: (identifier))))
  (print_statement argument: (identifier)))

==========================================
extra colon in function definition
==========================================

def a()::
  b
  c
d

---

(module
  (function_definition
    name: (identifier)
    parameters: (parameters)
    (ERROR)
    body: (block
      (expression_statement (identifier))
      (expression_statement (identifier))))
  (expression_statement (identifier)))

========================================================
stray if keyword in function definition
========================================================

def a():
  if

---

(module
  (function_definition
    name: (identifier)
    parameters: (parameters)
    (ERROR)
    body: (block)))

========================================================
incomplete if statement in function definition
========================================================

def a():
  if a

---

(module
  (function_definition
    name: (identifier)
    parameters: (parameters)
    (ERROR (identifier))
    body: (block)))

========================================================
incomplete expression before triple-quoted string
========================================================

def a():
  b.
  """
  c
  """

---

(module
  (function_definition
    name: (identifier)
    parameters: (parameters)
    (ERROR (identifier))
    body: (block
      (expression_statement (string
        (string_start)
        (string_content)
        (string_end))))))

===========================================
incomplete definition in class definition
===========================================

class A:
  def

b

---

(module
  (class_definition
    name: (identifier)
    (ERROR)
    body: (block))
  (expression_statement
    (identifier)))



================================================
FILE: test/fixtures/error_corpus/ruby_errors.txt
================================================
==========================
Heredocs with errors 2
==========================

joins <<~SQL
  b
SQL
)
c

---

(program
  (call
    method: (identifier)
    arguments: (argument_list
      (heredoc_beginning)))
  (heredoc_body
    (heredoc_content)
    (heredoc_end))
  (ERROR)
  (identifier))



================================================
FILE: test/fixtures/grammars/.gitkeep
================================================
[Empty file]


================================================
FILE: test/fixtures/template_corpus/readme.md
================================================
The Template Corpus
===================

This directory contains corpus tests that exercise parsing a set of disjoint ranges within a file.

Each of these input files contains source code surrounded by the delimiters `<%` and `%>`. The content outside of these delimiters is meant to be ignored.


================================================
FILE: test/fixtures/template_corpus/ruby_templates.txt
================================================
==============================
Templates with errors
==============================

<div>
  <% if notice.present? %>
    <p id="notice"><% notice %></p>
  <% end %>
  <div>
    <h1>Foods</h1>
    <div>
      <% link_to 'New food', new_food_path, class: "block font-medium" %>
      <% link_to 'Search Database', database_foods_search_path, class: "block font-medium" %>
    </div>
  </div>

  <% . render partial: "form", locals: { food: @new_food } %>

  <% form_with url: "/search", method: :get do |form| %>
    <% form.label :previous_query, 'Search previous foods:' %>
    <% form.text_field :previous_query %>
    <% form.submit "Search" %>
  <% end %>

  <div id="recipes">
    <% render @foods %>
  </div>
</div>

---

(program
  (if
    (call (identifier) (identifier))
    (then (identifier)))
  (call
    (identifier)
    (argument_list
      (string (string_content))
      (identifier)
      (pair (hash_key_symbol) (string (string_content)))))
  (call
    (identifier)
    (argument_list
      (string (string_content))
      (identifier)
      (pair (hash_key_symbol) (string (string_content)))))
  (ERROR)
  (call
    (identifier)
    (argument_list
      (pair (hash_key_symbol) (string (string_content)))
      (pair (hash_key_symbol) (hash (pair (hash_key_symbol) (instance_variable))))))
  (call
    (identifier)
    (argument_list
      (pair (hash_key_symbol) (string (string_content)))
      (pair (hash_key_symbol) (simple_symbol)))
    (do_block
      (block_parameters
        (identifier))
      (body_statement
        (call
          (identifier)
          (identifier)
          (argument_list (simple_symbol) (string (string_content))))
  (call
    (identifier)
    (identifier)
    (argument_list
      (simple_symbol)))
  (call
    (identifier)
    (identifier)
    (argument_list (string (string_content)))))))
  (call
    (identifier)
    (argument_list (instance_variable))))


================================================
FILE: test/fixtures/test_grammars/readme.md
================================================
These small grammars demonstrate specific features or test for certain specific regressions.

For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.


================================================
FILE: test/fixtures/test_grammars/aliased_inlined_rules/corpus.txt
================================================
=========================
OK
=========================

a.b.c;

---

(statement
  (member_expression
    (member_expression
      (variable_name)
      (property_name))
    (property_name)))



================================================
FILE: test/fixtures/test_grammars/aliased_inlined_rules/grammar.js
================================================
// This grammar shows that `ALIAS` rules can *contain* a rule that is marked as `inline`. It also
// shows that you can alias a rule that would otherwise be anonymous, and it will then appear as a
// named node.

module.exports = grammar({
    name: 'aliased_inlined_rules',

    extras: $ => [/\s/],

    inline: $ => [$.identifier],

    rules: {
        statement: $ => seq($._expression, ';'),

        _expression: $ => choice(
            $.member_expression,
            alias($.identifier, $.variable_name),
        ),

        member_expression: $ => prec.left(1, seq(
            $._expression,
            '.',
            alias($.identifier, $.property_name)
        )),

        identifier: $ => choice('a', 'b', 'c')
    }
});


================================================
FILE: test/fixtures/test_grammars/aliased_rules/corpus.txt
================================================
======================================
Method calls
======================================

*a.b(c(d.e));

---

(statement
  (star)
  (call_expression
    (member_expression
      (variable_name)
      (property_name))
    (call_expression
      (variable_name)
      (member_expression
        (variable_name)
        (property_name)))))



================================================
FILE: test/fixtures/test_grammars/aliased_rules/grammar.js
================================================
// This grammar exercises `alias` on a named rule: `identifier` is never used
// directly, only as `variable_name` (inside `_expression`) or `property_name`
// (after a `.` in `member_expression`).
module.exports = grammar({
    name: 'aliased_rules',

    // `star` is registered as an extra alongside whitespace.
    extras: $ => [
      /\s/,
      $.star,
    ],

    rules: {
        statement: $ => seq($._expression, ';'),

        _expression: $ => choice(
            $.call_expression,
            $.member_expression,
            alias($.identifier, $.variable_name),
        ),

        call_expression: $ => prec.left(seq(
            $._expression,
            '(',
            $._expression,
            ')'
        )),

        member_expression: $ => prec.left(1, seq(
            $._expression,
            '.',
            alias($.identifier, $.property_name)
        )),

        identifier: $ => /[a-z]+/,

        // Tests for https://github.com/tree-sitter/tree-sitter/issues/1834
        //
        // Even though the alias is unused, that issue causes all instances of
        // the extra that appear in the tree to be renamed to `star_aliased`.
        //
        // Instead, this alias should have no effect because it is unused.
        star: $ => '*',
        // `unused` is not referenced by any other rule in this grammar.
        unused: $ => alias($.star, $.star_aliased),
    }
});



================================================
FILE: test/fixtures/test_grammars/aliased_token_rules/corpus.txt
================================================
======================
Aliased token rules
======================

abcde

---

(expression (X) (Y))



================================================
FILE: test/fixtures/test_grammars/aliased_token_rules/grammar.js
================================================
// This grammar shows that `ALIAS` rules can be applied directly to `TOKEN` and `IMMEDIATE_TOKEN`
// rules.

module.exports = grammar({
    name: 'aliased_token_rules',

    extras: $ => [/\s/],

    rules: {
        expression: $ => seq(
            'a',
            alias(token(seq('b', 'c')), $.X),
            alias(token.immediate(seq('d', 'e')), $.Y),
        ),
    }
});


================================================
FILE: test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt
================================================
==========================================
Aliases on rules that are unit reductions
==========================================

one two three four;

---

(statement
  (identifier)
  (b_prime (identifier))
  (c_prime (identifier))
  (identifier))



================================================
FILE: test/fixtures/test_grammars/aliased_unit_reductions/grammar.js
================================================
// Normally, when there are invisible rules (rules whose names start with an `_`) that simply wrap
// another rule, there is an optimization at parser-generation time called *Unit Reduction
// Elimination* that avoids creating nodes for those rules at runtime. One case where this
// optimization must *not* be applied is when those invisible rules are going to be aliased within
// their parent rule. In that situation, eliminating the invisible node could cause the alias to be
// incorrectly applied to its child.

module.exports = grammar({
    name: 'aliased_unit_reductions',

    extras: $ => [/\s/],

    rules: {
        statement: $ => seq(
            // Used without any alias.
            $._a,

            // The `_b` rule is always aliased to `b_prime`, so it is internally treated
            // as a simple alias.
            alias($._b, $.b_prime),

            // The `_c` rule is used without an alias in addition to being aliased to `c_prime`,
            // so it is not a simple alias.
            alias($._c, $.c_prime),

            // The unaliased use of `_c` referred to above.
            $._c,
            ';'
        ),

        // Each hidden rule below does nothing but wrap a single other rule:
        // `_a` -> `_A` -> `identifier`, and likewise for `_b` and `_c`.
        _a: $ => $._A,
        _b: $ => $._B,
        _c: $ => $._C,
        _A: $ => $.identifier,
        _B: $ => $.identifier,
        _C: $ => $.identifier,

        identifier: $ => /[a-z]+/,
    }
});


================================================
FILE: test/fixtures/test_grammars/aliases_in_root/corpus.txt
================================================
======================================
Aliases within the root node
======================================

# this is a comment
foo foo

---

(document
  (comment)
  (bar)
  (foo))



================================================
FILE: test/fixtures/test_grammars/aliases_in_root/grammar.js
================================================
module.exports = grammar({
    name: 'aliases_in_root',

    extras: $ => [
      /\s/,
      $.comment,
    ],

    rules: {
        document: $ => seq(
          alias($.foo, $.bar),
          $.foo,
        ),

        foo: $ => "foo",

        comment: $ => /#.*/
    }
});



================================================
FILE: test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
================================================
================================================
anonymous tokens defined with character classes
================================================
1234
---

(first_rule)

=================================================
anonymous tokens defined with LF escape sequence
=================================================


---

(first_rule)

=================================================
anonymous tokens defined with CR escape sequence
=================================================


---

(first_rule)

================================================
anonymous tokens with quotes
================================================
'hello'
---

(first_rule)



================================================
FILE: test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.js
================================================
// Every token in a grammar is given a name in the generated parser. Anonymous tokens (tokens
// specified directly in the body of some larger rule) are named according to their content. So
// when tokens contain characters that aren't valid in a C string literal, we need to escape those
// characters. This grammar tests that this escaping works. The test is basically that the generated
// parser compiles successfully.

module.exports = grammar({
    name: "anonymous_tokens_with_escaped_chars",
    rules: {
        // Each alternative is an anonymous token: a line feed, a carriage return followed by a
        // line feed, a string containing single quotes, and a regex using a character class.
        first_rule: $ => choice(
            "\n",
            "\r\n",
            "'hello'",
            /\d+/,
        )
    }
})



================================================
FILE: test/fixtures/test_grammars/associativity_left/corpus.txt
================================================
===================
chained operations
===================
x+y+z
---
(expression (math_operation
  (expression (math_operation (expression (identifier)) (expression (identifier))))
  (expression (identifier))))


================================================
FILE: test/fixtures/test_grammars/associativity_left/grammar.js
================================================
module.exports = grammar({
    name: 'associativity_left',

    rules: {
        expression: $ => choice(
            $.math_operation,
            $.identifier
        ),

        math_operation: $ => prec.left(seq(
            $.expression,
            '+',
            $.expression,
        )),

        identifier: $ => /[a-z]+/,
    }
});


================================================
FILE: test/fixtures/test_grammars/associativity_missing/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  expression  '+'  expression  •  '+'  …

Possible interpretations:

  1:  (math_operation  expression  '+'  expression)  •  '+'  …
  2:  expression  '+'  (math_operation  expression  •  '+'  expression)

Possible resolutions:

  1:  Specify a left or right associativity in `math_operation`
  2:  Add a conflict for these rules: `math_operation`



================================================
FILE: test/fixtures/test_grammars/associativity_missing/grammar.js
================================================
// This grammar deliberately gives `math_operation` no associativity. Generating a parser for it
// is expected to fail with the unresolved-conflict message stored in `expected_error.txt`, since
// `expression '+' expression • '+'` can be grouped in two ways.
module.exports = grammar({
    name: 'associativity_missing',

    rules: {
        expression: $ => choice(
            $.math_operation,
            $.identifier
        ),

        // No `prec.left` / `prec.right` wrapper here: this is the source of the conflict.
        math_operation: $ => seq(
            $.expression,
            '+',
            $.expression,
        ),

        identifier: $ => /[a-z]+/,
    }
});


================================================
FILE: test/fixtures/test_grammars/associativity_right/corpus.txt
================================================
===================
chained operations
===================
x+y+z
---
(expression (math_operation
  (expression (identifier))
  (expression (math_operation (expression (identifier)) (expression (identifier))))))



================================================
FILE: test/fixtures/test_grammars/associativity_right/grammar.js
================================================
module.exports = grammar({
    name: 'associativity_right',

    rules: {
        expression: $ => choice(
            $.math_operation,
            $.identifier
        ),

        math_operation: $ => prec.right(seq(
            $.expression,
            '+',
            $.expression,
        )),

        identifier: $ => /[a-z]+/,
    }
});


================================================
FILE: test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  '['  identifier  •  ']'  …

Possible interpretations:

  1:  '['  (array_repeat1  identifier)  •  ']'  …
  2:  '['  (array_type_repeat1  identifier)  •  ']'  …

Possible resolutions:

  1:  Specify a higher precedence in `array_repeat1` than in the other rules.
  2:  Specify a higher precedence in `array_type_repeat1` than in the other rules.
  3:  Add a conflict for these rules: `array`, `array_type`



================================================
FILE: test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.js
================================================
// This grammar has a conflict that involves *repeat rules*: auxiliary rules that are added by the
// parser generator in order to implement repetition. There is no way of referring to these rules in
// the grammar DSL, so these conflicts must be resolved by referring to their parent rules.
//
// Generation is expected to fail with the message in `expected_error.txt`, which names the
// auxiliary rules `array_repeat1` and `array_type_repeat1` but suggests adding a conflict for
// their parents, `array` and `array_type`.

module.exports = grammar({
    name: 'conflict_in_repeat_rule',

    rules: {
        statement: $ => choice(
            seq($.array, ';'),
            seq($.array_type, $.identifier, ';'),
        ),

        // `array` and `array_type` both open with '[' and both allow `identifier` inside their
        // repeat, so after `'[' identifier` either repeat could be the one being parsed.
        array: $ => seq(
            '[',
            repeat(choice($.identifier, '0')),
            ']',
        ),

        array_type: $ => seq(
            '[',
            repeat(choice($.identifier, 'void')),
            ']',
        ),

        identifier: $ => /[a-z]+/
    }
});


================================================
FILE: test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  _program_start  '['  identifier  •  ']'  …

Possible interpretations:

  1:  _program_start  '['  (array_repeat1  identifier)  •  ']'  …
  2:  _program_start  '['  (array_type_repeat1  identifier)  •  ']'  …

Possible resolutions:

  1:  Specify a higher precedence in `array_repeat1` than in the other rules.
  2:  Specify a higher precedence in `array_type_repeat1` than in the other rules.
  3:  Add a conflict for these rules: `array`, `array_type`



================================================
FILE: test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.js
================================================
// This grammar is similar to the `conflict_in_repeat_rule` grammar, except that the conflict occurs
// after an external token is consumed. This tests that the logic for determining the repeat rule's
// "parent" rule works in the presence of external tokens.
//
// Generation is expected to fail with the message in `expected_error.txt`, whose symbol sequence
// begins with `_program_start`.

module.exports = grammar({
    name: 'conflict_in_repeat_rule_after_external_token',

    // The only external token; both alternatives of `statement` begin with it.
    externals: $ => [
        $._program_start,
    ],

    rules: {
        statement: $ => choice(
            seq($._program_start, $.array, ';'),
            seq($._program_start, $.array_type, $.identifier, ';'),
        ),

        // `array` and `array_type` both open with '[' and both allow `identifier` inside their
        // repeat, so after `'[' identifier` either repeat could be the one being parsed.
        array: $ => seq(
            '[',
            repeat(choice($.identifier, '0')),
            ']',
        ),

        array_type: $ => seq(
            '[',
            repeat(choice($.identifier, 'void')),
            ']',
        ),

        identifier: $ => /[a-z]+/
    }
});


================================================
FILE: test/fixtures/test_grammars/conflicting_precedence/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  expression  '+'  expression  •  '*'  …

Possible interpretations:

  1:  (sum  expression  '+'  expression)  •  '*'  …               (precedence: 0, associativity: Left)
  2:  expression  '+'  (other_thing  expression  •  '*'  '*')     (precedence: -1, associativity: Left)
  3:  expression  '+'  (product  expression  •  '*'  expression)  (precedence: 1, associativity: Left)

Possible resolutions:

  1:  Specify a higher precedence in `product` and `other_thing` than in the other rules.
  2:  Specify a higher precedence in `sum` than in the other rules.
  3:  Add a conflict for these rules: `sum`, `product`, `other_thing`



================================================
FILE: test/fixtures/test_grammars/conflicting_precedence/grammar.js
================================================
module.exports = grammar({
    name: 'conflicting_precedence',

    rules: {
        expression: $ => choice(
            $.sum,
            $.product,
            $.other_thing,
        ),

        sum: $ => prec.left(0, seq($.expression, '+', $.expression)),
        product: $ => prec.left(1, seq($.expression, '*', $.expression)),
        other_thing: $ => prec.left(-1, seq($.expression, '*', '*')),
        identifier: $ => /[a-zA-Z]+/
    }
});


================================================
FILE: test/fixtures/test_grammars/depends_on_column/corpus.txt
================================================
==================
X is at odd column
==================

 x

---

(x_is_at
  (odd_column))

===================
X is at even column
===================

  x

---

(x_is_at
  (even_column))



================================================
FILE: test/fixtures/test_grammars/depends_on_column/grammar.js
================================================
module.exports = grammar({
  name: "depends_on_column",
  rules: {
    x_is_at: ($) => seq(/[ \r\n]*/, choice($.odd_column, $.even_column), "x"),
  },
  externals: ($) => [$.odd_column, $.even_column],
});



================================================
FILE: test/fixtures/test_grammars/depends_on_column/scanner.c
================================================
#include "tree_sitter/parser.h"

// External token indices, in the same order as the grammar's `externals` list
// (`odd_column`, then `even_column`).
enum TokenType { ODD_COLUMN, EVEN_COLUMN };

// The scanner is stateless

// Creates the scanner payload. There is no state to hold, so the payload is NULL.
void *tree_sitter_depends_on_column_external_scanner_create() {
    return NULL;
}

// Releases the scanner payload. Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_depends_on_column_external_scanner_destroy(
    void *payload
) {
    // no-op
}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_depends_on_column_external_scanner_serialize(
    void *payload,
    char *buffer
) {
    return 0;
}

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_depends_on_column_external_scanner_deserialize(
    void *payload,
    const char *buffer,
    unsigned length
) {
    // no-op
}

// Reports `ODD_COLUMN` when the lexer's current column is odd and `EVEN_COLUMN` otherwise.
// The lexer is never advanced and the function always returns true.
bool tree_sitter_depends_on_column_external_scanner_scan(
    void *payload,
    TSLexer *lexer,
    const bool *valid_symbols
) {
    if (lexer->get_column(lexer) % 2 != 0) {
        lexer->result_symbol = ODD_COLUMN;
    } else {
        lexer->result_symbol = EVEN_COLUMN;
    }
    return true;
}



================================================
FILE: test/fixtures/test_grammars/dynamic_precedence/readme.md
================================================
This grammar contains a conflict that is resolved at runtime. The PREC_DYNAMIC rule is used to indicate that the `declarator` rule should be preferred to the `expression` rule at runtime.



================================================
FILE: test/fixtures/test_grammars/dynamic_precedence/corpus.txt
================================================
===============================
Declarations
===============================

T * x

---

(program (declaration
  (type (identifier))
  (declarator (identifier))))

===============================
Expressions
===============================

w * x * y

---

(program (expression
  (expression
    (expression (identifier))
    (expression (identifier)))
  (expression (identifier))))



================================================
FILE: test/fixtures/test_grammars/dynamic_precedence/grammar.js
================================================
module.exports = grammar({
    name: 'dynamic_precedence',

    extras: $ => [/\s/],

    conflicts: $ => [[$.expression, $.type]],

    rules: {
        program: $ => choice(
            $.declaration,
            $.expression,
        ),

        expression: $ => choice(
            prec.left(seq($.expression, '*', $.expression)),
            $.identifier
        ),

        declaration: $ => seq(
            $.type,
            $.declarator,
        ),

        declarator: $ => choice(
            prec.dynamic(1, seq('*', $.identifier)),
            $.identifier,
        ),

        type: $ => $.identifier,
        identifier: $ => /[a-z-A-Z]+/
    }
});


================================================
FILE: test/fixtures/test_grammars/epsilon_external_extra_tokens/corpus.txt
================================================
==========================
A document
==========================

a b

---

(document)



================================================
FILE: test/fixtures/test_grammars/epsilon_external_extra_tokens/grammar.js
================================================
module.exports = grammar({
    name: 'epsilon_external_extra_tokens',

    extras: $ => [/\s/, $.comment],

    externals: $ => [$.comment],

    rules: {
        document: $ => seq('a', 'b'),
    }
});



================================================
FILE: test/fixtures/test_grammars/epsilon_external_extra_tokens/scanner.c
================================================
#include "tree_sitter/parser.h"

// Index of the single external token, `comment`, from the grammar's `externals` list.
enum TokenType {
  COMMENT
};

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_epsilon_external_extra_tokens_external_scanner_create(void) {
  return NULL;
}

// Reports a `COMMENT` token on every call without advancing the lexer, i.e. without
// consuming any input. `valid_symbols` is not consulted.
bool tree_sitter_epsilon_external_extra_tokens_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  lexer->result_symbol = COMMENT;
  return true;
}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_epsilon_external_extra_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  return 0;
}

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_epsilon_external_extra_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_epsilon_external_extra_tokens_external_scanner_destroy(void *payload) {}



================================================
FILE: test/fixtures/test_grammars/epsilon_external_tokens/corpus.txt
================================================
==========================
A leading zero-width token
==========================

hello

---

(document (zero_width))



================================================
FILE: test/fixtures/test_grammars/epsilon_external_tokens/grammar.js
================================================
module.exports = grammar({
    name: 'epsilon_external_tokens',

    extras: $ => [/\s/],
    externals: $ => [$.zero_width],

    rules: {
        document: $ => seq($.zero_width, 'hello'),
    }
});


================================================
FILE: test/fixtures/test_grammars/epsilon_external_tokens/scanner.c
================================================
#include "tree_sitter/parser.h"

// Index of the single external token, `zero_width`, from the grammar's `externals` list.
enum TokenType {
  ZERO_WIDTH_TOKEN
};

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_epsilon_external_tokens_external_scanner_create() {
  return NULL;
}

// Reports a `ZERO_WIDTH_TOKEN` on every call without advancing the lexer, i.e. without
// consuming any input. `valid_symbols` is not consulted.
bool tree_sitter_epsilon_external_tokens_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  lexer->result_symbol = ZERO_WIDTH_TOKEN;
  return true;
}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_epsilon_external_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  return 0;
}

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_epsilon_external_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_epsilon_external_tokens_external_scanner_destroy(void *payload) {}



================================================
FILE: test/fixtures/test_grammars/epsilon_rules/expected_error.txt
================================================
The rule `rule_2` matches the empty string.

Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.



================================================
FILE: test/fixtures/test_grammars/epsilon_rules/grammar.js
================================================
module.exports = grammar({
    name: 'epsilon_rules',

    rules: {
        rule_1: $ => $.rule_2,

        rule_2: $ => optional($.rule_3),

        rule_3: $ => 'x'
    }
});


================================================
FILE: test/fixtures/test_grammars/external_and_internal_anonymous_tokens/readme.md
================================================
This grammar is just like the `external_and_internal_tokens` grammar, except that the shared external token is *anonymous*; it's specified as a string in the grammar.


================================================
FILE: test/fixtures/test_grammars/external_and_internal_anonymous_tokens/corpus.txt
================================================
=========================================
single-line statements - internal tokens
=========================================

a b

---

(statement (variable) (variable))

=========================================
multi-line statements - internal tokens
=========================================

a
b

---

(statement (variable) (variable))

=========================================
single-line statements - external tokens
=========================================

'hello' 'world'

---

(statement (string) (string))

=========================================
multi-line statements - external tokens
=========================================

'hello'
'world'

---

(statement (string) (string))



================================================
FILE: test/fixtures/test_grammars/external_and_internal_anonymous_tokens/grammar.js
================================================
module.exports = grammar({
    name: 'external_and_internal_anonymous_tokens',

    externals: $ => [
        $.string,
        '\n'
    ],

    extras: $ => [/\s/],

    rules: {
        statement: $ => seq(
            $._expression,
            $._expression,
            '\n'
        ),

        _expression: $ => choice(
            $.string,
            $.variable,
            $.number
        ),

        variable: $ => /[a-z]+/,

        number: $ => /\d+/
    }
})


================================================
FILE: test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c
================================================
#include "tree_sitter/parser.h"

// External token indices, in the same order as the grammar's `externals` list
// (`string`, then the anonymous '\n' token).
enum {
  STRING,
  LINE_BREAK
};

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_external_and_internal_anonymous_tokens_external_scanner_create() {
  return NULL;
}

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_destroy(
  void *payload
) {}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_external_and_internal_anonymous_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) { return 0; }

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Scans for a line break or a single-quoted string.
//
// When `LINE_BREAK` is valid, spaces and carriage returns are skipped and a following '\n' is
// returned as `LINE_BREAK`. Otherwise, when `STRING` is valid, spaces, carriage returns and
// line breaks are skipped and a '...' literal is returned as `STRING`.
//
// Returns false when neither token is found, including when a string literal is still open
// at the end of the input.
bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  // If a line-break is a valid lookahead token, only skip spaces.
  if (valid_symbols[LINE_BREAK]) {
    while (lexer->lookahead == ' ' || lexer->lookahead == '\r') {
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead == '\n') {
      lexer->advance(lexer, false);
      lexer->result_symbol = LINE_BREAK;
      return true;
    }
  }

  // If a line-break is not a valid lookahead token, skip line breaks as well
  // as spaces.
  if (valid_symbols[STRING]) {
    while (lexer->lookahead == ' ' || lexer->lookahead == '\r' || lexer->lookahead == '\n') {
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead == '\'') {
      lexer->advance(lexer, false);

      while (lexer->lookahead != '\'') {
        // At the end of the input the lookahead can never become a quote, so an
        // unterminated string must be rejected here instead of looping forever.
        if (lexer->eof(lexer)) return false;
        lexer->advance(lexer, false);
      }

      lexer->advance(lexer, false);
      lexer->result_symbol = STRING;
      return true;
    }
  }

  return false;
}



================================================
FILE: test/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
================================================
=========================================
single-line statements - internal tokens
=========================================

a b

---

(statement (variable) (variable) (line_break))

=========================================
multi-line statements - internal tokens
=========================================

a
b

---

(statement (variable) (variable) (line_break))

=========================================
single-line statements - external tokens
=========================================

'hello' 'world'

---

(statement (string) (string) (line_break))

=========================================
multi-line statements - external tokens
=========================================

'hello'
'world'

---

(statement (string) (string) (line_break))



================================================
FILE: test/fixtures/test_grammars/external_and_internal_tokens/grammar.js
================================================
// This grammar has an external scanner whose `scan` method needs to be able to check for the
// validity of an *internal* token. This is done by including the name of that internal token
// (`line_break`) in the grammar's `externals` field.

module.exports = grammar({
    name: 'external_and_internal_tokens',

    externals: $ => [
        $.string,
        $.line_break,
    ],

    extras: $ => [/\s/],

    rules: {
        statement: $ => seq(
            $._expression,
            $._expression,
            $.line_break,
        ),

        _expression: $ => choice(
            $.string,
            $.variable,
            $.number,
        ),

        variable: $ => /[a-z]+/,
        number: $ => /\d+/,
        line_break: $ => '\n',
    }
});


================================================
FILE: test/fixtures/test_grammars/external_and_internal_tokens/scanner.c
================================================
#include "tree_sitter/parser.h"

// External token indices, in the same order as the grammar's `externals` list
// (`string`, then `line_break`).
enum {
  STRING,
  LINE_BREAK
};

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_external_and_internal_tokens_external_scanner_create() {
  return NULL;
}

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_external_and_internal_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) { return 0; }

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Scans for a line break or a single-quoted string.
//
// When `LINE_BREAK` is valid, spaces and carriage returns are skipped and a following '\n' is
// returned as `LINE_BREAK`. Otherwise, when `STRING` is valid, spaces, carriage returns and
// line breaks are skipped and a '...' literal is returned as `STRING`.
//
// Returns false when neither token is found, including when a string literal is still open
// at the end of the input.
bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  // If a line-break is a valid lookahead token, only skip spaces.
  if (valid_symbols[LINE_BREAK]) {
    while (lexer->lookahead == ' ' || lexer->lookahead == '\r') {
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead == '\n') {
      lexer->advance(lexer, false);
      lexer->result_symbol = LINE_BREAK;
      return true;
    }
  }

  // If a line-break is not a valid lookahead token, skip line breaks as well
  // as spaces.
  if (valid_symbols[STRING]) {
    while (lexer->lookahead == ' ' || lexer->lookahead == '\r' || lexer->lookahead == '\n') {
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead == '\'') {
      lexer->advance(lexer, false);

      while (lexer->lookahead != '\'') {
        // At the end of the input the lookahead can never become a quote, so an
        // unterminated string must be rejected here instead of looping forever.
        if (lexer->eof(lexer)) return false;
        lexer->advance(lexer, false);
      }

      lexer->advance(lexer, false);
      lexer->result_symbol = STRING;
      return true;
    }
  }

  return false;
}



================================================
FILE: test/fixtures/test_grammars/external_extra_tokens/corpus.txt
================================================
========================
extra external tokens
========================

x = # a comment
y

---

(assignment (variable) (comment) (variable))



================================================
FILE: test/fixtures/test_grammars/external_extra_tokens/grammar.js
================================================
module.exports = grammar({
    name: "external_extra_tokens",

    externals: $ => [
        $.comment
    ],

    extras: $ => [/\s/, $.comment],

    rules: {
        assignment: $ => seq($.variable, '=', $.variable),
        variable: $ => /[a-z]+/
    }
})



================================================
FILE: test/fixtures/test_grammars/external_extra_tokens/scanner.c
================================================
#include "tree_sitter/parser.h"

// Index of the single external token, `comment`, from the grammar's `externals` list.
enum {
  COMMENT,
};

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_external_extra_tokens_external_scanner_create() {
  return NULL;
}

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_external_extra_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) { return 0; }

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_external_extra_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Scans a `#` line comment.
//
// Leading spaces are skipped. If the next character is '#', everything up to (but not
// including) the next '\n' is consumed and reported as `COMMENT`. A comment on the last line
// of the input, with no trailing newline, ends at the end of the input.
//
// Returns false when the next non-space character is not '#'.
bool tree_sitter_external_extra_tokens_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  while (lexer->lookahead == ' ') {
    lexer->advance(lexer, true);
  }

  if (lexer->lookahead == '#') {
    lexer->advance(lexer, false);
    // Stop at end of input as well: there the lookahead never becomes '\n', so checking
    // only for the newline would loop forever.
    while (lexer->lookahead != '\n' && !lexer->eof(lexer)) {
      lexer->advance(lexer, false);
    }

    lexer->result_symbol = COMMENT;
    return true;
  }

  return false;
}



================================================
FILE: test/fixtures/test_grammars/external_tokens/corpus.txt
================================================
========================
simple external tokens
=========================

x + %(sup (external) scanner?)

---

(expression (sum (expression (identifier)) (expression (string))))

==================================
external tokens that require state
==================================

%{sup {} #{x + y} {} scanner?}

---

(expression (string
  (expression (sum
    (expression (identifier))
    (expression (identifier))))))



================================================
FILE: test/fixtures/test_grammars/external_tokens/grammar.js
================================================
// This grammar uses an external scanner to match special string literals
// that track the nesting depth of their delimiters, similar to Ruby's percent
// string literals.

module.exports = grammar({
  name: "external_tokens",

  externals: $ => [
    $._percent_string,
    $._percent_string_start,
    $._percent_string_end,
  ],

  extras: $ => [/\s/],

  rules: {
    expression: $ => choice($.string, $.sum, $.identifier),

    sum: $ => prec.left(seq($.expression, '+', $.expression)),

    string: $ => choice($._percent_string, seq(
      $._percent_string_start,
      $.expression,
      $._percent_string_end,
    )),

    identifier: $ => /[a-z]+/
  }
})



================================================
FILE: test/fixtures/test_grammars/external_tokens/scanner.c
================================================
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"

// External token indices, in the same order as the grammar's `externals` list
// (`_percent_string`, `_percent_string_start`, `_percent_string_end`).
enum {
  percent_string,
  percent_string_start,
  percent_string_end
};

// State of the percent string currently being scanned.
typedef struct {
  int32_t open_delimiter;   // opening delimiter: '(', '[' or '{'
  int32_t close_delimiter;  // the matching closing delimiter
  uint32_t depth;           // number of delimiters currently open
} Scanner;

void *tree_sitter_external_tokens_external_scanner_create() {
  Scanner *scanner = ts_malloc(sizeof(Scanner));
  *scanner = (Scanner) {
    .open_delimiter = 0,
    .close_delimiter = 0,
    .depth = 0
  };
  return scanner;
}

// Releases the scanner payload with `ts_free`.
void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
  ts_free(payload);
}

// Reports that no scanner state is serialized.
//
// The return value is the number of bytes written to `buffer`. Nothing is written here, so
// the length must be 0. The previous `return true` reported one byte that this function
// never initialized.
unsigned tree_sitter_external_tokens_external_scanner_serialize(
  void *payload,
  char *buffer
) { return 0; }

// Ignores the serialized bytes. The fields of the `Scanner` payload are left exactly as
// the last `scan` call wrote them.
void tree_sitter_external_tokens_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {}

// Scans percent-string tokens.
//
// When `percent_string` is valid: skips whitespace, expects '%' followed by '(', '[' or '{',
// then consumes characters while tracking delimiter nesting in `scanner->depth`. Returns
// `percent_string` once the opening delimiter has been closed, or `percent_string_start` as
// soon as an interpolation opener `#{` is seen.
//
// Otherwise, when `percent_string_end` is valid: expects '}' (the end of an interpolation)
// and consumes the rest of the literal, returning `percent_string_end` once the nesting
// depth reaches zero.
//
// Returns false when no token matches, including when the input ends before the literal
// is closed.
bool tree_sitter_external_tokens_external_scanner_scan(
  void *payload, TSLexer *lexer, const bool *valid_symbols) {
  Scanner *scanner = payload;

  if (valid_symbols[percent_string]) {
    while (lexer->lookahead == ' ' ||
           lexer->lookahead == '\t' ||
           lexer->lookahead == '\n' ||
           lexer->lookahead == '\r') {
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead != '%') return false;
    lexer->advance(lexer, false);

    switch (lexer->lookahead) {
      case '(':
        scanner->open_delimiter = '(';
        scanner->close_delimiter = ')';
        scanner->depth = 1;
        break;
      case '[':
        scanner->open_delimiter = '[';
        scanner->close_delimiter = ']';
        scanner->depth = 1;
        break;
      case '{':
        scanner->open_delimiter = '{';
        scanner->close_delimiter = '}';
        scanner->depth = 1;
        break;
      default:
        return false;
    }

    lexer->advance(lexer, false);

    for (;;) {
      if (scanner->depth == 0) {
        lexer->log(lexer, "Found a percent string");
        lexer->result_symbol = percent_string;
        return true;
      }

      // An unbalanced literal can never reach depth 0 once the input is exhausted;
      // give up instead of looping forever.
      if (lexer->eof(lexer)) return false;

      if (lexer->lookahead == scanner->open_delimiter) {
        scanner->depth++;
      } else if (lexer->lookahead == scanner->close_delimiter) {
        scanner->depth--;
      } else if (lexer->lookahead == '#') {
        lexer->advance(lexer, false);
        if (lexer->lookahead == '{') {
          lexer->advance(lexer, false);
          lexer->result_symbol = percent_string_start;
          return true;
        }
        // A lone '#': the character after it has not been examined yet and may itself
        // be a delimiter, so go around the loop again without consuming it.
        continue;
      }

      lexer->advance(lexer, false);
    }
  } else if (valid_symbols[percent_string_end]) {
    if (lexer->lookahead != '}') return false;
    lexer->advance(lexer, false);

    for (;;) {
      if (scanner->depth == 0) {
        lexer->result_symbol = percent_string_end;
        return true;
      }

      // Same guard as above: stop at end of input rather than spinning.
      if (lexer->eof(lexer)) return false;

      if (lexer->lookahead == scanner->open_delimiter) {
        scanner->depth++;
      } else if (lexer->lookahead == scanner->close_delimiter) {
        scanner->depth--;
      }

      lexer->advance(lexer, false);
    }
  }

  return false;
}



================================================
FILE: test/fixtures/test_grammars/external_unicode_column_alignment/README.md
================================================
This tests that `get_column` correctly counts codepoints since the start of the line.


================================================
FILE: test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt
================================================
========================
Single list, no boxes
========================

-
-
-

----------------------

(expression
  (list
    (list_item)
    (list_item)
    (list_item)
  )
)

========================
Two lists, no boxes
========================

 -
 -
 -
  -
  -

----------------------

(expression
  (list
    (list_item)
    (list_item)
    (list_item)
  )
  (list
    (list_item)
    (list_item)
  )
)

========================
List with boxes
========================

 -
□-
 -

----------------------

(expression
  (list
    (list_item)
    (list_item)
    (list_item)
  )
)

========================
Multiple lists with boxes
========================

   -
□ □-
 □ -
□□□□□□-
□ □ □ -
      -
□□□   -
□□□-
□ □-

----------------------

(expression
  (list
    (list_item)
    (list_item)
    (list_item)
  )
  (list
    (list_item)
    (list_item)
    (list_item)
    (list_item)
  )
  (list
    (list_item)
    (list_item)
  )
)



================================================
FILE: test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js
================================================
module.exports = grammar({
  name: "external_unicode_column_alignment",

  externals: $ => [
    $._start_list,
    $.list_item,
    $._end_list
  ],

  extras: $ => [/\s/, '□'],

  rules: {
    expression: $ => repeat($.list),

    list: $ => seq($._start_list, repeat1($.list_item), $._end_list)
  }
})



================================================
FILE: test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c
================================================
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"

#include <wctype.h>
#include <string.h>

// External token indices, in the same order as the grammar's `externals` list
// (`_start_list`, `list_item`, `_end_list`).
enum {
  LIST_START,
  LIST_ITEM,
  LIST_END
};

// Scanner state: the column of the '-' that opened the current list, or -1 when no list
// is open.
typedef struct {
  int32_t column;
} Scanner;

void *tree_sitter_external_unicode_column_alignment_external_scanner_create() {
  Scanner *scanner = ts_malloc(sizeof(Scanner));
  *scanner = (Scanner){
    .column = -1
  };
  return scanner;
}

// Releases the scanner payload with `ts_free`.
void tree_sitter_external_unicode_column_alignment_external_scanner_destroy(void *payload) {
  ts_free(payload);
}

// Copies the list column into `buffer` and returns the number of bytes written
// (`sizeof(int32_t)`).
unsigned tree_sitter_external_unicode_column_alignment_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  const Scanner *state = payload;
  memcpy(buffer, &state->column, sizeof(int32_t));
  return sizeof(int32_t);
}

// Restores the list column from `buffer`.
//
// The column is first reset to -1 (no list open). It is overwritten only when `buffer`
// holds at least a full `int32_t`; checking `length > 0` alone would read past the end of
// a shorter buffer.
void tree_sitter_external_unicode_column_alignment_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  Scanner *scanner = payload;
  scanner->column = -1;
  if (length >= sizeof(scanner->column)) {
    memcpy(&(scanner->column), buffer, sizeof(scanner->column));
  }
}

// Scans list tokens based on the column of each '-'.
//
// Whitespace and U+25A1 (□) are skipped first. At a '-':
//   - with no list open, `LIST_START` is reported and the column remembered, without
//     consuming the '-';
//   - at the remembered column, the '-' is consumed and reported as `LIST_ITEM`;
//   - at any other column, `LIST_END` is reported and the column reset, without consuming
//     the '-'.
// At end of input with a list still open, `LIST_END` is reported. Otherwise returns false.
bool tree_sitter_external_unicode_column_alignment_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  Scanner *state = payload;

  // U+25A1 is unicode codepoint □
  for (;;) {
    if (!iswspace(lexer->lookahead) && lexer->lookahead != 0x25A1) break;
    lexer->advance(lexer, true);
  }

  if (lexer->lookahead == '-') {
    const int32_t dash_column = lexer->get_column(lexer);

    if (state->column == -1) {
      lexer->result_symbol = LIST_START;
      state->column = dash_column;
      return true;
    }

    if (dash_column == state->column) {
      lexer->result_symbol = LIST_ITEM;
      lexer->advance(lexer, false);
      return true;
    }

    lexer->result_symbol = LIST_END;
    state->column = -1;
    return true;
  }

  if (lexer->eof(lexer) && state->column != -1) {
    lexer->result_symbol = LIST_END;
    state->column = -1;
    return true;
  }

  return false;
}



================================================
FILE: test/fixtures/test_grammars/extra_non_terminals/corpus.txt
================================================
==============
No extras
==============

a b c d

---

(module)

==============
Extras
==============

a (one) b (two) (three) c d

---

(module
  (comment)
  (comment)
  (comment))



================================================
FILE: test/fixtures/test_grammars/extra_non_terminals/grammar.js
================================================
// This grammar has an "extra" rule, `comment`, that is a non-terminal.

module.exports = grammar({
  name: "extra_non_terminals",

  extras: $ => [
    /\s/,
    $.comment,
  ],

  rules: {
    module: $ => seq('a', 'b', 'c', 'd'),
    comment: $ => seq('(', repeat(/[a-z]+/), ')'),
  }
})



================================================
FILE: test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt
================================================
=====
Extras
=====

;
%;
%foo:;
;
bar: baz:;
;

---

(program
  (statement)
  (macro_statement (statement))
  (macro_statement (statement
    (label_declaration (identifier))))
  (statement)
  (statement
    (label_declaration (identifier))
    (label_declaration (identifier)))
  (statement))



================================================
FILE: test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.js
================================================
// This grammar has a non-terminal extra rule `macro_statement` that contains
// child rules that are also used elsewhere in the grammar.

module.exports = grammar({
  name: "extra_non_terminals_with_shared_rules",

  extras: $ => [/\s+/, $.macro_statement],

  rules: {
    program: $ => repeat($.statement),
    statement: $ => seq(repeat($.label_declaration), ';'),
    macro_statement: $ => seq('%', $.statement),
    label_declaration: $ => seq($.identifier, ':'),
    identifier: $ => /[a-zA-Z]+/
  }
})


================================================
FILE: test/fixtures/test_grammars/get_col_eof/corpus.txt
================================================
[Empty file]


================================================
FILE: test/fixtures/test_grammars/get_col_eof/grammar.js
================================================
module.exports = grammar({
  name: "get_col_eof",

  externals: $ => [
    $.char
  ],

  rules: {
    source_file: $ => repeat($.char),
  }
});



================================================
FILE: test/fixtures/test_grammars/get_col_eof/scanner.c
================================================
#include "tree_sitter/parser.h"

// Index of the single external token, `char`, from the grammar's `externals` list.
enum TokenType { CHAR };

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_get_col_eof_external_scanner_create(void) { return NULL; }

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_get_col_eof_external_scanner_destroy(void *scanner) {}

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_get_col_eof_external_scanner_serialize(void *scanner,
                                                            char *buffer) {
  return 0;
}

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_get_col_eof_external_scanner_deserialize(void *scanner,
                                                          const char *buffer,
                                                          unsigned length) {}

// Consumes one character as a `CHAR` token.
//
// Returns false at end of input or when `CHAR` is not valid. Otherwise the lexer is advanced
// by one character, `get_column` is called (its result is discarded), and the token end is
// marked.
bool tree_sitter_get_col_eof_external_scanner_scan(void *scanner,
                                                   TSLexer *lexer,
                                                   const bool *valid_symbols) {
  if (lexer->eof(lexer) || !valid_symbols[CHAR]) {
    return false;
  }

  lexer->advance(lexer, false);
  lexer->get_column(lexer);
  lexer->result_symbol = CHAR;
  lexer->mark_end(lexer);
  return true;
}



================================================
FILE: test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt
================================================
[Empty file]


================================================
FILE: test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js
================================================
module.exports = grammar({
  name: 'get_col_should_hang_not_crash',

  externals: $ => [
    $.test,
  ],

  rules: {
    source_file: $ => seq(
      $.test
    ),
  },
});



================================================
FILE: test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c
================================================
#include "tree_sitter/parser.h"

// Lifecycle stubs for a stateless scanner.
//
// Each function spells out its full parameter list, matching the signatures used by the
// other fixture scanners, so that the definitions agree with how the functions are called.
// An empty `()` parameter list leaves the parameters unspecified in older C and means "no
// parameters" in C23.

// Writes nothing to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_get_col_should_hang_not_crash_external_scanner_serialize(
    void *payload,
    char *buffer
) {
    return 0;
}

// Ignores the serialized bytes; there is no state to restore.
void tree_sitter_get_col_should_hang_not_crash_external_scanner_deserialize(
    void *payload,
    const char *buffer,
    unsigned length
) {}

// Creates the scanner payload. No state is kept, so the payload is NULL.
void *tree_sitter_get_col_should_hang_not_crash_external_scanner_create(void) { return NULL; }

// Nothing was allocated by `create`, so nothing is freed.
void tree_sitter_get_col_should_hang_not_crash_external_scanner_destroy(void *payload) {}

// Never returns: the loop below has no exit condition. Each iteration
// advances the lexer by one character and then queries the current column,
// without ever checking for end of input.
bool tree_sitter_get_col_should_hang_not_crash_external_scanner_scan(void *payload, TSLexer *lexer,
                                            const bool *valid_symbols) {
    for (;;) {
        lexer->advance(lexer, false);
        lexer->get_column(lexer);
    }
}



================================================
FILE: test/fixtures/test_grammars/immediate_tokens/corpus.txt
================================================
===============================
prefix expressions as arguments
===============================

a ::b ::c

---

(program
  (call
    (call
      (identifier)
      (prefix (identifier)))
    (prefix (identifier))))

===============================
infix expressions
===============================

a::b::c

---

(program
  (infix
    (infix
      (identifier)
      (identifier))
    (identifier)))



================================================
FILE: test/fixtures/test_grammars/immediate_tokens/grammar.js
================================================
// This grammar demonstrates the usage of the IMMEDIATE_TOKEN rule. It allows the parser to produce
// a different token based on whether or not there are `extras` preceding the token's main content.
// When there are *no* leading `extras`, an immediate token is preferred over a normal token which
// would otherwise match.

module.exports = grammar({
  name: "immediate_tokens",

  extras: $ => [/\s/],

  rules: {
    program: $ => $._expression,

    _expression: $ => choice(
      $.call,
      $.infix,
      $.prefix,
      $.identifier,
    ),

    call: $ => prec.left(-1, seq(
      $._expression,
      $._expression,
    )),

    prefix: $ => seq(
      '::',
      $.identifier,
    ),

    infix: $ => seq(
      $._expression,
      token.immediate('::'),
      $.identifier,
    ),

    identifier: $ => /[a-z]+/
  }
})



================================================
FILE: test/fixtures/test_grammars/inline_rules/corpus.txt
================================================
==================================
Expressions
==================================

1 + 2 * 3;
4 * 5 + 6;
7 * (8 + 9);

---

(program
  (statement (sum
    (number)
    (product (number) (number))))
  (statement (sum
    (product (number) (number))
    (number)))
  (statement (product
    (number)
    (parenthesized_expression (sum (number) (number))))))



================================================
FILE: test/fixtures/test_grammars/inline_rules/grammar.js
================================================
module.exports = grammar({
  name: "inline_rules",

  extras: $ => [/\s/],

  inline: $ => [$.expression],

  rules: {
    program: $ => repeat1($.statement),
    statement: $ => seq($.expression, ";"),
    expression: $ => choice(
      $.sum,
      $.product,
      $.number,
      $.parenthesized_expression,
    ),
    parenthesized_expression: $ => seq("(", $.expression, ")"),
    sum: $ => prec.left(seq($.expression, "+", $.expression)),
    product: $ => prec.left(2, seq($.expression, "*", $.expression)),
    number: $ => /\d+/,
  }
})



================================================
FILE: test/fixtures/test_grammars/inlined_aliased_rules/readme.md
================================================
This grammar shows that a rule marked as `inline` can *contain* an `ALIAS` rule.



================================================
FILE: test/fixtures/test_grammars/inlined_aliased_rules/corpus.txt
================================================
======================================
Method calls
======================================

a.b(c(d.e));

---

(statement
  (call_expression
    (member_expression
      (variable_name)
      (property_name))
    (call_expression
      (variable_name)
      (member_expression
        (variable_name)
        (property_name)))))



================================================
FILE: test/fixtures/test_grammars/inlined_aliased_rules/grammar.js
================================================
module.exports = grammar({
  name: "inlined_aliased_rules",

  extras: $ => [/\s/],

  inline: $ => [$.expression],

  rules: {
    statement: $ => seq($.expression, ";"),

    expression: $ =>
      choice(
        $.call_expression,
        $.member_expression,
        alias($.identifier, $.variable_name),
      ),

    call_expression: $ => prec.left(seq($.expression, "(", $.expression, ")")),

    member_expression: $ =>
      prec.left(
        1,
        seq($.expression, ".", alias($.identifier, $.property_name)),
      ),

    identifier: $ => /[a-z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/inverted_external_token/readme.md
================================================
This language has an external scanner that calls `lexer->advance(lexer, true)` (in order to skip whitespace) *after* having called `lexer->mark_end(lexer)`. This tests an edge case in the parser's handling of token start and end positions.



================================================
FILE: test/fixtures/test_grammars/inverted_external_token/corpus.txt
================================================
========================
Expressions on one line
=========================

a
b
  .c
d
  .e
  .f

---

(program
  (statement (identifier) (line_break))
  (statement (member_expression (identifier) (identifier)) (line_break))
  (statement (member_expression (member_expression (identifier) (identifier)) (identifier)) (line_break)))

=====================================
Line breaks followed by whitespace
=====================================

a
  b
  c

---

(program
  (statement (identifier) (line_break))
  (statement (identifier) (line_break))
  (statement (identifier) (line_break)))



================================================
FILE: test/fixtures/test_grammars/inverted_external_token/grammar.js
================================================
module.exports = grammar({
  name: "inverted_external_token",

  externals: $ => [$.line_break],

  extras: $ => [/\s/],

  rules: {
    program: $ => repeat($.statement),
    statement: $ => seq($._expression, $.line_break),
    _expression: $ => choice($.identifier, $.member_expression),
    member_expression: $ => prec.left(seq($._expression, ".", $.identifier)),
    identifier: $ => /[a-z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/inverted_external_token/scanner.c
================================================
#include "tree_sitter/parser.h"

// Symbol value the scanner stores in `lexer->result_symbol`.
enum {
  LINE_BREAK = 0
};

// Allocates nothing; always hands back a NULL payload.
void *tree_sitter_inverted_external_token_external_scanner_create() {
  return NULL;
}

// Intentionally a no-op: the body does nothing with `payload`.
void tree_sitter_inverted_external_token_external_scanner_destroy(void *payload) {
}

// Writes nothing to `buffer` and therefore reports a serialized length of
// zero. The return value is a byte count, not a success flag, so returning
// `true` (1) would claim one byte of state that was never written.
unsigned tree_sitter_inverted_external_token_external_scanner_serialize(
  void *payload,
  char *buffer
) { return 0; }

// Intentionally a no-op: neither `buffer` nor `length` is read.
void tree_sitter_inverted_external_token_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
}

// Recognizes a LINE_BREAK token: a newline whose following non-whitespace
// character is not '.'. The token's end is marked immediately after the
// newline, and further whitespace is skipped only afterwards.
bool tree_sitter_inverted_external_token_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  // Skip spaces and carriage returns that precede the candidate newline.
  while (lexer->lookahead == ' ' || lexer->lookahead == '\r') {
    lexer->advance(lexer, true);
  }

  // Without a newline here there is no line break to report.
  if (lexer->lookahead != '\n') {
    return false;
  }

  // Consume the newline and fix the token's end right after it.
  lexer->advance(lexer, false);
  lexer->mark_end(lexer);

  // Look past any further whitespace; these advances come *after* mark_end.
  while (!(lexer->lookahead != ' ' && lexer->lookahead != '\n' && lexer->lookahead != '\r')) {
    lexer->advance(lexer, true);
  }

  // If the next character is '.', no LINE_BREAK token is produced.
  if (lexer->lookahead == '.') {
    return false;
  }

  lexer->result_symbol = LINE_BREAK;
  return true;
}



================================================
FILE: test/fixtures/test_grammars/invisible_start_rule/expected_error.txt
================================================
A grammar's start rule must be visible.


================================================
FILE: test/fixtures/test_grammars/invisible_start_rule/grammar.js
================================================
module.exports = grammar({
  name: "invisible_start_rule",
  rules: {
    _value: $ => choice($.a, $.b),
    a: $ => "a",
    b: $ => "b",
  },
});



================================================
FILE: test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md
================================================
This grammar has two tokens, `regex` and `/`, which conflict: when a `/` character is encountered, the lexer can't tell if it is part of a `/` token or a `regex` by looking ahead only one character. But because these tokens are never valid in the same position, this doesn't cause any problem.

When merging similar parse states in order to reduce the size of the parse table, it is important that we avoid merging states in a way that causes these two tokens to both appear as valid lookahead symbols in a given state.

If we weren't careful, this grammar would cause that to happen, because a `regex` is valid in this state:

```
(if (1) /\w+/)
       ^
```

and a `/` is valid in this state:


```
((1) / 2)
    ^
```

And these two states would otherwise be candidates for merging, because they both contain only the action `reduce(parenthesized, 3)`.


================================================
FILE: test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt
================================================
========================
regexes
========================

/a+/

---

(expression (regex))

========================
conditionals
========================

(if (1) /a+/)

---

(expression (parenthesized (expression (conditional
  (parenthesized (expression (number)))
  (expression (regex))))))

========================
quotients
========================

((1) / 2)

---

(expression (parenthesized (expression (quotient
  (expression (parenthesized (expression (number))))
  (expression (number))))))



================================================
FILE: test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js
================================================
module.exports = grammar({
  name: 'lexical_conflicts_due_to_state_merging',

  rules: {
    expression: $ => choice(
      $.conditional,
      $.quotient,
      $.regex,
      $.number,
      $.parenthesized,
    ),

    conditional: $ => prec.left(1, seq(
      'if',
      $.parenthesized,
      $.expression
    )),

    quotient: $ => prec.left(seq(
      $.expression,
      '/',
      $.expression
    )),

    regex: $ => /\/[^/\n]+\//,

    number: $ => /\d+/,

    parenthesized: $ => seq('(', $.expression, ')'),
  },
});



================================================
FILE: test/fixtures/test_grammars/named_precedences/readme.txt
================================================
This grammar uses named precedences, which have a partial order specified via the grammar's `precedences` field. Named
precedences allow certain conflicts to be resolved statically without accidentally resolving *other* conflicts, which
are intended to be resolved dynamically.



================================================
FILE: test/fixtures/test_grammars/named_precedences/corpus.txt
================================================
=============
Declarations
=============

A||B c = d;
E.F g = h;

=============
Expressions
=============

a || b.c;



================================================
FILE: test/fixtures/test_grammars/named_precedences/grammar.js
================================================
module.exports = grammar({
  name: 'named_precedences',

  conflicts: $ => [
    [$.expression, $.type],
    [$.expression, $.nested_type],
  ],

  precedences: $ => [
    [$.member_expression, "and", "or"],
    [$.nested_type, "type_intersection", "type_union"],
  ],

  rules: {
    program: $ => repeat(choice(
      $.expression_statement,
      $.declaration_statement,
    )),

    expression_statement: $ => seq($.expression, ';'),

    declaration_statement: $ => seq($.type, $.expression, ';'),

    expression: $ => choice(
      $.member_expression,
      $.binary_expression,
      $.identifier,
    ),

    member_expression: $ => seq($.expression, '.', $.identifier),

    binary_expression: $ => choice(
      prec.left('or', seq($.expression, '||', $.expression)),
      prec.left('and', seq($.expression, '&&', $.expression)),
    ),

    type: $ => choice($.nested_type, $.binary_type, $.identifier),

    nested_type: $ => seq($.identifier, '.', $.identifier),

    binary_type: $ => choice(
      prec.left('type_union', seq($.type, '||', $.type)),
      prec.left('type_intersection', seq($.type, '&&', $.type)),
    ),

    identifier: $ => /[a-z]\w+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/named_rule_aliased_as_anonymous/readme.md
================================================
This grammar checks that if a named node is aliased as an anonymous node (e.g. `alias($.foo, 'bar')`), then the rule will behave like an anonymous node. In particular, it will not show up in the tree's S-expression representation.



================================================
FILE: test/fixtures/test_grammars/named_rule_aliased_as_anonymous/corpus.txt
================================================
================================================
Named rules that are aliased as anonymous tokens
================================================

B C B

---

(a (c) (b))



================================================
FILE: test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js
================================================
module.exports = grammar({
  name: 'named_rule_aliased_as_anonymous',

  rules: {
    a: $ => seq(
      alias($.b, 'the-alias'),
      $.c,
      $.b,
    ),

    b: _ => 'B',

    c: _ => 'C',
  },
});



================================================
FILE: test/fixtures/test_grammars/nested_inlined_rules/readme.md
================================================
This grammar demonstrates that you can have an inlined rule that contains another inlined rule.



================================================
FILE: test/fixtures/test_grammars/nested_inlined_rules/corpus.txt
================================================
==================================
Statements
==================================

return 1;
return 2;

---

(program
  (return_statement (number))
  (return_statement (number)))



================================================
FILE: test/fixtures/test_grammars/nested_inlined_rules/grammar.js
================================================
module.exports = grammar({
  name: 'nested_inlined_rules',

  inline: $ => [
    $.top_level_item,
    $.statement,
  ],

  rules: {
    program: $ => repeat1($.top_level_item),

    top_level_item: $ => choice($.statement, '!'),

    statement: $ => choice($.expression_statement, $.return_statement),

    return_statement: $ => seq('return', $.number, ';'),

    expression_statement: $ => seq($.number, ';'),

    number: _ => /\d+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/next_sibling_from_zwt/corpus.txt
================================================
===========================
missing c node
===========================

abdef

---

(source
    (MISSING "c"))



================================================
FILE: test/fixtures/test_grammars/next_sibling_from_zwt/grammar.js
================================================
module.exports = grammar({
  name: "next_sibling_from_zwt",
  extras: $ => [
    /\s|\\\r?\n/,
  ],

  rules: {
    source: $ => seq(
      'a',
      $._bc,
      'd',
      'e',
      'f',
    ),

    _bc: $ => seq(
      'b',
      'c',
    ),
  }
});



================================================
FILE: test/fixtures/test_grammars/partially_resolved_conflict/readme.txt
================================================
This grammar has a conflict with three possible actions: a shift in the middle of the `binary` rule and two reductions: one for `unary_a` and one for `unary_b`. Both `unary_a` and `unary_b` have a higher precedence than `binary`, therefore we can rule out the interpretation where a `binary` occurs *inside* of a `unary_a` or `unary_b`, so the error message (and suggested `conflict`) should not include that interpretation.


================================================
FILE: test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  '!'  expression  •  '<'  …

Possible interpretations:

  1:  (unary_a  '!'  expression)  •  '<'  …  (precedence: 2)
  2:  (unary_b  '!'  expression)  •  '<'  …  (precedence: 2)

Possible resolutions:

  1:  Specify a higher precedence in `unary_a` than in the other rules.
  2:  Specify a higher precedence in `unary_b` than in the other rules.
  3:  Add a conflict for these rules: `unary_a`, `unary_b`



================================================
FILE: test/fixtures/test_grammars/partially_resolved_conflict/grammar.js
================================================
module.exports = grammar({
  name: 'partially_resolved_conflict',

  rules: {
    expression: $ => choice($.binary, $.identifier),

    unary_a: $ => prec(2, seq('!', $.expression)),

    unary_b: $ => prec(2, seq('!', $.expression)),

    binary: $ => seq(
      choice($.unary_a, $.unary_b, $.expression),
      '<',
      $.expression,
    ),

    identifier: _ => /[a-z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
================================================
This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.

There is a shift/reduce conflict here:

```
foo bar { baz }
       ^
```

The possible actions are:

1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
2. `shift` - `bar` is a function being called with the block `{ baz }`

The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.



================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
================================================
Unresolved conflict for symbol sequence:

  identifier  identifier  •  '{'  …

Possible interpretations:

  1:  identifier  (expression  identifier)  •  '{'  …
  2:  identifier  (function_call  identifier  •  block)  (precedence: 0, associativity: Right)

Possible resolutions:

  1:  Specify a higher precedence in `function_call` than in the other rules.
  2:  Specify a higher precedence in `expression` than in the other rules.
  3:  Specify a left or right associativity in `expression`
  4:  Add a conflict for these rules: `expression`, `function_call`



================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js
================================================
module.exports = grammar({
  name: 'precedence_on_single_child_missing',

  rules: {
    expression: $ => choice($.function_call, $.identifier),

    function_call: $ => prec.right(choice(
      seq($.identifier, $.expression),
      seq($.identifier, $.block),
      seq($.identifier, $.expression, $.block),
    )),

    block: $ => seq('{', $.expression, '}'),

    identifier: _ => /[a-zA-Z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
================================================
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.


================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
================================================
===========================
function calls with blocks
===========================

foo bar { baz }

---

(expression (function_call
  (identifier)
  (expression (identifier))
  (block (expression (identifier)))))


================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js
================================================
module.exports = grammar({
  name: 'precedence_on_single_child_negative',

  rules: {
    expression: $ => choice($.function_call, $.identifier),

    function_call: $ => prec.right(-1, choice(
      seq($.identifier, $.expression),
      seq($.identifier, $.block),
      seq($.identifier, $.expression, $.block),
    )),

    block: $ => seq('{', $.expression, '}'),

    identifier: _ => /[a-zA-Z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
================================================
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.


================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
================================================
===========================
function calls with blocks
===========================

foo bar { baz }

---

(expression (function_call
  (identifier)
  (expression (function_call
    (identifier)
    (block (expression (identifier)))))))


================================================
FILE: test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js
================================================
module.exports = grammar({
  name: 'precedence_on_single_child_positive',

  rules: {
    expression: $ => choice($.function_call, $.identifier),

    function_call: $ => prec.right(1, choice(
      seq($.identifier, $.expression),
      seq($.identifier, $.block),
      seq($.identifier, $.expression, $.block),
    )),

    block: $ => seq('{', $.expression, '}'),

    identifier: _ => /[a-zA-X]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
================================================
==========================================
curly brace blocks with high precedence
==========================================

a b {}

---

(expression (function_call
  (identifier)
  (expression (function_call (identifier) (block)))))

==========================================
do blocks with low precedence
==========================================

a b do end

---

(expression (function_call
  (identifier)
  (expression (identifier))
  (do_block)))



================================================
FILE: test/fixtures/test_grammars/precedence_on_subsequence/grammar.js
================================================
module.exports = grammar({
  name: 'precedence_on_subsequence',

  rules: {
    expression: $ => prec.left(choice(
      $.function_call,
      $.identifier,
      $.scope_resolution,
    )),

    function_call: $ => choice(
      seq($.identifier, $.expression),
      prec(1, seq($.identifier, $.block)),
      prec(-1, seq($.identifier, $.do_block)),
      seq($.identifier, prec(1, seq($.expression, $.block))),
      seq($.identifier, prec(-1, seq($.expression, $.do_block))),
    ),

    scope_resolution: $ => prec.left(1, choice(
      seq($.expression, '::', $.expression),
      seq('::', $.expression),
    )),

    block: _ => '{}',

    do_block: _ => 'do end',

    identifier: _ => /[a-zA-Z]+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/precedence_on_token/readme.md
================================================
This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.



================================================
FILE: test/fixtures/test_grammars/precedence_on_token/corpus.txt
================================================
==========================================
obvious tokens
==========================================

// hi
/* hi */
hi
/
"hi"
/hi/

---

(program
  (comment)
  (comment)
  (identifier)
  (slash)
  (string)
  (regex))

==========================================
strings starting with double slashes
==========================================

/*
The lexer matches the string content correctly even though
a comment could match all the way until the end of the line,
because the string content token has a higher precedence
than the comment token.
*/

"//one\n//two"

---

(program
  (comment)
  (string (escape_sequence)))

==========================================
comments that resemble regexes
==========================================

/*
The lexer matches this as a comment followed by an identifier
even though a regex token could match the entire thing, because
the comment token has a higher precedence than the regex token
*/

/* hello */ui

---

(program
  (comment)
  (comment)
  (identifier))



================================================
FILE: test/fixtures/test_grammars/precedence_on_token/grammar.js
================================================
module.exports = grammar({
  name: 'precedence_on_token',

  extras: $ => [
    /\s/,
    $.comment,
  ],

  rules: {
    program: $ => repeat(choice(
      $.string,
      $.regex,
      $.identifier,
      $.slash,
    )),

    comment: _ => token(prec(1, /\/\/.*|\/\*[^*]*\*\//)),

    string: $ => seq(
      '"',
      repeat(choice(
        token(prec(2, /[^\"\n\\]+/)),
        $.escape_sequence,
      )),
      '"',
    ),

    escape_sequence: _ => /\\./,

    regex: _ => /\/[^\/\n]+\/[a-z]*/,

    identifier: _ => /[a-z]\w*/,

    slash: _ => '/',
  },
});



================================================
FILE: test/fixtures/test_grammars/readme_grammar/corpus.txt
================================================
==================================
the readme example
==================================

a + b * c

---

(expression (sum
  (expression (variable))
  (expression (product
     (expression (variable))
     (expression (variable))))))


================================================
FILE: test/fixtures/test_grammars/readme_grammar/grammar.js
================================================
module.exports = grammar({
  name: 'readme_grammar',

  // Things that can appear anywhere in the language, like comments
  // and whitespace, are expressed as 'extras'.
  extras: $ => [
    /\s/,
    $.comment,
  ],

  rules: {
    // The first rule listed in the grammar becomes the 'start rule'.
    expression: $ => choice(
      $.sum,
      $.product,
      $.number,
      $.variable,
      seq('(', $.expression, ')'),
    ),

    // Tokens like '+' and '*' are described directly within the
    // grammar's rules, as opposed to in a separate lexer description.
    sum: $ => prec.left(1, seq($.expression, '+', $.expression)),

    // Ambiguities can be resolved at compile time by assigning precedence
    // values to rule subtrees.
    product: $ => prec.left(2, seq($.expression, '*', $.expression)),

    // Tokens can be specified using ECMAScript regexps.
    number: _ => /\d+/,

    comment: _ => /#.*/,

    variable: _ => new RustRegex('(?i:[a-z])\\w*'),
  },
});



================================================
FILE: test/fixtures/test_grammars/reserved_words/corpus.txt
================================================
==============
Valid Code
==============

if (a) {
  var b = {
    c: d,
    e: f,
  };
  while (g) {
    h();
  }
}

---

(program
  (if_statement
    (parenthesized_expression (identifier))
      (block
        (var_declaration
          (identifier)
          (object
            (pair (identifier) (identifier))
            (pair (identifier) (identifier))))
        (while_statement
          (parenthesized_expression (identifier))
          (block (expression_statement (call_expression (identifier))))))))

================================================
Error detected at globally-reserved word
================================================

var a =

if (something) {
  c();
}

---

(program
  (ERROR (identifier))
  (if_statement
    (parenthesized_expression (identifier))
    (block
      (expression_statement (call_expression (identifier))))))

================================================
Object keys that are reserved in other contexts
================================================

var x = {
  if: a,
  while: b,
};

---

(program
  (var_declaration
    (identifier)
    (object
      (pair (identifier) (identifier))
      (pair (identifier) (identifier)))))

================================================
Error detected at context-specific reserved word
================================================

var x = {
var y = z;

---

(program
  (ERROR (identifier))

  ; Important - var declaration is still recognized,
  ; because in this example grammar, `var` is a keyword
  ; even within object literals.
  (var_declaration
    (identifier)
    (identifier)))

=============================================
Other tokens that overlap with keyword tokens
=============================================

var a = /reserved-words-should-not-affect-this/;
var d = /if/;

---

(program
  (var_declaration
    (identifier)
    (regex (regex_pattern)))
  (var_declaration
    (identifier)
    (regex (regex_pattern))))



================================================
FILE: test/fixtures/test_grammars/reserved_words/grammar.js
================================================
const RESERVED_NAMES = ["if", "while", "var"];
const RESERVED_PROPERTY_NAMES = ["var"];

module.exports = grammar({
  name: "reserved_words",

  reserved: {
    global: $ => RESERVED_NAMES,
    property: $ => RESERVED_PROPERTY_NAMES,
  },

  word: $ => $.identifier,

  rules: {
    program: $ => repeat($._statement),

    block: $ => seq("{", repeat($._statement), "}"),

    _statement: $ => choice(
      $.var_declaration,
      $.if_statement,
      $.while_statement,
      $.expression_statement,
    ),

    var_declaration: $ => seq("var", $.identifier, "=", $._expression, ";"),

    if_statement: $ => seq("if", $.parenthesized_expression, $.block),

    while_statement: $ => seq("while", $.parenthesized_expression, $.block),

    expression_statement: $ => seq($._expression, ";"),

    _expression: $ => choice(
      $.identifier,
      $.parenthesized_expression,
      $.call_expression,
      $.member_expression,
      $.object,
      $.regex,
    ),

    parenthesized_expression: $ => seq("(", $._expression, ")"),

    member_expression: $ => seq($._expression, ".", $.identifier),

    call_expression: $ => seq($._expression, "(", repeat(seq($._expression, ",")), ")"),

    object: $ => seq("{", repeat(seq(choice($.pair, $.getter), ",")), "}"),

    regex: $ => seq('/', $.regex_pattern, '/'),

    regex_pattern: $ => token(prec(-1, /[^/\n]+/)),

    pair: $ => seq(reserved('property', $.identifier), ":", $._expression),

    getter: $ => seq(
      "get",
      reserved('property', $.identifier),
      "(",
      ")",
      $.block,
    ),

    identifier: $ => /[a-z_]\w*/,
  },
});



================================================
FILE: test/fixtures/test_grammars/start_rule_is_blank/corpus.txt
================================================
========================
the empty string
=======================

---

(first_rule)


================================================
FILE: test/fixtures/test_grammars/start_rule_is_blank/grammar.js
================================================
module.exports = grammar({
  name: 'start_rule_is_blank',

  rules: {
    first_rule: _ => blank(),
  },
});



================================================
FILE: test/fixtures/test_grammars/start_rule_is_token/corpus.txt
================================================
===========================
the single token
==========================
the-value
---
(first_rule)



================================================
FILE: test/fixtures/test_grammars/start_rule_is_token/grammar.js
================================================
module.exports = grammar({
  name: 'start_rule_is_token',

  rules: {
    first_rule: _ => 'the-value',
  },
});



================================================
FILE: test/fixtures/test_grammars/unicode_classes/corpus.txt
================================================
===============
Uppercase words
===============

Δბㄱ  Ψ  Ɓƀ  Ƒ  Ɣ  Śřř

---

(program
  (upper) (upper) (upper) (upper) (upper) (upper))

================
Lowercase words
================

śś  ťť  ßß

---

(program
  (lower) (lower) (lower))

================
Math symbols
================

≺ ≼ ≠ ≝ ⨔∑

---

(program
  (math_sym) (math_sym) (math_sym) (math_sym) (math_sym))

================================
Letterlike numeric characters
================================

ᛯ Ⅵ 〩

---

(program
  (letter_number) (letter_number) (letter_number))



================================================
FILE: test/fixtures/test_grammars/unicode_classes/grammar.js
================================================
module.exports = grammar({
  name: 'unicode_classes',

  rules: {
    program: $ => repeat(choice(
      $.lower,
      $.upper,
      $.math_sym,
      $.letter_number,
    )),

    lower: _ => /\p{Ll}\p{L}*/,

    upper: _ => /\p{Lu}\p{L}*/,

    math_sym: _ => /\p{Sm}+/,

    letter_number: _ => /\p{Letter_Number}/,
  },
});



================================================
FILE: test/fixtures/test_grammars/unused_rules/readme.md
================================================
The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count.


================================================
FILE: test/fixtures/test_grammars/unused_rules/corpus.txt
================================================
=========================
the language
=========================

E F I J

---

(a (d (e) (f)) (h (i) (j)))


================================================
FILE: test/fixtures/test_grammars/unused_rules/grammar.js
================================================
module.exports = grammar({
  name: 'unused_rules',

  rules: {
    a: $ => seq($.d, $.h),

    b: _ => 'B',

    c: _ => 'C',

    d: $ => seq($.e, $.f),

    e: _ => 'E',

    f: _ => 'F',

    g: _ => 'G',

    h: $ => seq($.i, $.j),

    i: _ => 'I',

    j: _ => 'J',

    k: _ => 'K',
  },
});



================================================
FILE: test/fixtures/test_grammars/uses_current_column/corpus.txt
================================================
===============
Simple blocks
===============

do a
   e
f

---

(block
  (do_expression (block
    (identifier)
    (identifier)))
  (identifier))

=====================
Nested blocks
=====================

a = do b
       c + do e
              f
              g
       h
i

---

(block
  (binary_expression
    (identifier)
    (do_expression (block
      (identifier)
      (binary_expression
        (identifier)
        (do_expression (block
          (identifier)
          (identifier)
          (identifier))))
      (identifier))))
  (identifier))

===============================
Blocks with leading newlines
===============================

do


   a = b
   do
      c
      d
   e
 f

---

(block
  (do_expression (block
    (binary_expression (identifier) (identifier))
    (do_expression (block
      (identifier)
      (identifier)))
    (identifier)
    (identifier))))

=====================
Unterminated blocks
=====================

do
---

(ERROR)



================================================
FILE: test/fixtures/test_grammars/uses_current_column/grammar.js
================================================
module.exports = grammar({
  name: 'uses_current_column',

  externals: $ => [
    $._indent,
    $._dedent,
    $._newline,
  ],

  rules: {
    block: $ => repeat1($._statement),

    _statement: $ => seq($._expression, $._newline),

    _expression: $ => choice(
      $.do_expression,
      $.binary_expression,
      $.identifier,
    ),

    do_expression: $ => seq(
      'do',
      $._indent,
      $.block,
      $._dedent,
    ),

    binary_expression: $ => prec.left(1, seq(
      $._expression,
      choice('=', '+', '-'),
      $._expression,
    )),

    identifier: _ => /\w+/,
  },
});



================================================
FILE: test/fixtures/test_grammars/uses_current_column/scanner.c
================================================
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"

#include <stdlib.h>
#include <wctype.h>

// External token kinds produced by this scanner, listed in the same order as
// the `externals` array of this grammar's grammar.js (_indent, _dedent,
// _newline). The values are used to index `valid_symbols` in the scan
// function and are assigned to `lexer->result_symbol`.
enum TokenType {
  INDENT,
  DEDENT,
  NEWLINE,
};

// Scanner state that is saved and restored by serialize/deserialize.
typedef struct {
  // Dedents counted while returning a NEWLINE that still have to be emitted.
  uint8_t queued_dedent_count;
  // Number of entries of `indents` currently in use.
  uint8_t indent_count;
  // Stack of recorded indentation columns; entry 0 is set to 0 on creation.
  int8_t indents[32];
} Scanner;

// Allocates a scanner whose indent stack holds the single base entry 0 and
// which has no queued dedents.
void *tree_sitter_uses_current_column_external_scanner_create() {
  Scanner *scanner = ts_malloc(sizeof *scanner);
  scanner->indents[0] = 0;
  scanner->indent_count = 1;
  scanner->queued_dedent_count = 0;
  return scanner;
}

void tree_sitter_uses_current_column_external_scanner_destroy(void *payload) {
  ts_free(payload);
}

// Writes the scanner state into `buffer` and returns the number of bytes
// written. Layout: byte 0 is the queued dedent count, followed by one byte
// per entry of the indent stack.
unsigned tree_sitter_uses_current_column_external_scanner_serialize(
  void *payload,
  char *buffer
) {
  const Scanner *scanner = (const Scanner *)payload;
  unsigned size = 0;
  buffer[size++] = scanner->queued_dedent_count;
  while (size <= scanner->indent_count) {
    buffer[size] = scanner->indents[size - 1];
    size++;
  }
  return size;
}

void tree_sitter_uses_current_column_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  Scanner *self = (Scanner *)payload;
  if (length > 0) {
    self->queued_dedent_count = buffer[0];
    self->indent_count = length - 1;
    for (unsigned i = 0; i < self->indent_count; i++) {
      self->indents[i] = buffer[i + 1];
    }
  } else {
    self->queued_dedent_count = 0;
    self->indent_count = 1;
    self->indents[0] = 0;
  }
}

// Tries to produce the next external token.
//
// `payload` is cast to the `Scanner` state, `lexer` is the lexer to read
// from, and `valid_symbols` is indexed by `enum TokenType`.
//
// Returns true with `lexer->result_symbol` set to INDENT, DEDENT or NEWLINE,
// or false when no external token applies at this position.
bool tree_sitter_uses_current_column_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  Scanner *self = (Scanner *)payload;

  // The token end is marked here, before anything is advanced over, and is
  // never marked again in this function.
  lexer->mark_end(lexer);

  // If dedents were found in a previous run, and are valid now,
  // then return a dedent.
  if (self->queued_dedent_count > 0 && valid_symbols[DEDENT]) {
    lexer->result_symbol = DEDENT;
    self->queued_dedent_count--;
    return true;
  }

  // If an indent is valid, then add an entry to the indent stack
  // for the current column, and return an indent.
  if (valid_symbols[INDENT]) {
    while (iswspace(lexer->lookahead)) {
      lexer->advance(lexer, false);
    }
    uint32_t column = lexer->get_column(lexer);

    // Refuse to indent once the fixed-size stack is full instead of writing
    // past the end of `indents`.
    bool has_room =
      self->indent_count < sizeof(self->indents) / sizeof(self->indents[0]);

    if (has_room && column > self->indents[self->indent_count - 1]) {
      // NOTE(review): the recorded value is two less than the measured
      // column; the reason for this offset is not visible here.
      self->indents[self->indent_count++] = column - 2;
      lexer->result_symbol = INDENT;
      return true;
    } else {
      return false;
    }
  }

  // If at the end of a statement, then get the current indent
  // level and pop some number of entries off of the indent stack.
  if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) {
    // Skip whitespace up to, but not including, the line break.
    while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
      lexer->advance(lexer, false);
    }

    if (lexer->lookahead == '\n') {
      lexer->advance(lexer, false);

      // Count the leading spaces of the next non-blank line; every further
      // line break restarts the count.
      uint32_t next_column = 0;
      for (;;) {
        if (lexer->lookahead == ' ') {
          next_column++;
          lexer->advance(lexer, false);
        } else if (lexer->lookahead == '\n') {
          next_column = 0;
          lexer->advance(lexer, false);
        } else {
          break;
        }
      }

      // Pop every recorded indent that is deeper than the next line.
      unsigned dedent_count = 0;
      while (next_column < self->indents[self->indent_count - 1]) {
        dedent_count++;
        self->indent_count--;
      }

      if (dedent_count > 0 && valid_symbols[DEDENT]) {
        lexer->result_symbol = DEDENT;
        return true;
      } else if (valid_symbols[NEWLINE]) {
        // Remember the dedents so that later calls can emit them.
        self->queued_dedent_count += dedent_count;
        lexer->result_symbol = NEWLINE;
        return true;
      }
    }
  }

  return false;
}



================================================
FILE: .cargo/config.toml
================================================
[alias]
# `cargo xtask <args>` is shorthand for `cargo run --package xtask -- <args>`.
xtask = "run --package xtask --"



================================================
FILE: .github/cliff.toml
================================================
[changelog]
# changelog header
header = """
# Changelog\n
"""
# template for the changelog body
# https://tera.netlify.app/docs/#introduction
body = """
{% if version %}\
    ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
    ## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
    ### {{ group | striptags | upper_first }}
    {% for commit in commits%}\
        {% if not commit.scope %}\
            - {{ commit.message | upper_first }}\
              {% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
        {% endif %}\
    {% endfor %}\
    {% for group, commits in commits | group_by(attribute="scope") %}\
        {% for commit in commits %}\
            - **{{commit.scope}}**: {{ commit.message | upper_first }}\
                {% if commit.remote.pr_number %} (<https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}/pull/{{ commit.remote.pr_number }}>){%- endif %}
        {% endfor %}\
    {% endfor %}
{% endfor %}
"""
# remove the leading and trailing whitespace from the template
trim = true

[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# when true, filter out the commits that are not conventional (disabled here)
filter_unconventional = false
# when true, process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
# (the only entry is a commented-out example, so nothing is rewritten)
commit_preprocessors = [
  #    { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/neovim/neovim/issues/${2}))"},
]
# regex for parsing and grouping commits
# The `<!-- N -->` prefix of each group name is removed by the `striptags`
# filter in the changelog body template; presumably it exists to control the
# order in which groups are listed - confirm against the git-cliff docs.
commit_parsers = [
  { message = "!:", group = "<!-- 0 -->Breaking" },
  { message = "^feat", group = "<!-- 1 -->Features" },
  { message = "^fix", group = "<!-- 2 -->Bug Fixes" },
  { message = "^perf", group = "<!-- 3 -->Performance" },
  { message = "^doc", group = "<!-- 4 -->Documentation" },
  { message = "^refactor", group = "<!-- 5 -->Refactor" },
  { message = "^test", group = "<!-- 6 -->Testing" },
  { message = "^build", group = "<!-- 7 -->Build System and CI" },
  { message = "^ci", group = "<!-- 7 -->Build System and CI" },
  { message = ".*", group = "<!-- 8 -->Other" },
]
# when true, filter out the commits that are not matched by commit parsers
filter_commits = false
# glob pattern for matching git tags
tag_pattern = "v[0-9]*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags (empty: none configured)
ignore_tags = ""
# when true, sort the tags chronologically
date_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"

[remote.github]
owner = "tree-sitter"
repo = "tree-sitter"



================================================
FILE: .github/dependabot.yml
================================================
# Dependabot: weekly update checks for Cargo crates and GitHub Actions.
version: 2
updates:
  # Rust crates; the `cargo` group pattern matches every dependency.
  - package-ecosystem: 'cargo'
    directory: '/'
    schedule:
      interval: 'weekly'
    commit-message:
      prefix: 'build(deps)'
    labels:
      - 'dependencies'
      - 'cargo'
    groups:
      cargo:
        patterns:
          - '*'
  # Workflow actions; the `actions` group pattern matches every dependency.
  - package-ecosystem: 'github-actions'
    directory: '/'
    schedule:
      interval: 'weekly'
    commit-message:
      prefix: 'ci'
    labels:
      - 'dependencies'
      - 'github-actions'
    groups:
      actions:
        patterns:
          - '*'



================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms

github: tree-sitter
patreon: # Replace with a single Patreon username
open_collective: tree-sitter # Replace with a single Open Collective username
ko_fi: amaanq
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
polar: # Replace with a single Polar username
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
thanks_dev: # Replace with a single thanks.dev username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']



================================================
FILE: .github/actions/cache/action.yml
================================================
name: Cache

description: This action caches fixtures

outputs:
  cache-hit:
    description: Cache hit
    value: ${{ steps.cache.outputs.cache-hit }}

runs:
  using: composite
  steps:
    - uses: actions/cache@v4
      id: cache
      with:
        path: |
          test/fixtures/grammars
          target/release/tree-sitter-*.wasm
        # The key combines the matrix values of the calling job with a hash of
        # the files listed below, so a change to any of them starts a new
        # cache entry. The generator sources live in `crates/generate`.
        # NOTE(review): confirm that the remaining paths (in particular
        # `xtask/src/*`) still match the repository layout.
        key: fixtures-${{ join(matrix.*, '_') }}-${{ hashFiles(
          'crates/generate/src/**',
          'lib/src/parser.h',
          'lib/src/array.h',
          'lib/src/alloc.h',
          'xtask/src/*',
          'test/fixtures/grammars/*/**/src/*.c',
          '.github/actions/cache/action.yml') }}



================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
# Issue form for bug reports; every field is required.
name: Bug Report
description: Report a problem
type: Bug
body:
  # What went wrong.
  - type: textarea
    attributes:
      label: 'Problem'
      description: 'Describe the current behavior. May include logs, images, or videos.'
    validations:
      required: true

  # How to trigger it; the placeholder shows an example command sequence.
  - type: textarea
    attributes:
      label: 'Steps to reproduce'
      placeholder: |
        ```sh
        git clone --depth=1 https://github.com/tree-sitter/tree-sitter-ruby
        cd tree-sitter-ruby
        tree-sitter generate
        ```
    validations:
      required: true

  # What should have happened instead.
  - type: textarea
    attributes:
      label: 'Expected behavior'
      description: 'Describe the behavior you expect.'
    validations:
      required: true

  # Environment details.
  - type: input
    attributes:
      label: 'Tree-sitter version (tree-sitter --version)'
      placeholder: 'tree-sitter 0.20.9'
    validations:
      required: true

  - type: input
    attributes:
      label: 'Operating system/version'
      placeholder: 'macOS 11.5'
    validations:
      required: true



================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false



================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
# Issue form for feature requests; both text fields are optional.
name: Feature request
description: Request an enhancement
type: Feature
body:
  # Static hint shown above the form fields.
  - type: markdown
    attributes:
      value: |
        Before requesting: search [existing feature requests](https://github.com/tree-sitter/tree-sitter/labels/enhancement).

  - type: textarea
    attributes:
      label: 'Problem'
      description: 'Describe the problem to be solved.'
      placeholder: 'No smurf icons available. Smurfs are useful because ...'
    validations:
      required: false

  - type: textarea
    attributes:
      label: 'Expected behavior'
      description: 'Describe what the new feature or behavior would look like. How does it solve the problem? Is it worth the cost?'
    validations:
      required: false



================================================
FILE: .github/scripts/close_unresponsive.js
================================================
/**
 * Tells whether a timeline event records the "more-information-needed"
 * label being added.
 *
 * @param {object} data - A timeline event; `label` is only read when
 *   `event` is "labeled".
 * @returns {boolean}
 */
function labeledEvent(data) {
  if (data.event !== "labeled") {
    return false;
  }
  return data.label.name === "more-information-needed";
}

const numberOfDaysLimit = 30;
const close_message = `This has been closed since a request for information has \
not been answered for ${numberOfDaysLimit} days. It can be reopened when the \
requested information is provided.`;

module.exports = async ({ github, context }) => {
  const owner = context.repo.owner;
  const repo = context.repo.repo;

  const issues = await github.rest.issues.listForRepo({
    owner: owner,
    repo: repo,
    labels: "more-information-needed",
  });
  const numbers = issues.data.map((e) => e.number);

  for (const number of numbers) {
    const events = await github.paginate(
      github.rest.issues.listEventsForTimeline,
      {
        owner: owner,
        repo: repo,
        issue_number: number,
      },
      (response) => response.data.filter(labeledEvent),
    );

    const latest_response_label = events[events.length - 1];

    const created_at = new Date(latest_response_label.created_at);
    const now = new Date();
    const diff = now - created_at;
    const diffDays = diff / (1000 * 60 * 60 * 24);

    if (diffDays > numberOfDaysLimit) {
      github.rest.issues.update({
        owner: owner,
        repo: repo,
        issue_number: number,
        state_reason: "not_planned",
        state: "closed",
      });

      github.rest.issues.createComment({
        owner: owner,
        repo: repo,
        issue_number: number,
        body: close_message,
      });
    }
  }
};



================================================
FILE: .github/scripts/cross.sh
================================================
#!/bin/bash -eu

# Runs the given command line inside the "$CROSS_IMAGE" container. The
# /home/runner directory is mounted at the same path and the container starts
# in the current working directory.
docker_args=(run --rm -v /home/runner:/home/runner -w "$PWD" "$CROSS_IMAGE")

exec docker "${docker_args[@]}" "$@"



================================================
FILE: .github/scripts/make.sh
================================================
#!/bin/bash -eu

# Runs `make` with the given arguments. When BUILD_CMD is "cross", make is
# started through cross.sh with CC and AR forwarded as make variables;
# otherwise this process is replaced by a plain `make`.
if [[ $BUILD_CMD == cross ]]; then
  cross.sh make CC="$CC" AR="$AR" "$@"
else
  exec make "$@"
fi



================================================
FILE: .github/scripts/remove_response_label.js
================================================
module.exports = async ({ github, context }) => {
  const commenter = context.actor;
  const issue = await github.rest.issues.get({
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: context.issue.number,
  });
  const author = issue.data.user.login;
  const labels = issue.data.labels.map((e) => e.name);

  if (author === commenter && labels.includes("more-information-needed")) {
    github.rest.issues.removeLabel({
      owner: context.repo.owner,
      repo: context.repo.repo,
      issue_number: context.issue.number,
      name: "more-information-needed",
    });
  }
};



================================================
FILE: .github/scripts/reviewers_remove.js
================================================
module.exports = async ({ github, context }) => {
  const requestedReviewers = await github.rest.pulls.listRequestedReviewers({
    owner: context.repo.owner,
    repo: context.repo.repo,
    pull_number: context.issue.number,
  });

  const reviewers = requestedReviewers.data.users.map((e) => e.login);

  github.rest.pulls.removeRequestedReviewers({
    owner: context.repo.owner,
    repo: context.repo.repo,
    pull_number: context.issue.number,
    reviewers: reviewers,
  });
};



================================================
FILE: .github/scripts/tree-sitter.sh
================================================
#!/bin/bash -eu

# Runs the release build of the tree-sitter CLI for "$TARGET" with the given
# arguments. When BUILD_CMD is "cross", the binary is started through cross.sh
# using "$CROSS_RUNNER"; otherwise this process is replaced by the binary.
tree_sitter="${ROOT}/target/${TARGET}/release/tree-sitter"

case "$BUILD_CMD" in
  cross)
    cross.sh "$CROSS_RUNNER" "$tree_sitter" "$@"
    ;;
  *)
    exec "$tree_sitter" "$@"
    ;;
esac



================================================
FILE: .github/workflows/backport.yml
================================================
name: Backport Pull Request

# Triggered when a pull request is closed or receives a label.
on:
  pull_request_target:
    types:
      - closed
      - labeled

permissions:
  contents: write
  pull-requests: write

jobs:
  backport:
    # Only act on pull requests that have been merged.
    if: github.event.pull_request.merged
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The token created here is passed to the backport step below.
      - name: Create app token
        uses: actions/create-github-app-token@v2
        id: app-token
        with:
          app-id: ${{ vars.BACKPORT_APP }}
          private-key: ${{ secrets.BACKPORT_KEY }}

      # Labels of the form `ci:backport <name>` match `label_pattern`.
      - name: Create backport PR
        uses: korthout/backport-action@v3
        with:
          pull_title: '${pull_title}'
          label_pattern: '^ci:backport ([^ ]+)$'
          github_token: ${{ steps.app-token.outputs.token }}



================================================
FILE: .github/workflows/bindgen.yml
================================================
name: Check Bindgen Output

# Runs for pull requests, and for pushes to master, that touch the C API
# header or the checked-in Rust bindings.
on:
  pull_request:
    paths:
      - lib/include/tree_sitter/api.h
      - lib/binding_rust/bindings.rs
  push:
    branches:
      - master
    paths:
      - lib/include/tree_sitter/api.h
      - lib/binding_rust/bindings.rs

jobs:
  check-bindgen:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up stable Rust toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable

      - name: Generate bindings
        run: cargo xtask generate-bindings

      # `git diff --exit-code` fails the job when the regenerated file differs
      # from the committed one.
      - name: Check if the bindgen output changed
        run: git diff --exit-code lib/binding_rust/bindings.rs



================================================
FILE: .github/workflows/build.yml
================================================
name: Build & Test

env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-D warnings"
  CROSS_DEBUG: 1

on:
  workflow_call:
    inputs:
      run-test:
        default: true
        type: boolean

jobs:
  build:
    name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    timeout-minutes: 40
    strategy:
      fail-fast: false
      matrix:
        platform:
          - linux-arm64
          - linux-arm
          - linux-x64
          - linux-x86
          - linux-powerpc64
          - windows-arm64
          - windows-x64
          - windows-x86
          - macos-arm64
          - macos-x64

        include:
          # When adding a new `target`:
          # 1. Define a new platform alias above
          # 2. Add a new record to the matrix map in `crates/cli/npm/install.js`
          - { platform: linux-arm64       , target: aarch64-unknown-linux-gnu   , os: ubuntu-latest    , use-cross: true }
          - { platform: linux-arm         , target: arm-unknown-linux-gnueabi   , os: ubuntu-latest    , use-cross: true }
          - { platform: linux-x64         , target: x86_64-unknown-linux-gnu    , os: ubuntu-22.04     , features: wasm  }
          - { platform: linux-x86         , target: i686-unknown-linux-gnu      , os: ubuntu-latest    , use-cross: true }
          - { platform: linux-powerpc64   , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest    , use-cross: true }
          - { platform: windows-arm64     , target: aarch64-pc-windows-msvc     , os: windows-latest                     }
          - { platform: windows-x64       , target: x86_64-pc-windows-msvc      , os: windows-latest   , features: wasm  }
          - { platform: windows-x86       , target: i686-pc-windows-msvc        , os: windows-latest                     }
          - { platform: macos-arm64       , target: aarch64-apple-darwin        , os: macos-latest     , features: wasm  }
          - { platform: macos-x64         , target: x86_64-apple-darwin         , os: macos-13         , features: wasm  }

          # Cross compilers for C library
          - { platform: linux-arm64       , cc: aarch64-linux-gnu-gcc           , ar: aarch64-linux-gnu-ar   }
          - { platform: linux-arm         , cc: arm-linux-gnueabi-gcc           , ar: arm-linux-gnueabi-ar   }
          - { platform: linux-x86         , cc: i686-linux-gnu-gcc              , ar: i686-linux-gnu-ar      }
          - { platform: linux-powerpc64   , cc: powerpc64-linux-gnu-gcc         , ar: powerpc64-linux-gnu-ar }

          # Prevent race condition (see #2041)
          - { platform: windows-x64   , rust-test-threads: 1 }
          - { platform: windows-x86   , rust-test-threads: 1 }

          # Can't natively run CLI on Github runner's host
          - { platform: windows-arm64 , no-run: true }

    env:
      BUILD_CMD: cargo
      SUFFIX: ${{ contains(matrix.target, 'windows') && '.exe' || '' }}

    defaults:
      run:
        shell: bash

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Read Emscripten version
      run: printf 'EMSCRIPTEN_VERSION=%s\n' "$(<crates/loader/emscripten-version)" >> $GITHUB_ENV

    - name: Install Emscripten
      if: ${{ !matrix.no-run && !matrix.use-cross }}
      uses: mymindstorm/setup-emsdk@v14
      with:
        version: ${{ env.EMSCRIPTEN_VERSION }}

    - name: Set up Rust
      uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        target: ${{ matrix.target }}

    - name: Install cross
      if: ${{ matrix.use-cross }}
      run: |
        if [ ! -x "$(command -v cross)" ]; then
          # TODO: Remove 'RUSTFLAGS=""' once https://github.com/cross-rs/cross/issues/1561 is resolved
          RUSTFLAGS="" cargo install cross --git https://github.com/cross-rs/cross
        fi

    - name: Configure cross
      if: ${{ matrix.use-cross }}
      run: |
        printf '%s\n' > Cross.toml \
          '[target.${{ matrix.target }}]'                                   \
          'image = "ghcr.io/cross-rs/${{ matrix.target }}:edge"'            \
          '[build]'                                                         \
          'pre-build = ['                                                   \
          '  "dpkg --add-architecture $CROSS_DEB_ARCH",'                    \
          '  "curl -fsSL https://deb.nodesource.com/setup_22.x | bash -",'  \
          '  "apt-get update && apt-get -y install libssl-dev nodejs"'      \
          ']'
        cat - Cross.toml <<< 'Cross.toml:'
        printf '%s\n' >> $GITHUB_ENV \
          "CROSS_CONFIG=$PWD/Cross.toml" \
          "CROSS_IMAGE=ghcr.io/cross-rs/${{ matrix.target }}:edge"

    - name: Set up environment
      env:
        RUST_TEST_THREADS: ${{ matrix.rust-test-threads }}
        USE_CROSS: ${{ matrix.use-cross }}
        TARGET: ${{ matrix.target }}
        CC: ${{ matrix.cc }}
        AR: ${{ matrix.ar }}
      run: |
        PATH="$PWD/.github/scripts:$PATH"
        printf '%s/.github/scripts\n' "$PWD" >> $GITHUB_PATH

        printf '%s\n' >> $GITHUB_ENV \
          'TREE_SITTER=tree-sitter.sh' \
          "TARGET=$TARGET" \
          "ROOT=$PWD"

        [[ -n $RUST_TEST_THREADS ]] && \
          printf 'RUST_TEST_THREADS=%s\n' "$RUST_TEST_THREADS" >> $GITHUB_ENV

        [[ -n $CC ]] && printf 'CC=%s\n' "$CC" >> $GITHUB_ENV
        [[ -n $AR ]] && printf 'AR=%s\n' "$AR" >> $GITHUB_ENV

        if [[ $USE_CROSS == true ]]; then
          printf 'BUILD_CMD=cross\n' >> $GITHUB_ENV
          runner=$(cross.sh bash -c "env | sed -n 's/^CARGO_TARGET_.*_RUNNER=//p'")
          [[ -n $runner ]] && printf 'CROSS_RUNNER=%s\n' "$runner" >> $GITHUB_ENV
        fi

    - name: Build wasmtime library
      if: ${{ !matrix.use-cross && contains(matrix.features, 'wasm') }}
      run: |
        mkdir -p target
        WASMTIME_VERSION=$(cargo metadata --format-version=1 --locked --features wasm | \
                           jq -r '.packages[] | select(.name == "wasmtime-c-api-impl") | .version')
        curl -LSs "$WASMTIME_REPO/archive/refs/tags/v${WASMTIME_VERSION}.tar.gz" | tar xzf - -C target
        cd target/wasmtime-${WASMTIME_VERSION}
        cmake -S crates/c-api -B target/c-api \
          -DCMAKE_INSTALL_PREFIX="$PWD/artifacts" \
          -DWASMTIME_DISABLE_ALL_FEATURES=ON \
          -DWASMTIME_FEATURE_CRANELIFT=ON \
          -DWASMTIME_TARGET='${{ matrix.target }}'
        cmake --build target/c-api && cmake --install target/c-api
        printf 'CMAKE_PREFIX_PATH=%s\n' "$PWD/artifacts" >> $GITHUB_ENV
      env:
        WASMTIME_REPO: https://github.com/bytecodealliance/wasmtime

    - name: Build C library (make)
      if: ${{ runner.os != 'Windows' }}
      run: make.sh -j CFLAGS="$CFLAGS"
      env:
        CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types

    - name: Build C library (CMake)
      if: ${{ !matrix.use-cross }}
      run: |
        cmake -S lib -B build/static \
          -DBUILD_SHARED_LIBS=OFF \
          -DCMAKE_BUILD_TYPE=Debug \
          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
          -DTREE_SITTER_FEATURE_WASM=$WASM
        cmake --build build/static --verbose

        cmake -S lib -B build/shared \
          -DBUILD_SHARED_LIBS=ON \
          -DCMAKE_BUILD_TYPE=Debug \
          -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
          -DTREE_SITTER_FEATURE_WASM=$WASM
        cmake --build build/shared --verbose
      env:
        CC: ${{ contains(matrix.target, 'linux') && 'clang' || '' }}
        WASM: ${{ contains(matrix.features, 'wasm') && 'ON' || 'OFF' }}

    - name: Build wasm library
      # No reason to build on the same Github runner hosts many times
      if: ${{ !matrix.no-run && !matrix.use-cross }}
      shell: bash
      run: |
        cd lib/binding_web
        npm ci
        CJS=true npm run build
        CJS=true npm run build:debug
        npm run build
        npm run build:debug

    # Type-check the crate in lib/ with its default features disabled.
    # Only runs when tests are requested and the matrix entry is runnable.
    - name: Check no_std builds
      if: ${{ inputs.run-test && !matrix.no-run }}
      shell: bash
      working-directory: lib
      run: $BUILD_CMD check --no-default-features

    # Release build for the matrix target with the matrix feature set. This
    # step is unconditional; the folded scalar below collapses to one line.
    - name: Build target
      run: >-
        $BUILD_CMD build --release
        --target=${{ matrix.target }}
        --features=${{ matrix.features }}

    # Run the repository-local cache action. Later steps read its `cache-hit`
    # output through the `cache` step id.
    - name: Cache fixtures
      if: ${{ inputs.run-test && !matrix.no-run }}
      uses: ./.github/actions/cache
      id: cache

    # Run the xtask `fetch-fixtures` subcommand whenever tests are enabled for
    # a runnable matrix entry (independent of the cache result).
    - name: Fetch fixtures
      if: ${{ inputs.run-test && !matrix.no-run }}
      run: $BUILD_CMD run -p xtask -- fetch-fixtures

    # Run the xtask `generate-fixtures` subcommand, but only when the cache
    # step did not report a hit.
    - name: Generate fixtures
      if: ${{ inputs.run-test && !matrix.no-run && steps.cache.outputs.cache-hit != 'true' }}
      run: $BUILD_CMD run -p xtask -- generate-fixtures

    # Same as the fixture generation step but with `--wasm`; additionally
    # skipped for matrix entries that use cross.
    - name: Generate Wasm fixtures
      if: ${{ inputs.run-test && !matrix.no-run && !matrix.use-cross && steps.cache.outputs.cache-hit != 'true' }}
      run: $BUILD_CMD run -p xtask -- generate-fixtures --wasm

    # Run the test suite for the matrix target with the matrix feature set.
    # The folded scalar below collapses to a single command line.
    - name: Run main tests
      if: ${{ inputs.run-test && !matrix.no-run }}
      run: >-
        $BUILD_CMD test
        --target=${{ matrix.target }}
        --features=${{ matrix.features }}

    # Run the tests of the crate in crates/generate for the matrix target.
    - name: Run generate unit tests
      if: ${{ inputs.run-test && !matrix.no-run }}
      working-directory: crates/generate
      run: $BUILD_CMD test --target=${{ matrix.target }}

    # Run the xtask `test-wasm` subcommand; skipped for cross builds and for
    # matrix entries marked no-run.
    - name: Run wasm tests
      if: ${{ inputs.run-test && !matrix.no-run && !matrix.use-cross }}
      run: $BUILD_CMD run -p xtask -- test-wasm

    # Upload the release CLI binary for this platform as a workflow artifact.
    # The artifact is kept for 7 days, and a missing binary fails the step.
    - name: Upload CLI artifact
      uses: actions/upload-artifact@v4
      with:
        path: target/${{ matrix.target }}/release/tree-sitter${{ env.SUFFIX }}
        name: tree-sitter.${{ matrix.platform }}
        retention-days: 7
        if-no-files-found: error

    # Upload the web binding outputs (release and debug bundles, source maps,
    # the .wasm modules and the binding sources) as one artifact. Only the
    # linux-x64 matrix entry uploads it.
    - name: Upload Wasm artifacts
      uses: actions/upload-artifact@v4
      if: ${{ matrix.platform == 'linux-x64' }}
      with:
        name: tree-sitter.wasm
        retention-days: 7
        if-no-files-found: error
        path: |
          lib/binding_web/web-tree-sitter.js
          lib/binding_web/web-tree-sitter.js.map
          lib/binding_web/web-tree-sitter.cjs
          lib/binding_web/web-tree-sitter.cjs.map
          lib/binding_web/web-tree-sitter.wasm
          lib/binding_web/web-tree-sitter.wasm.map
          lib/binding_web/debug/web-tree-sitter.cjs
          lib/binding_web/debug/web-tree-sitter.cjs.map
          lib/binding_web/debug/web-tree-sitter.js
          lib/binding_web/debug/web-tree-sitter.js.map
          lib/binding_web/debug/web-tree-sitter.wasm
          lib/binding_web/debug/web-tree-sitter.wasm.map
          lib/binding_web/lib/*.c
          lib/binding_web/lib/*.h
          lib/binding_web/lib/*.ts
          lib/binding_web/src/*.ts



================================================
FILE: .github/workflows/ci.yml
================================================
# Top-level CI workflow: lints the repository, then calls the reusable
# sanitizer and build workflows. Runs for pull requests and for pushes to
# master, except when only documentation-like paths changed.
name: CI

on:
  pull_request:
    paths-ignore:
      - docs/**
      - "**/README.md"
      - CONTRIBUTING.md
      - LICENSE
      # The CLI templates live under crates/cli/. The trailing /** is needed
      # so the filter matches the files inside the directory.
      - crates/cli/src/templates/**
  push:
    branches: [master]
    paths-ignore:
      - docs/**
      - "**/README.md"
      - CONTRIBUTING.md
      - LICENSE
      - crates/cli/src/templates/**

# One run per workflow and ref. A newer run cancels an in-progress one unless
# the run was triggered by a push.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name != 'push' }}

jobs:
  # Lint the sources with the stable toolchain plus clippy and rustfmt.
  checks:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up stable Rust toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable
          components: clippy, rustfmt

      - name: Lint files
        run: |
          make lint
          make lint-web

  # Reusable workflow that runs the tests under UBSAN and ASAN.
  sanitize:
    uses: ./.github/workflows/sanitize.yml

  # Reusable build workflow, called here without inputs.
  build:
    uses: ./.github/workflows/build.yml



================================================
FILE: .github/workflows/docs.yml
================================================
# Builds the mdBook sources under docs/ and publishes the result to GitHub
# Pages whenever docs/ changes on master.
name: Deploy Docs
on:
  push:
    branches:
      - master
    paths:
      - docs/**

jobs:
  deploy-docs:
    runs-on: ubuntu-latest

    permissions:
      id-token: write
      pages: write
      contents: write

    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        name: Set up Rust

      # Look up the x86_64 Linux asset of the latest mdbook release, unpack it
      # into ./mdbook and add that directory to PATH for the following steps.
      - name: Install mdbook
        run: |
          jq_expr='.assets[] | select(.name | contains("x86_64-unknown-linux-gnu")) | .browser_download_url'
          url=$(gh api repos/rust-lang/mdbook/releases/latest --jq "$jq_expr")
          mkdir mdbook
          curl -sSL "$url" | tar -xz -C mdbook
          printf '%s/mdbook\n' "$PWD" >> "$GITHUB_PATH"
        env:
          # Token for the `gh api` call above.
          GH_TOKEN: ${{ github.token }}

      - name: Install mdbook-admonish
        run: cargo install mdbook-admonish

      - name: Build Book
        run: mdbook build docs

      - name: Setup Pages
        uses: actions/configure-pages@v5

      # docs/book is the directory handed to the Pages deployment.
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/book

      - name: Deploy to GitHub Pages
        uses: actions/deploy-pages@v4
        id: deployment



================================================
FILE: .github/workflows/emscripten.yml
================================================
# On pull requests opened or updated by dependabot, runs the
# `upgrade-emscripten` xtask and pushes the result back to the PR branch.
name: Update Emscripten

on:
  pull_request:
    types:
      - opened
      - synchronize

permissions:
  pull-requests: read
  # Needed for the `git push` at the end of the job.
  contents: write

jobs:
  update-emscripten:
    runs-on: ubuntu-latest
    if: github.actor == 'dependabot[bot]'
    steps:
      # Check out the PR head commit itself.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        name: Set up stable Rust toolchain

      # Configure git to use the dependabot identity before running the xtask.
      - name: Run emscripten update xtask
        run: |
          git config --global user.name "dependabot[bot]"
          git config --global user.email "49699333+dependabot[bot]@users.noreply.github.com"
          cargo xtask upgrade-emscripten

      - name: Push updated version
        run: git push origin HEAD:$GITHUB_HEAD_REF



================================================
FILE: .github/workflows/nvim_ts.yml
================================================
# Builds this workspace in release mode, puts target/release on PATH, and runs
# nvim-treesitter's install-parsers and check-queries scripts on Linux,
# Windows and macOS.
name: nvim-treesitter parser tests

on:
  workflow_dispatch:
  pull_request:
    paths:
      - "crates/cli/**"
      - "crates/config/**"
      - "crates/generate/**"
      - "crates/loader/**"
      - ".github/workflows/nvim_ts.yml"

concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:
  check_compilation:
    name: ${{ matrix.os }} - ${{ matrix.type }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
          - macos-latest
        type:
          - generate
          - build
    env:
      NVIM_TS_DIR: nvim-treesitter
      # Path of the Neovim executable used by the script steps below.
      NVIM: ${{ matrix.os == 'windows-latest' && 'nvim-win64\\bin\\nvim.exe' || 'nvim' }}
    steps:
      - uses: actions/checkout@v4

      # Second checkout: nvim-treesitter's main branch, into $NVIM_TS_DIR.
      - uses: actions/checkout@v4
        with:
          repository: nvim-treesitter/nvim-treesitter
          ref: main
          path: ${{ env.NVIM_TS_DIR }}

      # Add target/release to PATH; the redirect syntax differs per platform.
      - run: echo ${{ github.workspace }}/target/release >> $GITHUB_PATH
        if: runner.os != 'Windows'

      - run: echo ${{ github.workspace }}/target/release >> $env:GITHUB_PATH
        if: runner.os == 'Windows'

      - uses: actions-rust-lang/setup-rust-toolchain@v1
      - run: cargo build --release
      - uses: ilammy/msvc-dev-cmd@v1

      - name: Install and prepare Neovim
        working-directory: ${{ env.NVIM_TS_DIR }}
        run: bash ./scripts/ci-install.sh

      - name: Generate and compile parsers
        if: matrix.type == 'generate'
        shell: bash
        working-directory: ${{ env.NVIM_TS_DIR }}
        run: $NVIM -l ./scripts/install-parsers.lua --generate --max-jobs=2

      - name: Compile parsers
        if: matrix.type == 'build'
        shell: bash
        working-directory: ${{ env.NVIM_TS_DIR }}
        run: $NVIM -l ./scripts/install-parsers.lua --max-jobs=10

      # Runs even when an earlier step failed, unless the run was cancelled.
      - name: Check query files
        if: ${{ !cancelled() }}
        shell: bash
        working-directory: ${{ env.NVIM_TS_DIR }}
        run: $NVIM -l ./scripts/check-queries.lua



================================================
FILE: .github/workflows/release.yml
================================================
# Release workflow: triggered by pushing a vX.Y.Z tag or by manual dispatch.
name: Release

on:
  push:
    tags:
      - v[0-9]+.[0-9]+.[0-9]+
  workflow_dispatch:

jobs:
  # Call the reusable build workflow with its `run-test` input set to false.
  build:
    uses: ./.github/workflows/build.yml
    with:
      run-test: false

  # Download every artifact produced by the build job, repackage them under
  # target/, and create a GitHub release for the current ref.
  release:
    name: Release on GitHub
    needs: build
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      # Artifacts are downloaded into artifacts/.
      - uses: actions/download-artifact@v4
        name: Download build artifacts
        with:
          path: artifacts

      - name: Display structure of downloaded files
        working-directory: artifacts
        run: ls -lR

      # The web binding files are bundled into one tarball. Each remaining
      # artifacts/tree-sitter.<platform> directory yields one gzipped CLI
      # binary named target/tree-sitter-<platform>.gz.
      - name: Prepare release artifacts
        run: |
          mkdir -p target web
          mv artifacts/tree-sitter.wasm/* web/

          tar -czf target/web-tree-sitter.tar.gz -C web .

          rm -r artifacts/tree-sitter.wasm

          for platform in $(cd artifacts; ls | sed 's/^tree-sitter\.//'); do
            exe=$(ls artifacts/tree-sitter.$platform/tree-sitter*)
            gzip --stdout --name $exe > target/tree-sitter-$platform.gz
          done
          rm -rf artifacts
          ls -l target/

      - name: Create release
        env:
          # Token used by the `gh` CLI.
          GH_TOKEN: ${{ github.token }}
        run: |-
          gh release create ${{ github.ref_name }} \
            target/tree-sitter-*.gz \
            target/web-tree-sitter.tar.gz

  # Publish the workspace crates once the GitHub release job has finished.
  crates_io:
    name: Publish packages to Crates.io
    needs: release
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        name: Set up Rust

      # The registry token comes from the CARGO_REGISTRY_TOKEN secret.
      - uses: katyo/publish-crates@v2
        name: Publish crates to Crates.io
        with:
          registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}

  # Publish the npm packages after the GitHub release has been created: one
  # matrix entry per package directory, each publishing independently
  # (fail-fast is off).
  npm:
    name: Publish packages to npmjs.com
    runs-on: ubuntu-latest
    needs: release
    strategy:
      fail-fast: false
      matrix:
        # The CLI's npm package lives at crates/cli/npm (see its package.json
        # in the repository tree). A bare `cli/npm` does not exist, so the
        # publish step would fail on its working directory.
        directory: [crates/cli/npm, lib/binding_web]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Node
        uses: actions/setup-node@v4
        with:
          node-version: 20
          registry-url: https://registry.npmjs.org

      - name: Set up Rust
        uses: actions-rust-lang/setup-rust-toolchain@v1

      # Only the web binding needs a build before publishing: default and
      # CommonJS (CJS=true) bundles, each in release and debug flavours.
      - name: Build wasm
        if: matrix.directory == 'lib/binding_web'
        run: |
          cd ${{ matrix.directory }}
          npm ci
          npm run build
          npm run build:debug
          CJS=true npm run build
          CJS=true npm run build:debug

      - name: Publish to npmjs.com
        working-directory: ${{ matrix.directory }}
        run: npm publish
        env:
          # Token from the NPM_TOKEN secret.
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}



================================================
FILE: .github/workflows/response.yml
================================================
# Two jobs sharing one workflow: a daily (or manually dispatched) job that
# runs close_unresponsive.js, and a job that runs remove_response_label.js on
# every issue comment event.
name: No response

on:
  issue_comment:
  workflow_dispatch:
  schedule:
    - cron: "30 1 * * *" # Run every day at 01:30

permissions:
  pull-requests: write
  issues: write

jobs:
  close:
    name: Close issues with no response
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
    steps:
      # Sparse checkout: only the script this job runs is fetched.
      - uses: actions/checkout@v4
        name: Checkout script
        with:
          sparse-checkout-cone-mode: false
          sparse-checkout: .github/scripts/close_unresponsive.js

      - uses: actions/github-script@v7
        name: Run script
        with:
          script: |
            const script = require('./.github/scripts/close_unresponsive.js')
            await script({github, context})

  remove_label:
    name: Remove response label
    runs-on: ubuntu-latest
    if: github.event_name == 'issue_comment'
    steps:
      # Sparse checkout: only the script this job runs is fetched.
      - uses: actions/checkout@v4
        name: Checkout script
        with:
          sparse-checkout-cone-mode: false
          sparse-checkout: .github/scripts/remove_response_label.js

      - uses: actions/github-script@v7
        name: Run script
        with:
          script: |
            const script = require('./.github/scripts/remove_response_label.js')
            await script({github, context})



================================================
FILE: .github/workflows/reviewers_remove.yml
================================================
# Runs reviewers_remove.js whenever a pull request is converted to a draft or
# closed.
name: Remove Reviewers

on:
  pull_request_target:
    types:
      - converted_to_draft
      - closed

permissions:
  pull-requests: write

jobs:
  remove-reviewers:
    runs-on: ubuntu-latest
    steps:
      # Sparse checkout: only the script this job runs is fetched.
      - uses: actions/checkout@v4
        name: Checkout script
        with:
          sparse-checkout-cone-mode: false
          sparse-checkout: .github/scripts/reviewers_remove.js

      - uses: actions/github-script@v7
        name: Run script
        with:
          script: |
            const script = require('./.github/scripts/reviewers_remove.js')
            await script({github, context})



================================================
FILE: .github/workflows/sanitize.yml
================================================
# Reusable workflow (workflow_call only) that runs the test suite twice on
# Linux: once with the undefined-behaviour sanitizer and once with the
# address sanitizer.
name: Sanitize

on:
  workflow_call:

env:
  RUSTFLAGS: -D warnings
  CARGO_TERM_COLOR: always

jobs:
  check-undefined-behaviour:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    env:
      TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      - name: Install UBSAN library
        run: sudo apt-get update -y && sudo apt-get install -y libubsan1

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        name: Set up Rust

      - name: Build project
        run: cargo build --release

      - name: Cache fixtures
        id: cache
        uses: ./.github/actions/cache

      - name: Fetch fixtures
        run: cargo xtask fetch-fixtures

      # Fixtures are regenerated only when the cache step reported no hit.
      - name: Generate fixtures
        if: ${{ steps.cache.outputs.cache-hit != 'true' }}
        run: cargo xtask generate-fixtures

      # Both sanitizer runs use a single test thread and append their linker
      # flag to the workflow-level RUSTFLAGS.
      - name: Run main tests with undefined behaviour sanitizer (UBSAN)
        env:
          RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan
          CFLAGS: -fsanitize=undefined
        run: cargo test -- --test-threads 1

      - name: Run main tests with address sanitizer (ASAN)
        env:
          RUSTFLAGS: ${{ env.RUSTFLAGS }} -lasan --cfg sanitizing
          CFLAGS: -fsanitize=address
          ASAN_OPTIONS: verify_asan_link_order=0
        run: cargo test -- --test-threads 1



================================================
FILE: .github/workflows/wasm_exports.yml
================================================
# Builds the C library and the debug web binding, then runs the
# `check-wasm-exports` xtask. Triggered by changes to the paths listed below.
name: Check WASM Exports

on:
  pull_request:
    paths:
      - lib/include/tree_sitter/api.h
      - lib/binding_web/**
      # NOTE(review): confirm this still matches where the xtask crate lives.
      - xtask/src/**
  push:
    branches:
      - master
    paths:
      - lib/include/tree_sitter/api.h
      - lib/binding_rust/bindings.rs
      - lib/CMakeLists.txt

jobs:
  check-wasm-exports:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        name: Set up stable Rust toolchain
        with:
          toolchain: stable

      # wasm-objdump is provided by the wabt package.
      - name: Install wasm-objdump
        run: |
          sudo apt-get update -y
          sudo apt-get install -y wabt

      - name: Build C library (make)
        env:
          CFLAGS: -g -Werror -Wall -Wextra -Wshadow -Wpedantic -Werror=incompatible-pointer-types
        run: make -j CFLAGS="$CFLAGS"

      - name: Build WASM Library
        working-directory: lib/binding_web
        run: |
          npm ci
          npm run build:debug

      - name: Check WASM exports
        run: cargo xtask check-wasm-exports



================================================
FILE: .zed/settings.json
================================================
{
  "lsp": {
    "rust-analyzer": {
      "initialization_options": {
        "cargo": {
          "features": "all"
        }
      }
    }
  }
}
