# Dependencies
node_modules/

# Build output
dist/
src/provenance-parser.generated.js

# Environment variables
.env

# IDE files
.vscode/
.idea/

# Logs
*.log
npm-debug.log*

# OS files
.DS_Store
Thumbs.db

# Local files
offline/
.claude/
.codex/
.agents/
skills-lock.json
CLAUDE.md
CLAUDE.md.bak
AGENTS.md

# Vocabulary database (generated, ~1 GB)
# Using data/* instead of data/ so we can negate seed/, audit/ and backfills/
# below — git won't recurse into a wholly-ignored directory, which breaks
# subsequent re-inclusions.
data/*
# Exception: hand-curated seed files that are inputs to the harvest, not
# outputs of it (#229 Part A). Small, stable, versioned.
!data/seed/
# v0.25 geocoding-bundle assets — large pre-harvest downloads (~2 GB WOF + ~125 MB
# Pleiades) that exceed GitHub's per-file limit. SHA-pinned via committed manifests
# (`data/seed/wof/MANIFEST.tsv`) and re-downloaded by `scripts/_download_v025_assets.sh`.
data/seed/wof/*.parquet
data/seed/pleiades-places.json.gz
data/seed/pleiades-places.json.gz.sha256
!data/seed/wof/MANIFEST.tsv
# Exception: audit README + manifest (these document which audit files exist and
# which LLM batches were collected, by date/model/cost) — small, stable, versioned;
# they enable cross-session reconstruction when the audit JSON files are regenerated.
!data/audit/
data/audit/*
!data/audit/README.md
!data/audit/MANIFEST.md
# Exception: hand-curated CSVs that drive the post-harvest backfill chain
# (place-overrides, vei-additions, coord-corrections, label-corrections).
# Small, stable, versioned — these are the system of record for manual
# interventions that must survive a fresh harvest. See scripts/RELEASE.md.
# They sit in data/backfills/ alongside large regenerable harvest inputs that
# stay ignored — so un-ignore the dir, re-ignore its contents, then re-include
# just the curated-*.csv files (same shape as the audit/ block above).
!data/backfills/
data/backfills/*
!data/backfills/curated-*.csv

# Python bytecode cache
__pycache__/
