# ═══════════════════════════════════════════════════════════════
#  PROJECT JAMES — Python Dependencies
#  v0.1.0-alpha
# ═══════════════════════════════════════════════════════════════

# ── Web Framework / Server ─────────────────────────────────────
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.27  # GHSA / Dependabot alerts #5,#6 (DoS via unbounded multipart headers; 0.0.27 raises the floor past the newer disclosure)
# Dependabot alerts #15/#16 — GHSA-86qp-5c8j-p5mr / CVE-2026-48710
# (badhost): unvalidated Host header poisons request.url.path,
# bypassing path-based security middleware. Patched in starlette
# 1.0.1. Pulled in transitively by fastapi; pin the floor here so a
# fresh install with a stale wheel cache cannot regress us, matching
# the python-multipart floor above and the urllib3 / idna floors below.
starlette>=1.0.1
pydantic>=2.5.0

# ── Authentication / Security ──────────────────────────────────
# python-jose floor raised to 3.4.0: releases through 3.3.0 are
# affected by CVE-2024-33663 (algorithm confusion with OpenSSH ECDSA
# keys) and CVE-2024-33664 (DoS via a highly compressed JWE token,
# the "JWT bomb"); both are fixed in 3.4.0. Same floor-pinning
# approach as the other advisory-driven pins in this file, so a
# fresh install with a stale wheel cache cannot regress us.
python-jose[cryptography]>=3.4.0
passlib[bcrypt]>=1.7.4
python-dotenv>=1.0.0

# ── HTTP / Networking ──────────────────────────────────────────
requests>=2.31.0
# Dependabot alerts #7-#10 — urllib3 must be ≥ 2.7.0 to close both
# the decompression-bomb bypass in the streaming API and the
# sensitive-header leak across origins in proxied low-level
# redirects. Pinning the floor here even though urllib3 is also
# a transitive of requests, so a fresh install with a stale wheel
# cache cannot regress us.
urllib3>=2.7.0
# Dependabot alerts #11/#12 — GHSA-65pc-fj4g-8rjx / CVE-2026-45409:
# specially-crafted inputs to idna.encode() bypass the CVE-2024-3651
# fix. Pulled in transitively by requests; pin the floor here so a
# fresh install with a stale wheel cache cannot regress us, matching
# the urllib3 / python-multipart approach above.
idna>=3.15

# ── Vector DB / Embeddings / Search ────────────────────────────
# Dependabot alerts #13/#14 — GHSA-f4j7-r4q5-qw2c / CVE-2026-45829
# (chromatoast): pre-auth code injection in the chromadb HTTP server's
# /api/v2/tenants/{tenant}/databases/{db}/collections endpoint when
# the request body sets trust_remote_code=true on a malicious model
# repository. Vulnerable range 1.0.0–1.5.9 with NO patched version
# published as of 2026-06-10.
#
# JAMES risk acceptance: not exploitable in this deployment.
#   - core/vector_store.py uses chromadb.PersistentClient (embedded
#     local mode); the HTTP server is never started.
#   - No JAMES code path calls trust_remote_code or constructs
#     chromadb collections from user-controlled model repositories.
#   - JAMES is local-first per CLAUDE.md / docs/ARCHITECTURE.md.
# Floor kept at 0.4.22 (no patched release exists yet to raise the
# floor to). Track upstream chroma-core/chroma#6717 and bump when
# a fixed release ships. See
# docs/security/dependabot-2026-06-10-risk-assessment.md for the
# full assessment + dismissal rationale.
chromadb>=0.4.22
sentence-transformers>=2.5.0
rank-bm25>=0.2.2

# ── Numerical / Data ───────────────────────────────────────────
numpy>=1.24.0
PyYAML>=6.0
# Excel output for workspace jobs (W8-A excel_build handler).
# Read-side openpyxl is already imported by file_processor for .xlsx
# ingestion; W8 makes it an explicit declared dep.
openpyxl>=3.1.0

# ── Hardware Monitoring (auto-recommendation) ──────────────────
psutil>=5.9.0
# pynvml: strongly recommended on NVIDIA systems. Without it the
# admin /hardware/ endpoint falls back to nvidia-smi subprocess
# (slower, requires nvidia-smi in PATH) or wmic (Windows only,
# removed in Win11 24H2+). pynvml works cross-platform on any
# system with NVIDIA drivers installed.
pynvml>=11.5.0

# ── Web Search (Tavily primary, DDG fallback) ──────────────────
tavily-python>=0.3.0
duckduckgo-search>=5.0.0

# ── Document Parsing + Export ──────────────────────────────────
markdown2>=2.4.0
markitdown[pdf,docx,xlsx,pptx]>=0.1.5
# pdf2image needs the Poppler system package, which pip does not
# install — see "System Requirements" at the bottom of this file.
pdf2image>=1.17.0
# python-docx powers the .docx answer-export feature in chat
# (PR #93 / item #4). Without it /export/ silently degrades to
# .md with a fallback_reason header — still works but loses Word
# formatting.
python-docx>=1.1.0

# ── OCR (Image text extraction) ────────────────────────────────
# pytesseract needs the Tesseract OCR system package, which pip does
# not install — see "System Requirements" at the bottom of this file.
pytesseract>=0.3.10
easyocr>=1.7.0

# ── Image Processing ───────────────────────────────────────────
Pillow>=10.0.0
opencv-python>=4.9.0

# ── Video / Audio Processing ───────────────────────────────────
# FFmpeg itself must be installed separately for video/audio
# processing — see "System Requirements" at the bottom of this file.
ffmpeg-python>=0.2.0
openai-whisper>=20231117

# ── Evaluation Harness (Issue #46, Axis 2-B) ───────────────────
# RAGAS pulls scikit-network which needs MSVC build tools on Windows
# unless wheels are available. If install fails on Windows, run:
#   pip install ragas --only-binary=:all:
ragas>=0.4.0

# α-5 external benchmark — MultiHop-RAG (Tang & Yang 2024, EMNLP).
# Used by scripts/hotpot/download_multihop_rag.py to pull the dataset
# from HuggingFace into workspaces/hotpot_eval/. Read-only adapter;
# no impact on production unless JAMES_WORKSPACE is set to the
# benchmark workspace.
datasets>=2.14.0

# ═══════════════════════════════════════════════════════════════
#  System Requirements (install separately, not via pip)
# ═══════════════════════════════════════════════════════════════
#
#  • Ollama (LLM runtime)
#      https://ollama.ai
#      ollama pull gemma4:e4b           # default (config.GEMMA_MODEL)
#      ollama pull qwen2.5-coder:32b    # coding mode (config.CODING_MODEL)
#      ollama pull llava:13b            # vision mode (optional)
#      Override defaults via .env:
#        JAMES_LLM_MODEL=...
#        JAMES_CODING_MODEL=...
#
#  • Tesseract OCR (for pytesseract)
#      Windows: https://github.com/UB-Mannheim/tesseract/wiki
#      macOS:   brew install tesseract
#      Linux:   apt install tesseract-ocr
#
#  • FFmpeg (for video/audio processing)
#      Windows: https://ffmpeg.org/download.html
#      macOS:   brew install ffmpeg
#      Linux:   apt install ffmpeg
#
#  • Poppler (for pdf2image)
#      Windows: https://github.com/oschwartz10612/poppler-windows
#      macOS:   brew install poppler
#      Linux:   apt install poppler-utils
#
# ═══════════════════════════════════════════════════════════════
